@@ -170,26 +170,51 @@ public static class EncodingParams {
170170 }
171171 }
172172
173- /** Try all (exponent, factor) combos and pick the one with fewest exceptions. */
173+ /**
174+ * Try all (exponent, factor) combos and pick the one with the smallest estimated compressed size.
175+ *
176+ * <p>Estimated size (in bits) = {@code length * bitWidth + exceptions * (Float.SIZE + Short.SIZE)},
177+ * where bitWidth is the number of bits needed to represent the unsigned range of non-exception
178+ * encoded values after frame-of-reference subtraction. This matches the C++ ALP cost model and
179+ * produces better compression ratios than minimizing exception count alone.
180+ */
174181 static EncodingParams findBestFloatParams (float [] values , int offset , int length ) {
175182 int bestExponent = 0 ;
176183 int bestFactor = 0 ;
177184 int bestExceptions = length ;
185+ long bestEstimatedSize = Long .MAX_VALUE ;
178186
179187 for (int e = 0 ; e <= FLOAT_MAX_EXPONENT ; e ++) {
180188 for (int f = 0 ; f <= e ; f ++) {
181189 int exceptions = 0 ;
190+ int minEncoded = Integer .MAX_VALUE ;
191+ int maxEncoded = Integer .MIN_VALUE ;
182192 for (int i = 0 ; i < length ; i ++) {
183- if (isFloatException (values [offset + i ], e , f )) {
193+ float value = values [offset + i ];
194+ if (isFloatException (value , e , f )) {
184195 exceptions ++;
196+ } else {
197+ int encoded = encodeFloat (value , e , f );
198+ if (encoded < minEncoded ) minEncoded = encoded ;
199+ if (encoded > maxEncoded ) maxEncoded = encoded ;
185200 }
186201 }
187- if (exceptions < bestExceptions ) {
202+ int nonExceptions = length - exceptions ;
203+ if (nonExceptions == 0 ) continue ;
204+ long delta = (nonExceptions < 2 ) ? 0 :
205+ Integer .toUnsignedLong (maxEncoded ) - Integer .toUnsignedLong (minEncoded );
206+ int bitsPerValue = (delta == 0 ) ? 0 : (64 - Long .numberOfLeadingZeros (delta ));
207+ long estimatedSize = (long ) length * bitsPerValue
208+ + (long ) exceptions * (Float .SIZE + Short .SIZE );
209+ if (estimatedSize < bestEstimatedSize
210+ || (estimatedSize == bestEstimatedSize
211+ && (e > bestExponent || (e == bestExponent && f > bestFactor )))) {
212+ bestEstimatedSize = estimatedSize ;
188213 bestExponent = e ;
189214 bestFactor = f ;
190215 bestExceptions = exceptions ;
191- if (bestExceptions == 0 ) {
192- return new EncodingParams (bestExponent , bestFactor , bestExceptions );
216+ if (bestExceptions == 0 && bitsPerValue == 0 ) {
217+ return new EncodingParams (bestExponent , bestFactor , 0 );
193218 }
194219 }
195220 }
@@ -202,74 +227,130 @@ static EncodingParams findBestFloatParamsWithPresets(float[] values, int offset,
202227 int bestExponent = presets [0 ][0 ];
203228 int bestFactor = presets [0 ][1 ];
204229 int bestExceptions = length ;
230+ long bestEstimatedSize = Long .MAX_VALUE ;
205231
206232 for (int [] preset : presets ) {
207233 int e = preset [0 ];
208234 int f = preset [1 ];
209235 int exceptions = 0 ;
236+ int minEncoded = Integer .MAX_VALUE ;
237+ int maxEncoded = Integer .MIN_VALUE ;
210238 for (int i = 0 ; i < length ; i ++) {
211- if (isFloatException (values [offset + i ], e , f )) {
239+ float value = values [offset + i ];
240+ if (isFloatException (value , e , f )) {
212241 exceptions ++;
242+ } else {
243+ int encoded = encodeFloat (value , e , f );
244+ if (encoded < minEncoded ) minEncoded = encoded ;
245+ if (encoded > maxEncoded ) maxEncoded = encoded ;
213246 }
214247 }
215- if (exceptions < bestExceptions ) {
248+ int nonExceptions = length - exceptions ;
249+ if (nonExceptions == 0 ) continue ;
250+ long delta = (nonExceptions < 2 ) ? 0 :
251+ Integer .toUnsignedLong (maxEncoded ) - Integer .toUnsignedLong (minEncoded );
252+ int bitsPerValue = (delta == 0 ) ? 0 : (64 - Long .numberOfLeadingZeros (delta ));
253+ long estimatedSize = (long ) length * bitsPerValue
254+ + (long ) exceptions * (Float .SIZE + Short .SIZE );
255+ if (estimatedSize < bestEstimatedSize
256+ || (estimatedSize == bestEstimatedSize
257+ && (e > bestExponent || (e == bestExponent && f > bestFactor )))) {
258+ bestEstimatedSize = estimatedSize ;
216259 bestExponent = e ;
217260 bestFactor = f ;
218261 bestExceptions = exceptions ;
219- if (bestExceptions == 0 ) {
220- return new EncodingParams (bestExponent , bestFactor , bestExceptions );
262+ if (bestExceptions == 0 && bitsPerValue == 0 ) {
263+ return new EncodingParams (bestExponent , bestFactor , 0 );
221264 }
222265 }
223266 }
224267 return new EncodingParams (bestExponent , bestFactor , bestExceptions );
225268 }
226269
270+ /**
 * Try all (exponent, factor) combos and pick the one with the smallest estimated compressed size.
 *
 * <p>Every pair {@code (e, f)} with {@code 0 <= f <= e <= DOUBLE_MAX_EXPONENT} is scored. For each
 * pair the values in {@code values[offset .. offset + length)} are split into exceptions (those for
 * which {@code isDoubleException} returns true) and non-exceptions, and the minimum and maximum of
 * {@code encodeDouble} over the non-exceptions are tracked.
 *
 * <p>Estimated size (in bits) =
 * {@code length * bitsPerValue + exceptions * (Double.SIZE + Short.SIZE)}, where bitsPerValue is
 * the number of bits needed to represent {@code maxEncoded - minEncoded} as an unsigned value
 * (0 when there are fewer than two non-exceptions or all encoded values are equal).
 *
 * <p>Pairs for which every value is an exception are not scored. On equal estimates the pair with
 * the larger exponent, then the larger factor, wins.
 *
 * @param values source array
 * @param offset index of the first value to examine
 * @param length number of values to examine
 * @return the selected exponent and factor together with the exception count for that pair; if no
 *     pair was scored, exponent 0 and factor 0 with an exception count of {@code length}
 */
227271 static EncodingParams findBestDoubleParams (double [] values , int offset , int length ) {
228272 int bestExponent = 0 ;
229273 int bestFactor = 0 ;
230274 int bestExceptions = length ;
275+ long bestEstimatedSize = Long .MAX_VALUE ;
231276
232277 for (int e = 0 ; e <= DOUBLE_MAX_EXPONENT ; e ++) {
233278 for (int f = 0 ; f <= e ; f ++) {
234279 int exceptions = 0 ;
// Sentinels: the first non-exception value replaces both of them.
280+ long minEncoded = Long .MAX_VALUE ;
281+ long maxEncoded = Long .MIN_VALUE ;
235282 for (int i = 0 ; i < length ; i ++) {
236- if (isDoubleException (values [offset + i ], e , f )) {
283+ double value = values [offset + i ];
284+ if (isDoubleException (value , e , f )) {
237285 exceptions ++;
286+ } else {
287+ long encoded = encodeDouble (value , e , f );
288+ if (encoded < minEncoded ) minEncoded = encoded ;
289+ if (encoded > maxEncoded ) maxEncoded = encoded ;
238290 }
239291 }
240- if (exceptions < bestExceptions ) {
292+ int nonExceptions = length - exceptions ;
// Nothing was encoded for this pair, so min/max still hold their sentinels; skip it.
293+ if (nonExceptions == 0 ) continue ;
294+ // delta as signed subtraction; Long.numberOfLeadingZeros handles the unsigned bit width
295+ // correctly even when the subtraction overflows (large range → penalized with 64 bits).
// Because maxEncoded >= minEncoded, the true difference lies in [0, 2^64), and the wrapped
// two's-complement result is exactly that difference read as an unsigned 64-bit value.
296+ long delta = (nonExceptions < 2 ) ? 0 : (maxEncoded - minEncoded );
297+ int bitsPerValue = (delta == 0 ) ? 0 : (64 - Long .numberOfLeadingZeros (delta ));
// The packed width is charged for all `length` slots, not only the non-exceptions.
298+ long estimatedSize = (long ) length * bitsPerValue
299+ + (long ) exceptions * (Double .SIZE + Short .SIZE );
// Strictly smaller estimate wins; an equal estimate wins only with a larger (e, f).
300+ if (estimatedSize < bestEstimatedSize
301+ || (estimatedSize == bestEstimatedSize
302+ && (e > bestExponent || (e == bestExponent && f > bestFactor )))) {
303+ bestEstimatedSize = estimatedSize ;
241304 bestExponent = e ;
242305 bestFactor = f ;
243306 bestExceptions = exceptions ;
244- if (bestExceptions == 0 ) {
245- return new EncodingParams (bestExponent , bestFactor , bestExceptions );
// Estimate is 0 bits here, the minimum possible: later pairs could at best tie, so stop.
307+ if (bestExceptions == 0 && bitsPerValue == 0 ) {
308+ return new EncodingParams (bestExponent , bestFactor , 0 );
246309 }
247310 }
248311 }
249312 }
250313 return new EncodingParams (bestExponent , bestFactor , bestExceptions );
251314 }
252315
316+ /** Same as findBestDoubleParams but only tries the cached preset combos. */
253317 static EncodingParams findBestDoubleParamsWithPresets (double [] values , int offset , int length , int [][] presets ) {
254318 int bestExponent = presets [0 ][0 ];
255319 int bestFactor = presets [0 ][1 ];
256320 int bestExceptions = length ;
321+ long bestEstimatedSize = Long .MAX_VALUE ;
257322
258323 for (int [] preset : presets ) {
259324 int e = preset [0 ];
260325 int f = preset [1 ];
261326 int exceptions = 0 ;
327+ long minEncoded = Long .MAX_VALUE ;
328+ long maxEncoded = Long .MIN_VALUE ;
262329 for (int i = 0 ; i < length ; i ++) {
263- if (isDoubleException (values [offset + i ], e , f )) {
330+ double value = values [offset + i ];
331+ if (isDoubleException (value , e , f )) {
264332 exceptions ++;
333+ } else {
334+ long encoded = encodeDouble (value , e , f );
335+ if (encoded < minEncoded ) minEncoded = encoded ;
336+ if (encoded > maxEncoded ) maxEncoded = encoded ;
265337 }
266338 }
267- if (exceptions < bestExceptions ) {
339+ int nonExceptions = length - exceptions ;
340+ if (nonExceptions == 0 ) continue ;
341+ long delta = (nonExceptions < 2 ) ? 0 : (maxEncoded - minEncoded );
342+ int bitsPerValue = (delta == 0 ) ? 0 : (64 - Long .numberOfLeadingZeros (delta ));
343+ long estimatedSize = (long ) length * bitsPerValue
344+ + (long ) exceptions * (Double .SIZE + Short .SIZE );
345+ if (estimatedSize < bestEstimatedSize
346+ || (estimatedSize == bestEstimatedSize
347+ && (e > bestExponent || (e == bestExponent && f > bestFactor )))) {
348+ bestEstimatedSize = estimatedSize ;
268349 bestExponent = e ;
269350 bestFactor = f ;
270351 bestExceptions = exceptions ;
271- if (bestExceptions == 0 ) {
272- return new EncodingParams (bestExponent , bestFactor , bestExceptions );
352+ if (bestExceptions == 0 && bitsPerValue == 0 ) {
353+ return new EncodingParams (bestExponent , bestFactor , 0 );
273354 }
274355 }
275356 }
0 commit comments