Revert nmhash.h and use KATs for big-endian validation

ijatinydv · ijatinydv · commit 0edda6b5c734 · 2026-03-16T01:11:01.000+05:30
diff --git a/test/hash_functions/nmhash.h b/test/hash_functions/nmhash.h
@@ -89,29 +89,6 @@ extern "C" {
 #  endif
 #endif
 
-/*
- * Endian-correct 16-bit multiply for scalar code paths.
- * On LE: u16[0] is the low half, u16[1] is the high half.
- * On BE: u16[0] is the HIGH half, u16[1] is the LOW half.
- * We need low_value *= low_constant, high_value *= high_constant,
- * so on BE we swap which constant half goes to which index.
- */
-#if NMHASH_LITTLE_ENDIAN
-#  define NMH_MULT16_SCALAR(u16_0, u16_1, m) do { \
-       (u16_0) *= (uint16_t)(m); \
-       (u16_1) *= (uint16_t)((m) >> 16); \
-   } while(0)
-#  define NMH_PACK_U16_HI(u16_arr, val) ((u16_arr)[1] = (val))
-#  define NMH_PACK_U16_LO(u16_arr, val) ((u16_arr)[0] = (val))
-#else
-#  define NMH_MULT16_SCALAR(u16_0, u16_1, m) do { \
-       (u16_0) *= (uint16_t)((m) >> 16); \
-       (u16_1) *= (uint16_t)(m); \
-   } while(0)
-#  define NMH_PACK_U16_HI(u16_arr, val) ((u16_arr)[0] = (val))
-#  define NMH_PACK_U16_LO(u16_arr, val) ((u16_arr)[1] = (val))
-#endif
-
 /* vector macros */
 #define NMH_SCALAR 0
 #define NMH_SSE2   1
@@ -229,12 +206,15 @@ NMHASH32_0to8(uint32_t const x, uint32_t const seed2)
 		union { uint32_t u32; uint16_t u16[2]; } vx;
 		vx.u32 = x;
 		vx.u32 ^= (vx.u32 >> 12) ^ (vx.u32 >> 6);
-		NMH_MULT16_SCALAR(vx.u16[0], vx.u16[1], m1);
+		vx.u16[0] *= (uint16_t)m1;
+		vx.u16[1] *= (uint16_t)(m1 >> 16);
 		vx.u32 ^= (vx.u32 << 11) ^ ( vx.u32 >> 19);
-		NMH_MULT16_SCALAR(vx.u16[0], vx.u16[1], m2);
+		vx.u16[0] *= (uint16_t)m2;
+		vx.u16[1] *= (uint16_t)(m2 >> 16);
 		vx.u32 ^= seed2;
 		vx.u32 ^= (vx.u32 >> 15) ^ ( vx.u32 >> 9);
-		NMH_MULT16_SCALAR(vx.u16[0], vx.u16[1], m3);
+		vx.u16[0] *= (uint16_t)m3;
+		vx.u16[1] *= (uint16_t)(m3 >> 16);
 		vx.u32 ^= (vx.u32 << 16) ^ ( vx.u32 >> 11);
 		return vx.u32;
 	}
@@ -310,18 +290,21 @@ NMHASH32_9to255(const uint8_t* const NMH_RESTRICT p, size_t const len, uint32_t
 				for (j = 0; j < 4; ++j) x[j].u32 += y[j].u32;
 
 				for (j = 0; j < 4; ++j) {
-					NMH_MULT16_SCALAR(x[j].u16[0], x[j].u16[1], __NMH_M1);
+					x[j].u16[0] *= (uint16_t)(__NMH_M1 & 0xFFFF);
+					x[j].u16[1] *= (uint16_t)(__NMH_M1 >> 16);
 				}
 				for (j = 0; j < 4; ++j) x[j].u32 ^= (x[j].u32 << 5) ^ (x[j].u32 >> 13);
 				for (j = 0; j < 4; ++j) {
-					NMH_MULT16_SCALAR(x[j].u16[0], x[j].u16[1], __NMH_M2);
+					x[j].u16[0] *= (uint16_t)(__NMH_M2 & 0xFFFF);
+					x[j].u16[1] *= (uint16_t)(__NMH_M2 >> 16);
 				}
 
 				for (j = 0; j < 4; ++j) x[j].u32 ^= y[j].u32;
 
 				for (j = 0; j < 4; ++j) x[j].u32 ^= (x[j].u32 << 11) ^ (x[j].u32 >> 9);
 				for (j = 0; j < 4; ++j) {
-					NMH_MULT16_SCALAR(x[j].u16[0], x[j].u16[1], __NMH_M3);
+					x[j].u16[0] *= (uint16_t)(__NMH_M3 & 0xFFFF);
+					x[j].u16[1] *= (uint16_t)(__NMH_M3 >> 16);
 				}
 				for (j = 0; j < 4; ++j) x[j].u32 ^= (x[j].u32 >> 10) ^ (x[j].u32 >> 20);
 			}
@@ -343,18 +326,21 @@ NMHASH32_9to255(const uint8_t* const NMH_RESTRICT p, size_t const len, uint32_t
 		for (j = 0; j < 4; ++j) y[j].u32 ^= (y[j].u32 << 17) ^ (y[j].u32 >> 6);
 
 		for (j = 0; j < 4; ++j) {
-			NMH_MULT16_SCALAR(x[j].u16[0], x[j].u16[1], __NMH_M1);
+			x[j].u16[0] *= (uint16_t)(__NMH_M1 & 0xFFFF);
+			x[j].u16[1] *= (uint16_t)(__NMH_M1 >> 16);
 		}
 		for (j = 0; j < 4; ++j) x[j].u32 ^= (x[j].u32 << 5) ^ (x[j].u32 >> 13);
 		for (j = 0; j < 4; ++j) {
-			NMH_MULT16_SCALAR(x[j].u16[0], x[j].u16[1], __NMH_M2);
+			x[j].u16[0] *= (uint16_t)(__NMH_M2 & 0xFFFF);
+			x[j].u16[1] *= (uint16_t)(__NMH_M2 >> 16);
 		}
 
 		for (j = 0; j < 4; ++j) x[j].u32 ^= y[j].u32;
 
 		for (j = 0; j < 4; ++j) x[j].u32 ^= (x[j].u32 << 11) ^ (x[j].u32 >> 9);
 		for (j = 0; j < 4; ++j) {
-			NMH_MULT16_SCALAR(x[j].u16[0], x[j].u16[1], __NMH_M3);
+			x[j].u16[0] *= (uint16_t)(__NMH_M3 & 0xFFFF);
+			x[j].u16[1] *= (uint16_t)(__NMH_M3 >> 16);
 		}
 		for (j = 0; j < 4; ++j) x[j].u32 ^= (x[j].u32 >> 10) ^ (x[j].u32 >> 20);
 
@@ -366,7 +352,8 @@ NMHASH32_9to255(const uint8_t* const NMH_RESTRICT p, size_t const len, uint32_t
 		for (j = 1; j < 4; ++j) x[0].u32 += x[j].u32;
 
 		x[0].u32 ^= sl + (sl >> 5);
-		NMH_MULT16_SCALAR(x[0].u16[0], x[0].u16[1], __NMH_M3);
+		x[0].u16[0] *= (uint16_t)(__NMH_M3 & 0xFFFF);
+		x[0].u16[1] *= (uint16_t)(__NMH_M3 >> 16);
 		x[0].u32 ^= (x[0].u32 >> 10) ^ (x[0].u32 >> 20);
 
 		result = x[0].u32;
@@ -594,9 +581,11 @@ NMHASH32_avalanche32(uint32_t const x)
 	union { uint32_t u32; uint16_t u16[2]; } vx;
 	vx.u32    = x;
 	vx.u32   ^= (vx.u32 >> 8) ^ (vx.u32 >> 21);
-	NMH_MULT16_SCALAR(vx.u16[0], vx.u16[1], m1);
+	vx.u16[0] = (uint16_t)(vx.u16[0] * (uint16_t)m1);
+	vx.u16[1] = (uint16_t)(vx.u16[1] * (uint16_t)(m1 >> 16));
 	vx.u32   ^= (vx.u32 << 12) ^ (vx.u32 >> 7);
-	NMH_MULT16_SCALAR(vx.u16[0], vx.u16[1], m2);
+	vx.u16[0] = (uint16_t)(vx.u16[0] * (uint16_t)m2);
+	vx.u16[1] = (uint16_t)(vx.u16[1] * (uint16_t)(m2 >> 16));
 	return vx.u32 ^ (vx.u32 >> 8) ^ (vx.u32 >> 21);
 }
 
@@ -628,8 +617,8 @@ NMHASH32(const void* const NMH_RESTRICT input, size_t const len, uint32_t seed)
 					data.u32 = NMH_readLE16(p);
 					break;
 				case 3: seed += NMH_PRIME32_2 + (UINT32_C(3) << 24) + (3 << 1);
-					NMH_PACK_U16_HI(data.u16, p[2]);
-					NMH_PACK_U16_LO(data.u16, NMH_readLE16(p));
+					data.u16[1] = p[2];
+					data.u16[0] = NMH_readLE16(p);
 					break;
 				case 4: seed += NMH_PRIME32_3;
 					data.u32 = NMH_readLE32(p);
@@ -812,8 +801,8 @@ NMHASH32X(const void* const NMH_RESTRICT input, size_t const len, uint32_t seed)
 					data.u32 = NMH_readLE16(p);
 					break;
 				case 3: seed += NMH_PRIME32_2 + (UINT32_C(3) << 24) + (3 << 1);
-					NMH_PACK_U16_HI(data.u16, p[2]);
-					NMH_PACK_U16_LO(data.u16, NMH_readLE16(p));
+					data.u16[1] = p[2];
+					data.u16[0] = NMH_readLE16(p);
 					break;
 				case 4: seed += NMH_PRIME32_1;
 					data.u32 = NMH_readLE32(p);
diff --git a/test/hash_functions/test_hash_functions.f90 b/test/hash_functions/test_hash_functions.f90
@@ -43,6 +43,8 @@ subroutine collect_hash_functions(testsuite)
             , new_unittest("spooky_hash", test_spooky_hash) &
             , new_unittest("hash_determinism", test_hash_determinism) &
             , new_unittest("hash_distribution", test_hash_distribution) &
+            , new_unittest("nmhash32_kat", test_nmhash32_kat) &
+            , new_unittest("nmhash32x_kat", test_nmhash32x_kat) &
             ]
 
     end subroutine collect_hash_functions
@@ -67,6 +69,15 @@ subroutine test_nmhash32(error)
         integer(int8) :: key_array(size_key_array)
         integer(int32) :: c_hash(0:size_key_array)
 
+        ! The C reference implementation (nmhash.h) does not support
+        ! big-endian. Skip C-comparison on BE; value-correctness is
+        ! verified by the test_nmhash32_kat known-answer test instead.
+        if (.not. little_endian) then
+            call skip_test(error, &
+                "NMHASH32 C-comparison skipped on Big-Endian (see KAT test)")
+            return
+        end if
+
         call read_array("key_array.bin", key_array )
 
         ! Read hash array generated from key array by the C version of nmhash32
@@ -88,6 +99,15 @@ subroutine test_nmhash32x(error)
         integer(int8) :: key_array(size_key_array)
         integer(int32) :: c_hash(0:size_key_array)
 
+        ! The C reference implementation (nmhash.h) does not support
+        ! big-endian. Skip C-comparison on BE; value-correctness is
+        ! verified by the test_nmhash32x_kat known-answer test instead.
+        if (.not. little_endian) then
+            call skip_test(error, &
+                "NMHASH32X C-comparison skipped on Big-Endian (see KAT test)")
+            return
+        end if
+
         call read_array("key_array.bin", key_array )
 
         ! Read hash array generated from key array by the C version of nmhash32x
@@ -269,6 +289,97 @@ subroutine test_hash_distribution(error)
     end subroutine test_hash_distribution
 
 
+    !> Known-Answer Test for NMHASH32.
+    !> Verifies the Fortran implementation produces the exact canonical
+    !> LE-normalized hash values across all code paths. Reference values
+    !> were computed on a little-endian platform using the upstream C code.
+    !> This test runs on ALL platforms (LE and BE).
+    subroutine test_nmhash32_kat(error)
+        !> Error handling
+        type(error_type), allocatable, intent(out) :: error
+
+        ! Number of test vectors and length of the longest key
+        integer, parameter :: num_kat = 14, max_len = 300
+
+        ! Input lengths covering every code path:
+        ! 0=zero, 1/2/3/4=small, 7/8=5-8 path, 9/32=9-32 path,
+        ! 33/100/255=33-255 path, 256/300=long path (256+)
+        integer, parameter :: kat_lengths(num_kat) = [ &
+            0, 1, 2, 3, 4, 7, 8, &
+            9, 32, 33, 100, 255, 256, 300 ]
+
+        ! Reference NMHASH32 values (computed on LE with seed=0xDEADBEEF)
+        integer(int32), parameter :: kat_expected(num_kat) = [ &
+            int(z'B0D9C845', int32), int(z'D52AD23F', int32), &
+            int(z'E909FDFF', int32), int(z'FF1A009C', int32), &
+            int(z'097D4183', int32), int(z'55CC8BBF', int32), &
+            int(z'660D67B4', int32), int(z'CB939B94', int32), &
+            int(z'4CBE45F8', int32), int(z'2FD88BD0', int32), &
+            int(z'83AC6B02', int32), int(z'CC0E4E26', int32), &
+            int(z'567D6B58', int32), int(z'865F0BC9', int32) ]
+
+        ! Deterministic key: byte i = low 8 bits of i, stored as signed int8
+        ! (128..255 -> -128..-1; INT() must not be given out-of-range values)
+        integer(int8) :: key(max_len)
+        integer :: i
+        integer(int32) :: got
+
+        do i = 1, max_len
+            key(i) = int(iand(i, 255) - 256 * (iand(i, 255) / 128), int8)
+        end do
+
+        do i = 1, num_kat
+            got = nmhash32(key(1:kat_lengths(i)), nm_seed)
+            call check(error, got == kat_expected(i), &
+                "NMHASH32 KAT failed")
+            if (allocated(error)) return
+        end do
+    end subroutine test_nmhash32_kat
+
+    !> Known-Answer Test for NMHASH32X.
+    !> Same approach as test_nmhash32_kat but for the NMHASH32X variant.
+    !> This test runs on ALL platforms (LE and BE).
+    subroutine test_nmhash32x_kat(error)
+        !> Error handling
+        type(error_type), allocatable, intent(out) :: error
+
+        ! Number of test vectors and length of the longest key
+        integer, parameter :: num_kat = 14, max_len = 300
+
+        ! Input lengths covering every code path
+        integer, parameter :: kat_lengths(num_kat) = [ &
+            0, 1, 2, 3, 4, 7, 8, &
+            9, 32, 33, 100, 255, 256, 300 ]
+
+        ! Reference NMHASH32X values (computed on LE with seed=0xDEADBEEF)
+        integer(int32), parameter :: kat_expected(num_kat) = [ &
+            int(z'76844735', int32), int(z'B7AE2C90', int32), &
+            int(z'EE2224FD', int32), int(z'BBE39609', int32), &
+            int(z'08467EE3', int32), int(z'10E572DA', int32), &
+            int(z'2570CFA8', int32), int(z'1A06128A', int32), &
+            int(z'EABBF1B8', int32), int(z'9B1B3428', int32), &
+            int(z'F6F0233D', int32), int(z'7EB7CAFC', int32), &
+            int(z'B34D6C45', int32), int(z'E89BEE9E', int32) ]
+
+        ! Deterministic key: byte i = low 8 bits of i, stored as signed int8
+        ! (128..255 -> -128..-1; INT() must not be given out-of-range values)
+        integer(int8) :: key(max_len)
+        integer :: i
+        integer(int32) :: got
+
+        do i = 1, max_len
+            key(i) = int(iand(i, 255) - 256 * (iand(i, 255) / 128), int8)
+        end do
+
+        do i = 1, num_kat
+            got = nmhash32x(key(1:kat_lengths(i)), nm_seed)
+            call check(error, got == kat_expected(i), &
+                "NMHASH32X KAT failed")
+            if (allocated(error)) return
+        end do
+    end subroutine test_nmhash32x_kat
+
+
     subroutine generate_key_array()
     
         integer        :: i, lun