Add inline incremental hash functions for in-memory use

author John Naylor <[email protected]>
Mon, 27 Nov 2023 10:03:38 +0000 (17:03 +0700)
committer John Naylor <[email protected]>
Fri, 19 Jan 2024 05:44:09 +0000 (12:44 +0700)
diff --git a/src/include/common/hashfn_unstable.h b/src/include/common/hashfn_unstable.h
new file mode 100644 (file)
index 0000000..8dfef98
--- /dev/null
+++ b/src/include/common/hashfn_unstable.h
@@ -0,0 +1,221 @@
+/*
+ * hashfn_unstable.h
+ *
+ * Building blocks for creating fast inlineable hash functions. The
+ * functions in this file are not guaranteed to be stable between versions,
+ * and may differ by hardware platform. Hence they must not be used in
+ * indexes or other on-disk structures. See hashfn.h if you need stability.
+ *
+ *
+ * Portions Copyright (c) 2024, PostgreSQL Global Development Group
+ *
+ * src/include/common/hashfn_unstable.h
+ */
+#ifndef HASHFN_UNSTABLE_H
+#define HASHFN_UNSTABLE_H
+
+#include "port/pg_bitutils.h"
+#include "port/pg_bswap.h"
+
+/*
+ * fasthash is a modification of code taken from
+ * https://code.google.com/archive/p/fast-hash/source/default/source
+ * under the terms of the MIT license. The original copyright
+ * notice follows:
+ */
+
+/* The MIT License
+
+   Copyright (C) 2012 Zilong Tan ([email protected])
+
+   Permission is hereby granted, free of charge, to any person
+   obtaining a copy of this software and associated documentation
+   files (the "Software"), to deal in the Software without
+   restriction, including without limitation the rights to use, copy,
+   modify, merge, publish, distribute, sublicense, and/or sell copies
+   of the Software, and to permit persons to whom the Software is
+   furnished to do so, subject to the following conditions:
+
+   The above copyright notice and this permission notice shall be
+   included in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+   SOFTWARE.
+*/
+
+/*
+ * fasthash as implemented here has two interfaces:
+ *
+ * 1) Standalone functions, e.g. fasthash32() for a single value with a
+ * known length.
+ *
+ * 2) Incremental interface. This can used for incorporating multiple
+ * inputs. The standalone functions use this internally, so see fasthash64()
+ * for an an example of how this works.
+ */
+
+
+typedef struct fasthash_state
+{
+   /* staging area for chunks of input */
+   uint64      accum;
+
+   uint64      hash;
+} fasthash_state;
+
+#define FH_SIZEOF_ACCUM sizeof(uint64)
+
+#define FH_UNKNOWN_LENGTH 1
+
+/*
+ * Initialize the hash state.
+ *
+ * 'len' is the length of the input, if known ahead of time.
+ * If that is not known, pass FH_UNKNOWN_LENGTH.
+ * 'seed' can be zero.
+ */
+static inline void
+fasthash_init(fasthash_state *hs, int len, uint64 seed)
+{
+   memset(hs, 0, sizeof(fasthash_state));
+   hs->hash = seed ^ (len * 0x880355f21e6d1965);
+}
+
+/* both the finalizer and part of the combining step */
+static inline uint64
+fasthash_mix(uint64 h, uint64 tweak)
+{
+   h ^= (h >> 23) + tweak;
+   h *= 0x2127599bf4325c37;
+   h ^= h >> 47;
+   return h;
+}
+
+/* combine one chunk of input into the hash */
+static inline void
+fasthash_combine(fasthash_state *hs)
+{
+   hs->hash ^= fasthash_mix(hs->accum, 0);
+   hs->hash *= 0x880355f21e6d1965;
+
+   /* reset hash state for next input */
+   hs->accum = 0;
+}
+
+/* accumulate up to 8 bytes of input and combine it into the hash */
+static inline void
+fasthash_accum(fasthash_state *hs, const char *k, int len)
+{
+   uint32      lower_four;
+
+   Assert(hs->accum == 0);
+   Assert(len <= FH_SIZEOF_ACCUM);
+
+   switch (len)
+   {
+       case 8:
+           memcpy(&hs->accum, k, 8);
+           break;
+       case 7:
+           hs->accum |= (uint64) k[6] << 48;
+           /* FALLTHROUGH */
+       case 6:
+           hs->accum |= (uint64) k[5] << 40;
+           /* FALLTHROUGH */
+       case 5:
+           hs->accum |= (uint64) k[4] << 32;
+           /* FALLTHROUGH */
+       case 4:
+           memcpy(&lower_four, k, sizeof(lower_four));
+           hs->accum |= lower_four;
+           break;
+       case 3:
+           hs->accum |= (uint64) k[2] << 16;
+           /* FALLTHROUGH */
+       case 2:
+           hs->accum |= (uint64) k[1] << 8;
+           /* FALLTHROUGH */
+       case 1:
+           hs->accum |= (uint64) k[0];
+           break;
+       case 0:
+           return;
+   }
+
+   fasthash_combine(hs);
+}
+
+/*
+ * The finalizer
+ *
+ * 'tweak' is intended to be the input length when the caller doesn't know
+ * the length ahead of time, such as for NUL-terminated strings, otherwise
+ * zero.
+ */
+static inline uint64
+fasthash_final64(fasthash_state *hs, uint64 tweak)
+{
+   return fasthash_mix(hs->hash, tweak);
+}
+
+/*
+ * Reduce a 64-bit hash to a 32-bit hash.
+ *
+ * This optional step provides a bit more additional mixing compared to
+ * just taking the lower 32-bits.
+ */
+static inline uint32
+fasthash_reduce32(uint64 h)
+{
+   /*
+    * Convert the 64-bit hashcode to Fermat residue, which shall retain
+    * information from both the higher and lower parts of hashcode.
+    */
+   return h - (h >> 32);
+}
+
+/* finalize and reduce */
+static inline uint32
+fasthash_final32(fasthash_state *hs, uint64 tweak)
+{
+   return fasthash_reduce32(fasthash_final64(hs, tweak));
+}
+
+/*
+ * The original fasthash64 function, re-implemented using the incremental
+ * interface. Returns a 64-bit hashcode. 'len' controls not only how
+ * many bytes to hash, but also modifies the internal seed.
+ * 'seed' can be zero.
+ */
+static inline uint64
+fasthash64(const char *k, int len, uint64 seed)
+{
+   fasthash_state hs;
+
+   fasthash_init(&hs, len, seed);
+
+   while (len >= FH_SIZEOF_ACCUM)
+   {
+       fasthash_accum(&hs, k, FH_SIZEOF_ACCUM);
+       k += FH_SIZEOF_ACCUM;
+       len -= FH_SIZEOF_ACCUM;
+   }
+
+   fasthash_accum(&hs, k, len);
+   return fasthash_final64(&hs, 0);
+}
+
+/* like fasthash64, but returns a 32-bit hashcode */
+static inline uint64
+fasthash32(const char *k, int len, uint64 seed)
+{
+   return fasthash_reduce32(fasthash64(k, len, seed));
+}
+
+#endif                         /* HASHFN_UNSTABLE_H */
diff --git a/src/include/utils/pgstat_internal.h b/src/include/utils/pgstat_internal.h
index 9862589f360245a8e92a9679a74b9199121df557..0cb8a58cba7961bf653f038e2536df7a0f2116f8 100644 (file)
--- a/src/include/utils/pgstat_internal.h
+++ b/src/include/utils/pgstat_internal.h
@@ -14,7 +14,7 @@
  #define PGSTAT_INTERNAL_H
  
  
-#include "common/hashfn.h"
+#include "common/hashfn_unstable.h"
  #include "lib/dshash.h"
  #include "lib/ilist.h"
  #include "pgstat.h"
@@ -776,16 +776,10 @@ pgstat_cmp_hash_key(const void *a, const void *b, size_t size, void *arg)
  static inline uint32
  pgstat_hash_hash_key(const void *d, size_t size, void *arg)
  {
-   const PgStat_HashKey *key = (PgStat_HashKey *) d;
-   uint32      hash;
+   const char *key = (const char *) d;
  
     Assert(size == sizeof(PgStat_HashKey) && arg == NULL);
-
-   hash = murmurhash32(key->kind);
-   hash = hash_combine(hash, murmurhash32(key->dboid));
-   hash = hash_combine(hash, murmurhash32(key->objoid));
-
-   return hash;
+   return fasthash32(key, size, 0);
  }
  
  /*
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 17c010437649d194b996c34638575ebf9500e823..aa74ed695eba5d8ff5eef6638a30da79d7ca7f9f 100644 (file)
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -3328,6 +3328,7 @@ exec_thread_arg
  execution_state
  explain_get_index_name_hook_type
  f_smgr
+fasthash_state
  fd_set
  fe_scram_state
  fe_scram_state_enum
author	John Naylor <[email protected]>
	Mon, 27 Nov 2023 10:03:38 +0000 (17:03 +0700)
committer	John Naylor <[email protected]>
	Fri, 19 Jan 2024 05:44:09 +0000 (12:44 +0700)
src/include/common/hashfn_unstable.h	[new file with mode: 0644]	\| blob
src/include/utils/pgstat_internal.h		\| blob \| blame \| history
src/tools/pgindent/typedefs.list		\| blob \| blame \| history