}
}
+/*
+ * Build an ExprState that calls the given hash function(s) on the attnums
+ * given by 'keyColIdx' . When numCols > 1, the hash values returned by each
+ * hash function are combined to produce a single hash value.
+ *
+ * desc: tuple descriptor for the to-be-hashed columns
+ * ops: TupleTableSlotOps to use for the give TupleDesc
+ * hashfunctions: FmgrInfos for each hash function to call, one per numCols.
+ * These are used directly in the returned ExprState so must remain allocated.
+ * collations: collation to use when calling the hash function.
+ * numCols: array length of hashfunctions, collations and keyColIdx.
+ * parent: PlanState node that the resulting ExprState will be evaluated at
+ * init_value: Normally 0, but can be set to other values to seed the hash
+ * with. Non-zero is marginally slower, so best to only use if it's provably
+ * worthwhile.
+ */
+ExprState *
+ExecBuildHash32FromAttrs(TupleDesc desc, const TupleTableSlotOps *ops,
+ FmgrInfo *hashfunctions, Oid *collations,
+ int numCols, AttrNumber *keyColIdx,
+ PlanState *parent, uint32 init_value)
+{
+ ExprState *state = makeNode(ExprState);
+ ExprEvalStep scratch = {0};
+ NullableDatum *iresult = NULL;
+ intptr_t opcode;
+ AttrNumber last_attnum = 0;
+
+ Assert(numCols >= 0);
+
+ state->parent = parent;
+
+ /*
+ * Make a place to store intermediate hash values between subsequent
+ * hashing of individual columns. We only need this if there is more than
+ * one column to hash or an initial value plus one column.
+ */
+ if ((int64) numCols + (init_value != 0) > 1)
+ iresult = palloc(sizeof(NullableDatum));
+
+ /* find the highest attnum so we deform the tuple to that point */
+ for (int i = 0; i < numCols; i++)
+ last_attnum = Max(last_attnum, keyColIdx[i]);
+
+ scratch.opcode = EEOP_INNER_FETCHSOME;
+ scratch.d.fetch.last_var = last_attnum;
+ scratch.d.fetch.fixed = false;
+ scratch.d.fetch.kind = ops;
+ scratch.d.fetch.known_desc = desc;
+ if (ExecComputeSlotInfo(state, &scratch))
+ ExprEvalPushStep(state, &scratch);
+
+ if (init_value == 0)
+ {
+ /*
+ * No initial value, so we can assign the result of the hash function
+ * for the first attribute without having to concern ourselves with
+ * combining the result with any initial value.
+ */
+ opcode = EEOP_HASHDATUM_FIRST;
+ }
+ else
+ {
+ /*
+ * Set up operation to set the initial value. Normally we store this
+ * in the intermediate hash value location, but if there are no
+ * columns to hash, store it in the ExprState's result field.
+ */
+ scratch.opcode = EEOP_HASHDATUM_SET_INITVAL;
+ scratch.d.hashdatum_initvalue.init_value = UInt32GetDatum(init_value);
+ scratch.resvalue = numCols > 0 ? &iresult->value : &state->resvalue;
+ scratch.resnull = numCols > 0 ? &iresult->isnull : &state->resnull;
+
+ ExprEvalPushStep(state, &scratch);
+
+ /*
+ * When using an initial value use the NEXT32 ops as the FIRST ops
+ * would overwrite the stored initial value.
+ */
+ opcode = EEOP_HASHDATUM_NEXT32;
+ }
+
+ for (int i = 0; i < numCols; i++)
+ {
+ FmgrInfo *finfo;
+ FunctionCallInfo fcinfo;
+ Oid inputcollid = collations[i];
+ AttrNumber attnum = keyColIdx[i] - 1;
+
+ finfo = &hashfunctions[i];
+ fcinfo = palloc0(SizeForFunctionCallInfo(1));
+
+ /* Initialize function call parameter structure too */
+ InitFunctionCallInfoData(*fcinfo, finfo, 1, inputcollid, NULL, NULL);
+
+ /*
+ * Fetch inner Var for this attnum and store it in the 1st arg of the
+ * hash func.
+ */
+ scratch.opcode = EEOP_INNER_VAR;
+ scratch.resvalue = &fcinfo->args[0].value;
+ scratch.resnull = &fcinfo->args[0].isnull;
+ scratch.d.var.attnum = attnum;
+ scratch.d.var.vartype = TupleDescAttr(desc, attnum)->atttypid;
+
+ ExprEvalPushStep(state, &scratch);
+
+ /* Call the hash function */
+ scratch.opcode = opcode;
+
+ if (i == numCols - 1)
+ {
+ /*
+ * The result for hashing the final column is stored in the
+ * ExprState.
+ */
+ scratch.resvalue = &state->resvalue;
+ scratch.resnull = &state->resnull;
+ }
+ else
+ {
+ Assert(iresult != NULL);
+
+ /* intermediate values are stored in an intermediate result */
+ scratch.resvalue = &iresult->value;
+ scratch.resnull = &iresult->isnull;
+ }
+
+ /*
+ * NEXT32 opcodes need to look at the intermediate result. We might
+ * as well just set this for all ops. FIRSTs won't look at it.
+ */
+ scratch.d.hashdatum.iresult = iresult;
+
+ scratch.d.hashdatum.finfo = finfo;
+ scratch.d.hashdatum.fcinfo_data = fcinfo;
+ scratch.d.hashdatum.fn_addr = finfo->fn_addr;
+ scratch.d.hashdatum.jumpdone = -1;
+
+ ExprEvalPushStep(state, &scratch);
+
+ /* subsequent attnums must be combined with the previous */
+ opcode = EEOP_HASHDATUM_NEXT32;
+ }
+
+ scratch.resvalue = NULL;
+ scratch.resnull = NULL;
+ scratch.opcode = EEOP_DONE;
+ ExprEvalPushStep(state, &scratch);
+
+ ExecReadyExpr(state);
+
+ return state;
+}
+
/*
* Build an ExprState that calls the given hash function(s) on the given
* 'hash_exprs'. When multiple expressions are present, the hash values
Size hash_mem_limit;
MemoryContext oldcontext;
bool allow_jit;
+ uint32 hash_iv = 0;
Assert(nbuckets > 0);
hashtable->numCols = numCols;
hashtable->keyColIdx = keyColIdx;
- hashtable->tab_hash_funcs = hashfunctions;
hashtable->tab_collations = collations;
hashtable->tablecxt = tablecxt;
hashtable->tempcxt = tempcxt;
hashtable->entrysize = entrysize;
hashtable->tableslot = NULL; /* will be made on first lookup */
hashtable->inputslot = NULL;
- hashtable->in_hash_funcs = NULL;
+ hashtable->in_hash_expr = NULL;
hashtable->cur_eq_func = NULL;
/*
* underestimated.
*/
if (use_variable_hash_iv)
- hashtable->hash_iv = murmurhash32(ParallelWorkerNumber);
- else
- hashtable->hash_iv = 0;
+ hash_iv = murmurhash32(ParallelWorkerNumber);
hashtable->hashtab = tuplehash_create(metacxt, nbuckets, hashtable);
*/
allow_jit = metacxt != tablecxt;
+ /* build hash ExprState for all columns */
+ hashtable->tab_hash_expr = ExecBuildHash32FromAttrs(inputDesc,
+ &TTSOpsMinimalTuple,
+ hashfunctions,
+ collations,
+ numCols,
+ keyColIdx,
+ allow_jit ? parent : NULL,
+ hash_iv);
+
/* build comparator for all columns */
/* XXX: should we support non-minimal tuples for the inputslot? */
hashtable->tab_eq_func = ExecBuildGroupingEqual(inputDesc, inputDesc,
/* set up data needed by hash and match functions */
hashtable->inputslot = slot;
- hashtable->in_hash_funcs = hashtable->tab_hash_funcs;
+ hashtable->in_hash_expr = hashtable->tab_hash_expr;
hashtable->cur_eq_func = hashtable->tab_eq_func;
local_hash = TupleHashTableHash_internal(hashtable->hashtab, NULL);
uint32 hash;
hashtable->inputslot = slot;
- hashtable->in_hash_funcs = hashtable->tab_hash_funcs;
+ hashtable->in_hash_expr = hashtable->tab_hash_expr;
/* Need to run the hash functions in short-lived context */
oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
/* set up data needed by hash and match functions */
hashtable->inputslot = slot;
- hashtable->in_hash_funcs = hashtable->tab_hash_funcs;
+ hashtable->in_hash_expr = hashtable->tab_hash_expr;
hashtable->cur_eq_func = hashtable->tab_eq_func;
entry = LookupTupleHashEntry_internal(hashtable, slot, isnew, hash);
* created if there's not a match. This is similar to the non-creating
* case of LookupTupleHashEntry, except that it supports cross-type
* comparisons, in which the given tuple is not of the same type as the
- * table entries. The caller must provide the hash functions to use for
- * the input tuple, as well as the equality functions, since these may be
+ * table entries. The caller must provide the hash ExprState to use for
+ * the input tuple, as well as the equality ExprState, since these may be
* different from the table's internal functions.
*/
TupleHashEntry
FindTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
ExprState *eqcomp,
- FmgrInfo *hashfunctions)
+ ExprState *hashexpr)
{
TupleHashEntry entry;
MemoryContext oldContext;
/* Set up data needed by hash and match functions */
hashtable->inputslot = slot;
- hashtable->in_hash_funcs = hashfunctions;
+ hashtable->in_hash_expr = hashexpr;
hashtable->cur_eq_func = eqcomp;
/* Search the hash table */
* copied into the table.
*
* Also, the caller must select an appropriate memory context for running
- * the hash functions. (dynahash.c doesn't change CurrentMemoryContext.)
+ * the hash functions.
*/
static uint32
TupleHashTableHash_internal(struct tuplehash_hash *tb,
const MinimalTuple tuple)
{
TupleHashTable hashtable = (TupleHashTable) tb->private_data;
- int numCols = hashtable->numCols;
- AttrNumber *keyColIdx = hashtable->keyColIdx;
- uint32 hashkey = hashtable->hash_iv;
+ uint32 hashkey;
TupleTableSlot *slot;
- FmgrInfo *hashfunctions;
- int i;
+ bool isnull;
if (tuple == NULL)
{
/* Process the current input tuple for the table */
- slot = hashtable->inputslot;
- hashfunctions = hashtable->in_hash_funcs;
+ hashtable->exprcontext->ecxt_innertuple = hashtable->inputslot;
+ hashkey = DatumGetUInt32(ExecEvalExpr(hashtable->in_hash_expr,
+ hashtable->exprcontext,
+ &isnull));
}
else
{
* (this case never actually occurs due to the way simplehash.h is
* used, as the hash-value is stored in the entries)
*/
- slot = hashtable->tableslot;
+ slot = hashtable->exprcontext->ecxt_innertuple = hashtable->tableslot;
ExecStoreMinimalTuple(tuple, slot, false);
- hashfunctions = hashtable->tab_hash_funcs;
- }
-
- for (i = 0; i < numCols; i++)
- {
- AttrNumber att = keyColIdx[i];
- Datum attr;
- bool isNull;
-
- /* combine successive hashkeys by rotating */
- hashkey = pg_rotate_left32(hashkey, 1);
-
- attr = slot_getattr(slot, att, &isNull);
-
- if (!isNull) /* treat nulls as having hash key 0 */
- {
- uint32 hkey;
-
- hkey = DatumGetUInt32(FunctionCall1Coll(&hashfunctions[i],
- hashtable->tab_collations[i],
- attr));
- hashkey ^= hkey;
- }
+ hashkey = DatumGetUInt32(ExecEvalExpr(hashtable->tab_hash_expr,
+ hashtable->exprcontext,
+ &isnull));
}
/*
- * The way hashes are combined above, among each other and with the IV,
- * doesn't lead to good bit perturbation. As the IV's goal is to lead to
- * achieve that, perform a round of hashing of the combined hash -
- * resulting in near perfect perturbation.
+ * The hashing done above, even with an initial value, doesn't tend to
+ * result in good hash perturbation. Running the value produced above
+ * through murmurhash32 leads to near perfect hash perturbation.
*/
return murmurhash32(hashkey);
}
FindTupleHashEntry(node->hashtable,
slot,
node->cur_eq_comp,
- node->lhs_hash_funcs) != NULL)
+ node->lhs_hash_expr) != NULL)
{
ExecClearTuple(slot);
return BoolGetDatum(true);
sstate->tab_eq_funcoids = NULL;
sstate->tab_hash_funcs = NULL;
sstate->tab_collations = NULL;
- sstate->lhs_hash_funcs = NULL;
sstate->cur_eq_funcs = NULL;
/*
TupleDesc tupDescRight;
Oid *cross_eq_funcoids;
TupleTableSlot *slot;
+ FmgrInfo *lhs_hash_funcs;
List *oplist,
*lefttlist,
*righttlist;
sstate->tab_eq_funcoids = (Oid *) palloc(ncols * sizeof(Oid));
sstate->tab_collations = (Oid *) palloc(ncols * sizeof(Oid));
sstate->tab_hash_funcs = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo));
- sstate->lhs_hash_funcs = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo));
+ lhs_hash_funcs = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo));
sstate->cur_eq_funcs = (FmgrInfo *) palloc(ncols * sizeof(FmgrInfo));
/* we'll need the cross-type equality fns below, but not in sstate */
cross_eq_funcoids = (Oid *) palloc(ncols * sizeof(Oid));
&left_hashfn, &right_hashfn))
elog(ERROR, "could not find hash function for hash operator %u",
opexpr->opno);
- fmgr_info(left_hashfn, &sstate->lhs_hash_funcs[i - 1]);
+ fmgr_info(left_hashfn, &lhs_hash_funcs[i - 1]);
fmgr_info(right_hashfn, &sstate->tab_hash_funcs[i - 1]);
/* Set collation */
sstate->planstate,
NULL);
+ /* Build the ExprState for generating hash values */
+ sstate->lhs_hash_expr = ExecBuildHash32FromAttrs(tupDescLeft,
+ &TTSOpsVirtual,
+ lhs_hash_funcs,
+ sstate->tab_collations,
+ sstate->numCols,
+ sstate->keyColIdx,
+ parent,
+ 0);
+
/*
* Create comparator for lookups of rows in the table (potentially
* cross-type comparisons).
extern TupleHashEntry FindTupleHashEntry(TupleHashTable hashtable,
TupleTableSlot *slot,
ExprState *eqcomp,
- FmgrInfo *hashfunctions);
+ ExprState *hashexpr);
extern void ResetTupleHashTable(TupleHashTable hashtable);
/*
extern List *ExecInitExprList(List *nodes, PlanState *parent);
extern ExprState *ExecBuildAggTrans(AggState *aggstate, struct AggStatePerPhaseData *phase,
bool doSort, bool doHash, bool nullcheck);
+extern ExprState *ExecBuildHash32FromAttrs(TupleDesc desc,
+ const TupleTableSlotOps *ops,
+ FmgrInfo *hashfunctions,
+ Oid *collations,
+ int numCols,
+ AttrNumber *keyColIdx,
+ PlanState *parent,
+ uint32 init_value);
extern ExprState *ExecBuildHash32Expr(TupleDesc desc,
const TupleTableSlotOps *ops,
const Oid *hashfunc_oids,
*
* All-in-memory tuple hash tables are used for a number of purposes.
*
- * Note: tab_hash_funcs are for the key datatype(s) stored in the table,
- * and tab_eq_func are non-cross-type equality operators for those types.
- * Normally these are the only functions used, but FindTupleHashEntry()
- * supports searching a hashtable using cross-data-type hashing. For that,
- * the caller must supply hash functions for the LHS datatype as well as
- * the cross-type equality operators to use. in_hash_funcs and cur_eq_func
- * are set to point to the caller's function arrays while doing such a search.
- * During LookupTupleHashEntry(), they point to tab_hash_funcs and
- * tab_eq_func respectively.
+ * Note: tab_hash_expr is for hashing the key datatype(s) stored in the table,
+ * and tab_eq_func is a non-cross-type ExprState for equality checks on those
+ * types. Normally these are the only ExprStates used, but
+ * FindTupleHashEntry() supports searching a hashtable using cross-data-type
+ * hashing. For that, the caller must supply an ExprState to hash the LHS
+ * datatype as well as the cross-type equality ExprState to use. in_hash_expr
+ * and cur_eq_func are set to point to the caller's hash and equality
+ * ExprStates while doing such a search. During LookupTupleHashEntry(), they
+ * point to tab_hash_expr and tab_eq_func respectively.
* ----------------------------------------------------------------
*/
typedef struct TupleHashEntryData *TupleHashEntry;
tuplehash_hash *hashtab; /* underlying hash table */
int numCols; /* number of columns in lookup key */
AttrNumber *keyColIdx; /* attr numbers of key columns */
- FmgrInfo *tab_hash_funcs; /* hash functions for table datatype(s) */
+ ExprState *tab_hash_expr; /* ExprState for hashing table datatype(s) */
ExprState *tab_eq_func; /* comparator for table datatype(s) */
Oid *tab_collations; /* collations for hash and comparison */
MemoryContext tablecxt; /* memory context containing table */
TupleTableSlot *tableslot; /* slot for referencing table entries */
/* The following fields are set transiently for each table search: */
TupleTableSlot *inputslot; /* current input tuple's slot */
- FmgrInfo *in_hash_funcs; /* hash functions for input datatype(s) */
+ ExprState *in_hash_expr; /* ExprState for hashing input datatype(s) */
ExprState *cur_eq_func; /* comparator for input vs. table */
- uint32 hash_iv; /* hash-function IV */
ExprContext *exprcontext; /* expression context */
} TupleHashTableData;
* datatype(s) */
Oid *tab_collations; /* collations for hash and comparison */
FmgrInfo *tab_hash_funcs; /* hash functions for table datatype(s) */
- FmgrInfo *lhs_hash_funcs; /* hash functions for lefthand datatype(s) */
+ ExprState *lhs_hash_expr; /* hash expr for lefthand datatype(s) */
FmgrInfo *cur_eq_funcs; /* equality functions for LHS vs. table */
ExprState *cur_eq_comp; /* equality comparator for LHS vs. table */
} SubPlanState;