*
* A bitmap set can represent any set of nonnegative integers, although
* it is mainly intended for sets where the maximum value is not large,
- * say at most a few hundred. By convention, we always represent the
- * empty set by a NULL pointer.
+ * say at most a few hundred. By convention, we always represent a set with
+ * the minimum possible number of words, i.e, there are never any trailing
+ * zero words. Enforcing this requires that an empty set is represented as
+ * NULL. Because an empty Bitmapset is represented as NULL, a non-NULL
+ * Bitmapset always has at least 1 Bitmapword. We can exploit this fact to
+ * speed up various loops over the Bitmapset's words array by using "do while"
+ * loops instead of "for" loops. This means the code does not waste time
+ * checking the loop condition before the first iteration. For Bitmapsets
+ * containing only a single word (likely the majority of them) this halves the
+ * number of loop condition checks.
*
*
* Copyright (c) 2003-2023, PostgreSQL Global Development Group
#error "invalid BITS_PER_BITMAPWORD"
#endif
-static bool bms_is_empty_internal(const Bitmapset *a);
-
/*
* bms_copy - make a palloc'd copy of a bitmapset
}
/*
- * bms_equal - are two bitmapsets equal?
- *
- * This is logical not physical equality; in particular, a NULL pointer will
- * be reported as equal to a palloc'd value containing no members.
+ * bms_equal - are two bitmapsets equal? or both NULL?
*/
bool
bms_equal(const Bitmapset *a, const Bitmapset *b)
{
- const Bitmapset *shorter;
- const Bitmapset *longer;
- int shortlen;
- int longlen;
int i;
+ Assert(a == NULL || a->words[a->nwords - 1] != 0);
+ Assert(b == NULL || b->words[b->nwords - 1] != 0);
+
/* Handle cases where either input is NULL */
if (a == NULL)
{
}
else if (b == NULL)
return false;
- /* Identify shorter and longer input */
- if (a->nwords <= b->nwords)
- {
- shorter = a;
- longer = b;
- }
- else
- {
- shorter = b;
- longer = a;
- }
- /* And process */
- shortlen = shorter->nwords;
- for (i = 0; i < shortlen; i++)
- {
- if (shorter->words[i] != longer->words[i])
- return false;
- }
- longlen = longer->nwords;
- for (; i < longlen; i++)
+
+ /* can't be equal if the word counts don't match */
+ if (a->nwords != b->nwords)
+ return false;
+
+ /* check each word matches */
+ i = 0;
+ do
{
- if (longer->words[i] != 0)
+ if (a->words[i] != b->words[i])
return false;
- }
+ } while (++i < a->nwords);
+
return true;
}
int
bms_compare(const Bitmapset *a, const Bitmapset *b)
{
- int shortlen;
int i;
+ Assert(a == NULL || a->words[a->nwords - 1] != 0);
+ Assert(b == NULL || b->words[b->nwords - 1] != 0);
+
/* Handle cases where either input is NULL */
if (a == NULL)
return (b == NULL) ? 0 : -1;
else if (b == NULL)
return +1;
- /* Handle cases where one input is longer than the other */
- shortlen = Min(a->nwords, b->nwords);
- for (i = shortlen; i < a->nwords; i++)
- {
- if (a->words[i] != 0)
- return +1;
- }
- for (i = shortlen; i < b->nwords; i++)
- {
- if (b->words[i] != 0)
- return -1;
- }
- /* Process words in common */
- i = shortlen;
- while (--i >= 0)
+
+ /* the set with the most words must be greater */
+ if (a->nwords != b->nwords)
+ return (a->nwords > b->nwords) ? +1 : -1;
+
+ i = a->nwords - 1;
+ do
{
bitmapword aw = a->words[i];
bitmapword bw = b->words[i];
if (aw != bw)
return (aw > bw) ? +1 : -1;
- }
+ } while (--i >= 0);
return 0;
}
}
/* And union the shorter input into the result */
otherlen = other->nwords;
- for (i = 0; i < otherlen; i++)
+ i = 0;
+ do
+ {
result->words[i] |= other->words[i];
+ } while (++i < otherlen);
return result;
}
{
Bitmapset *result;
const Bitmapset *other;
+ int lastnonzero;
int resultlen;
int i;
}
/* And intersect the longer input with the result */
resultlen = result->nwords;
- for (i = 0; i < resultlen; i++)
+ lastnonzero = -1;
+ i = 0;
+ do
+ {
result->words[i] &= other->words[i];
+
+ if (result->words[i] != 0)
+ lastnonzero = i;
+ } while (++i < resultlen);
/* If we computed an empty result, we must return NULL */
- if (bms_is_empty_internal(result))
+ if (lastnonzero == -1)
{
pfree(result);
return NULL;
}
+
+ /* get rid of trailing zero words */
+ result->nwords = lastnonzero + 1;
return result;
}
bms_difference(const Bitmapset *a, const Bitmapset *b)
{
Bitmapset *result;
- int shortlen;
int i;
+ Assert(a == NULL || a->words[a->nwords - 1] != 0);
+ Assert(b == NULL || b->words[b->nwords - 1] != 0);
+
/* Handle cases where either input is NULL */
if (a == NULL)
return NULL;
/* Copy the left input */
result = bms_copy(a);
+
/* And remove b's bits from result */
- shortlen = Min(a->nwords, b->nwords);
- for (i = 0; i < shortlen; i++)
- result->words[i] &= ~b->words[i];
+ if (result->nwords > b->nwords)
+ {
+ /*
+ * We'll never need to remove trailing zero words when 'a' has more
+ * words than 'b' as the additional words must be non-zero.
+ */
+ i = 0;
+ do
+ {
+ result->words[i] &= ~b->words[i];
+ } while (++i < b->nwords);
+ }
+ else
+ {
+ int lastnonzero = -1;
+
+ /* we may need to remove trailing zero words from the result. */
+ i = 0;
+ do
+ {
+ result->words[i] &= ~b->words[i];
+
+ /* remember the last non-zero word */
+ if (result->words[i] != 0)
+ lastnonzero = i;
+ } while (++i < result->nwords);
+
+ /* trim off trailing zero words */
+ result->nwords = lastnonzero + 1;
+ }
+ Assert(result->nwords != 0);
+
/* Need not check for empty result, since we handled that case above */
return result;
}
bool
bms_is_subset(const Bitmapset *a, const Bitmapset *b)
{
- int shortlen;
- int longlen;
int i;
+ Assert(a == NULL || a->words[a->nwords - 1] != 0);
+ Assert(b == NULL || b->words[b->nwords - 1] != 0);
+
/* Handle cases where either input is NULL */
if (a == NULL)
return true; /* empty set is a subset of anything */
if (b == NULL)
return false;
- /* Check common words */
- shortlen = Min(a->nwords, b->nwords);
- for (i = 0; i < shortlen; i++)
+
+ /* 'a' can't be a subset of 'b' if it contains more words */
+ if (a->nwords > b->nwords)
+ return false;
+
+ /* Check all 'a' members are set in 'b' */
+ i = 0;
+ do
{
if ((a->words[i] & ~b->words[i]) != 0)
return false;
- }
- /* Check extra words */
- if (a->nwords > b->nwords)
- {
- longlen = a->nwords;
- for (; i < longlen; i++)
- {
- if (a->words[i] != 0)
- return false;
- }
- }
+ } while (++i < a->nwords);
return true;
}
{
BMS_Comparison result;
int shortlen;
- int longlen;
int i;
+ Assert(a == NULL || a->words[a->nwords - 1] != 0);
+ Assert(b == NULL || b->words[b->nwords - 1] != 0);
+
/* Handle cases where either input is NULL */
if (a == NULL)
{
/* Check common words */
result = BMS_EQUAL; /* status so far */
shortlen = Min(a->nwords, b->nwords);
- for (i = 0; i < shortlen; i++)
+ i = 0;
+ do
{
bitmapword aword = a->words[i];
bitmapword bword = b->words[i];
return BMS_DIFFERENT;
result = BMS_SUBSET1;
}
- }
+ } while (++i < shortlen);
/* Check extra words */
if (a->nwords > b->nwords)
{
- longlen = a->nwords;
- for (; i < longlen; i++)
- {
- if (a->words[i] != 0)
- {
- /* a is not a subset of b */
- if (result == BMS_SUBSET1)
- return BMS_DIFFERENT;
- result = BMS_SUBSET2;
- }
- }
+ /* if a has more words then a is not a subset of b */
+ if (result == BMS_SUBSET1)
+ return BMS_DIFFERENT;
+ return BMS_SUBSET2;
}
else if (a->nwords < b->nwords)
{
- longlen = b->nwords;
- for (; i < longlen; i++)
- {
- if (b->words[i] != 0)
- {
- /* b is not a subset of a */
- if (result == BMS_SUBSET2)
- return BMS_DIFFERENT;
- result = BMS_SUBSET1;
- }
- }
+ /* if b has more words then b is not a subset of a */
+ if (result == BMS_SUBSET2)
+ return BMS_DIFFERENT;
+ return BMS_SUBSET1;
}
return result;
}
return false;
/* Check words in common */
shortlen = Min(a->nwords, b->nwords);
- for (i = 0; i < shortlen; i++)
+ i = 0;
+ do
{
if ((a->words[i] & b->words[i]) != 0)
return true;
- }
+ } while (++i < shortlen);
return false;
}
bool
bms_nonempty_difference(const Bitmapset *a, const Bitmapset *b)
{
- int shortlen;
int i;
+ Assert(a == NULL || a->words[a->nwords - 1] != 0);
+ Assert(b == NULL || b->words[b->nwords - 1] != 0);
+
/* Handle cases where either input is NULL */
if (a == NULL)
return false;
if (b == NULL)
return true;
- /* Check words in common */
- shortlen = Min(a->nwords, b->nwords);
- for (i = 0; i < shortlen; i++)
+ /* if 'a' has more words then it must contain additional members */
+ if (a->nwords > b->nwords)
+ return true;
+ /* Check all 'a' members are set in 'b' */
+ i = 0;
+ do
{
if ((a->words[i] & ~b->words[i]) != 0)
return true;
- }
- /* Check extra words in a */
- for (; i < a->nwords; i++)
- {
- if (a->words[i] != 0)
- return true;
- }
+ } while (++i < a->nwords);
return false;
}
if (a == NULL)
elog(ERROR, "bitmapset is empty");
nwords = a->nwords;
- for (wordnum = 0; wordnum < nwords; wordnum++)
+ wordnum = 0;
+ do
{
bitmapword w = a->words[wordnum];
result = wordnum * BITS_PER_BITMAPWORD;
result += bmw_rightmost_one_pos(w);
}
- }
- if (result < 0)
- elog(ERROR, "bitmapset is empty");
+ } while (++wordnum < nwords);
+
+ /* we don't expect non-NULL sets to be empty */
+ Assert(result >= 0);
return result;
}
if (a == NULL)
return false;
nwords = a->nwords;
- for (wordnum = 0; wordnum < nwords; wordnum++)
+ wordnum = 0;
+ do
{
bitmapword w = a->words[wordnum];
result = wordnum * BITS_PER_BITMAPWORD;
result += bmw_rightmost_one_pos(w);
}
- }
- if (result < 0)
- return false;
+ } while (++wordnum < nwords);
+
+ /* we don't expect non-NULL sets to be empty */
+ Assert(result >= 0);
*member = result;
return true;
}
if (a == NULL)
return 0;
nwords = a->nwords;
- for (wordnum = 0; wordnum < nwords; wordnum++)
+ wordnum = 0;
+ do
{
bitmapword w = a->words[wordnum];
/* No need to count the bits in a zero word */
if (w != 0)
result += bmw_popcount(w);
- }
+ } while (++wordnum < nwords);
return result;
}
if (a == NULL)
return BMS_EMPTY_SET;
nwords = a->nwords;
- for (wordnum = 0; wordnum < nwords; wordnum++)
+ wordnum = 0;
+ do
{
bitmapword w = a->words[wordnum];
return BMS_MULTIPLE;
result = BMS_SINGLETON;
}
- }
+ } while (++wordnum < nwords);
return result;
}
-/*
- * bms_is_empty_internal - is a set empty?
- *
- * This is now used only locally, to detect cases where a function has
- * computed an empty set that we must now get rid of. Hence, we can
- * assume the input isn't NULL.
- */
-static bool
-bms_is_empty_internal(const Bitmapset *a)
-{
- int nwords;
- int wordnum;
-
- nwords = a->nwords;
- for (wordnum = 0; wordnum < nwords; wordnum++)
- {
- bitmapword w = a->words[wordnum];
-
- if (w != 0)
- return false;
- }
- return true;
-}
-
/*
* These operations all "recycle" their non-const inputs, ie, either
a = (Bitmapset *) repalloc(a, BITMAPSET_SIZE(wordnum + 1));
a->nwords = wordnum + 1;
/* zero out the enlarged portion */
- for (i = oldnwords; i < a->nwords; i++)
+ i = oldnwords;
+ do
+ {
a->words[i] = 0;
+ } while (++i < a->nwords);
}
a->words[wordnum] |= ((bitmapword) 1 << bitnum);
return NULL;
wordnum = WORDNUM(x);
bitnum = BITNUM(x);
- if (wordnum < a->nwords)
- a->words[wordnum] &= ~((bitmapword) 1 << bitnum);
- /* If we computed an empty result, we must return NULL */
- if (bms_is_empty_internal(a))
+
+ /* member can't exist. Return 'a' unmodified */
+ if (unlikely(wordnum >= a->nwords))
+ return a;
+
+ a->words[wordnum] &= ~((bitmapword) 1 << bitnum);
+
+ /* when last word becomes empty, trim off all trailing empty words */
+ if (a->words[wordnum] == 0 && wordnum == a->nwords - 1)
{
+ /* find the last non-empty word and make that the new final word */
+ for (int i = wordnum - 1; i >= 0; i--)
+ {
+ if (a->words[i] != 0)
+ {
+ a->nwords = i + 1;
+ return a;
+ }
+ }
+
+ /* the set is now empty */
pfree(a);
return NULL;
}
}
/* And union the shorter input into the result */
otherlen = other->nwords;
- for (i = 0; i < otherlen; i++)
+ i = 0;
+ do
+ {
result->words[i] |= other->words[i];
+ } while (++i < otherlen);
if (result != a)
pfree(a);
return result;
a = (Bitmapset *) repalloc(a, BITMAPSET_SIZE(uwordnum + 1));
a->nwords = uwordnum + 1;
/* zero out the enlarged portion */
- for (i = oldnwords; i < a->nwords; i++)
+ i = oldnwords;
+ do
+ {
a->words[i] = 0;
+ } while (++i < a->nwords);
}
wordnum = lwordnum = WORDNUM(lower);
Bitmapset *
bms_int_members(Bitmapset *a, const Bitmapset *b)
{
+ int lastnonzero;
int shortlen;
int i;
}
/* Intersect b into a; we need never copy */
shortlen = Min(a->nwords, b->nwords);
- for (i = 0; i < shortlen; i++)
+ lastnonzero = -1;
+ i = 0;
+ do
+ {
a->words[i] &= b->words[i];
- for (; i < a->nwords; i++)
- a->words[i] = 0;
+
+ if (a->words[i] != 0)
+ lastnonzero = i;
+ } while (++i < shortlen);
+
/* If we computed an empty result, we must return NULL */
- if (bms_is_empty_internal(a))
+ if (lastnonzero == -1)
{
pfree(a);
return NULL;
}
+
+ /* get rid of trailing zero words */
+ a->nwords = lastnonzero + 1;
return a;
}
Bitmapset *
bms_del_members(Bitmapset *a, const Bitmapset *b)
{
- int shortlen;
int i;
+ Assert(a == NULL || a->words[a->nwords - 1] != 0);
+ Assert(b == NULL || b->words[b->nwords - 1] != 0);
+
/* Handle cases where either input is NULL */
if (a == NULL)
return NULL;
if (b == NULL)
return a;
/* Remove b's bits from a; we need never copy */
- shortlen = Min(a->nwords, b->nwords);
- for (i = 0; i < shortlen; i++)
- a->words[i] &= ~b->words[i];
- /* If we computed an empty result, we must return NULL */
- if (bms_is_empty_internal(a))
+ if (a->nwords > b->nwords)
{
- pfree(a);
- return NULL;
+ /*
+ * We'll never need to remove trailing zero words when 'a' has more
+ * words than 'b'.
+ */
+ i = 0;
+ do
+ {
+ a->words[i] &= ~b->words[i];
+ } while (++i < b->nwords);
+ }
+ else
+ {
+ int lastnonzero = -1;
+
+ /* we may need to remove trailing zero words from the result. */
+ i = 0;
+ do
+ {
+ a->words[i] &= ~b->words[i];
+
+ /* remember the last non-zero word */
+ if (a->words[i] != 0)
+ lastnonzero = i;
+ } while (++i < a->nwords);
+
+ /* check if 'a' has become empty */
+ if (lastnonzero == -1)
+ {
+ pfree(a);
+ return NULL;
+ }
+
+ /* trim off any trailing zero words */
+ a->nwords = lastnonzero + 1;
}
+
return a;
}
}
/* And union the shorter input into the result */
otherlen = other->nwords;
- for (i = 0; i < otherlen; i++)
+ i = 0;
+ do
+ {
result->words[i] |= other->words[i];
+ } while (++i < otherlen);
if (other != result) /* pure paranoia */
pfree(other);
return result;
/*
* bms_hash_value - compute a hash key for a Bitmapset
- *
- * Note: we must ensure that any two bitmapsets that are bms_equal() will
- * hash to the same value; in practice this means that trailing all-zero
- * words must not affect the result. Hence we strip those before applying
- * hash_any().
*/
uint32
bms_hash_value(const Bitmapset *a)
{
- int lastword;
-
if (a == NULL)
return 0; /* All empty sets hash to 0 */
- for (lastword = a->nwords; --lastword >= 0;)
- {
- if (a->words[lastword] != 0)
- break;
- }
- if (lastword < 0)
- return 0; /* All empty sets hash to 0 */
return DatumGetUInt32(hash_any((const unsigned char *) a->words,
- (lastword + 1) * sizeof(bitmapword)));
+ a->nwords * sizeof(bitmapword)));
}
/*