--- /dev/null
+/*-------------------------------------------------------------------------
+ *
+ * integerset.c
+ * Data structure to hold a large set of 64-bit integers efficiently
+ *
+ * IntegerSet provides an in-memory data structure to hold a set of
+ * arbitrary 64-bit integers. Internally, the values are stored in a
+ * B-tree, with a special packed representation at the leaf level using
+ * the Simple-8b algorithm, which can pack hold clusters of nearby values
+ * very tightly.
+ *
+ * Memory consumption depends on the number of values stored, but also
+ * on how far the values are from each other. In the best case, with
+ * long runs of consecutive integers, memory consumption can be as low as
+ * 0.1 bytes per integer. In the worst case, if integers are more than
+ * 2^32 apart, it uses about 8 bytes per integer. In typical use, the
+ * consumption per integer is somewhere between those extremes, depending
+ * on the range of integers stored, and how "clustered" they are.
+ *
+ *
+ * Interface
+ * ---------
+ *
+ * intset_create - Create a new empty set.
+ * intset_add_member - Add an integer to the set.
+ * intset_is_member - Test if an integer is in the set
+ * intset_begin_iterate - Begin iterating through all integers in set
+ * intset_iterate_next - Return next integer
+ *
+ * intset_create() creates the set in the current memory context. Note
+ * that there is no function to free an integer set. If you need to do that,
+ * create a dedicated memory context to hold it, and destroy the memory
+ * context instead.
+ *
+ *
+ * Limitations
+ * -----------
+ *
+ * - Values must be added in order. (Random insertions would require
+ * splitting nodes, which hasn't been implemented.)
+ *
+ * - Values cannot be added while iteration is in progress.
+ *
+ * - No support for removing values.
+ *
+ * None of these limitations are fundamental to the data structure, and
+ * could be lifted if needed, by writing some new code. But the current
+ * users of this facility don't need them.
+ *
+ *
+ * References
+ * ----------
+ *
+ * Simple-8b encoding is based on:
+ *
+ * Vo Ngoc Anh , Alistair Moffat, Index compression using 64-bit words,
+ * Software - Practice & Experience, v.40 n.2, p.131-147, February 2010
+ * (https://doi.org/10.1002/spe.948)
+ *
+ *
+ * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ * src/backend/lib/integerset.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/htup_details.h"
+#include "lib/integerset.h"
+#include "port/pg_bitutils.h"
+#include "utils/memutils.h"
+
+
+/*
+ * Maximum number of integers that can be encoded in a single Single-8b
+ * codeword. (Defined here before anything else, so that we can size arrays
+ * using this).
+ */
+#define SIMPLE8B_MAX_VALUES_PER_CODEWORD 240
+
+/*
+ * Parameters for shape of the in-memory B-tree.
+ *
+ * These set the size of each internal and leaf node. They don't necessarily
+ * need to be the same, because the tree is just an in-memory structure.
+ * With the default 64, each node is about 1 kb.
+ *
+ * If you change these, you must recalculate MAX_TREE_LEVELS, too!
+ */
+#define MAX_INTERNAL_ITEMS 64
+#define MAX_LEAF_ITEMS 64
+
+/*
+ * Maximum height of the tree.
+ *
+ * MAX_TREE_ITEMS is calculated from the "fan-out" of the B-tree. The
+ * theoretical maximum number of items that we can store in a set is 2^64,
+ * so MAX_TREE_LEVELS should be set so that:
+ *
+ * MAX_LEAF_ITEMS * MAX_INTERNAL_ITEMS ^ (MAX_TREE_LEVELS - 1) >= 2^64.
+ *
+ * In practice, we'll need far fewer levels, because you will run out of
+ * memory long before reaching that number, but let's be conservative.
+ */
+#define MAX_TREE_LEVELS 11
+
+/*
+ * Node structures, for the in-memory B-tree.
+ *
+ * An internal node holds a number of downlink pointers to leaf nodes, or
+ * to internal nodes on lower level. For each downlink, the key value
+ * corresponding the lower level node is stored in a sorted array. The
+ * stored key values are low keys. In other words, if the downlink has value
+ * X, then all items stored on that child are >= X.
+ *
+ * Each leaf node holds a number of "items", with a varying number of
+ * integers packed into each item. Each item consists of two 64-bit words:
+ * The first word holds first integer stored in the item, in plain format.
+ * The second word contains between 0 and 240 more integers, packed using
+ * Simple-8b encoding. By storing the first integer in plain, unpacked,
+ * format, we can use binary search to quickly find an item that holds (or
+ * would hold) a particular integer. And by storing the rest in packed form,
+ * we still get pretty good memory density, if there are clusters of integers
+ * with similar values.
+ *
+ * Each leaf node also has a pointer to the next leaf node, so that the leaf
+ * nodes can be easily walked from beginning to end, when iterating.
+ */
+typedef struct intset_node intset_node;
+typedef struct intset_leaf_node intset_leaf_node;
+typedef struct intset_internal_node intset_internal_node;
+
+/* Common structure of both leaf and internal nodes. */
+struct intset_node
+{
+ uint16 level;
+ uint16 num_items;
+};
+
+/* Internal node */
+struct intset_internal_node
+{
+ /* common header, must match intset_node */
+ uint16 level; /* >= 1 on internal nodes */
+ uint16 num_items;
+
+ /*
+ * 'values' is an array of key values, and 'downlinks' are pointers to
+ * lower-level nodes, corresponding to the key values.
+ */
+ uint64 values[MAX_INTERNAL_ITEMS];
+ intset_node *downlinks[MAX_INTERNAL_ITEMS];
+};
+
+/* Leaf node */
+typedef struct
+{
+ uint64 first; /* first integer in this item */
+ uint64 codeword; /* simple8b encoded differences from 'first' */
+} leaf_item;
+
+#define MAX_VALUES_PER_LEAF_ITEM (1 + SIMPLE8B_MAX_VALUES_PER_CODEWORD)
+
+struct intset_leaf_node
+{
+ /* common header, must match intset_node */
+ uint16 level; /* 0 on leafs */
+ uint16 num_items;
+
+ intset_leaf_node *next; /* right sibling, if any */
+
+ leaf_item items[MAX_LEAF_ITEMS];
+};
+
+/*
+ * We buffer insertions in a simple array, before packing and inserting them
+ * into the B-tree. MAX_BUFFERED_VALUES sets the size of the buffer. The
+ * encoder assumes that it is large enough, that we can always fill a leaf
+ * item with buffered new items. In other words, MAX_BUFFERED_VALUES must be
+ * larger than MAX_VALUES_PER_LEAF_ITEM. For efficiency, make it much larger.
+ */
+#define MAX_BUFFERED_VALUES (MAX_VALUES_PER_LEAF_ITEM * 2)
+
+/*
+ * IntegerSet is the top-level object representing the set.
+ *
+ * The integers are stored in an in-memory B-tree structure, and an array
+ * for newly-added integers. IntegerSet also tracks information about memory
+ * usage, as well as the current position, when iterating the set with
+ * intset_begin_iterate / intset_iterate_next.
+ */
+struct IntegerSet
+{
+ /*
+ * 'context' is the memory context holding this integer set and all its
+ * tree nodes.
+ *
+ * 'mem_used' tracks the amount of memory used. We don't do anything with
+ * it in integerset.c itself, but the callers can ask for it with
+ * intset_memory_usage().
+ */
+ MemoryContext context;
+ uint64 mem_used;
+
+ uint64 num_entries; /* total # of values in the set */
+ uint64 highest_value; /* highest value stored in this set */
+
+ /*
+ * B-tree to hold the packed values.
+ *
+ * 'rightmost_nodes' hold pointers to the rightmost node on each level.
+ * rightmost_parent[0] is rightmost leaf, rightmost_parent[1] is its
+ * parent, and so forth, all the way up to the root. These are needed when
+ * adding new values. (Currently, we require that new values are added at
+ * the end.)
+ */
+ int num_levels; /* height of the tree */
+ intset_node *root; /* root node */
+ intset_node *rightmost_nodes[MAX_TREE_LEVELS];
+ intset_leaf_node *leftmost_leaf; /* leftmost leaf node */
+
+ /*
+ * Holding area for new items that haven't been inserted to the tree yet.
+ */
+ uint64 buffered_values[MAX_BUFFERED_VALUES];
+ int num_buffered_values;
+
+ /*
+ * Iterator support.
+ *
+ * 'iter_values' is an array of integers ready to be returned to the
+ * caller. 'item_node' and 'item_itemno' point to the leaf node, and item
+ * within the leaf node, to get the next batch of values from.
+ *
+ * Normally, 'iter_values' points 'iter_values_buf', which holds items
+ * decoded from a leaf item. But after we have scanned the whole B-tree,
+ * we iterate through all the unbuffered values, too, by pointing
+ * iter_values to 'buffered_values'.
+ */
+ uint64 *iter_values;
+ int iter_num_values; /* number of elements in 'iter_values' */
+ int iter_valueno; /* index into 'iter_values' */
+ intset_leaf_node *iter_node; /* current leaf node */
+ int iter_itemno; /* next item 'iter_node' to decode */
+
+ uint64 iter_values_buf[MAX_VALUES_PER_LEAF_ITEM];
+};
+
+/*
+ * s for internal functions.
+ */
+static void intset_update_upper(IntegerSet *intset, int level,
+ intset_node *new_node, uint64 new_node_item);
+static void intset_flush_buffered_values(IntegerSet *intset);
+
+static int intset_binsrch_uint64(uint64 value, uint64 *arr, int arr_elems,
+ bool nextkey);
+static int intset_binsrch_leaf(uint64 value, leaf_item *arr, int arr_elems,
+ bool nextkey);
+
+static uint64 simple8b_encode(uint64 *ints, int *num_encoded, uint64 base);
+static int simple8b_decode(uint64 codeword, uint64 *decoded, uint64 base);
+static bool simple8b_contains(uint64 codeword, uint64 key, uint64 base);
+
+
+/*
+ * Create a new, initially empty, integer set.
+ *
+ * The integer set is created in the current memory context.
+ */
+IntegerSet *
+intset_create(void)
+{
+ IntegerSet *intset;
+
+ /*
+ * Allocate the IntegerSet object in the current memory context. Remember
+ * the context, so that we will do all subsequent allocations in the same
+ * context, too, regardless of which memory context is current when new
+ * integers are added to the set.
+ */
+ intset = (IntegerSet *) palloc(sizeof(IntegerSet));
+ intset->context = CurrentMemoryContext;
+ intset->mem_used = GetMemoryChunkSpace(intset);
+
+ intset->num_entries = 0;
+ intset->highest_value = 0;
+
+ intset->num_levels = 0;
+ intset->root = NULL;
+ memset(intset->rightmost_nodes, 0, sizeof(intset->rightmost_nodes));
+ intset->leftmost_leaf = NULL;
+
+ intset->num_buffered_values = 0;
+
+ intset->iter_node = NULL;
+ intset->iter_itemno = 0;
+ intset->iter_valueno = 0;
+ intset->iter_num_values = 0;
+
+ return intset;
+}
+
+/*
+ * Allocate a new node.
+ */
+static intset_internal_node *
+intset_new_internal_node(IntegerSet *intset)
+{
+ intset_internal_node *n;
+
+ n = (intset_internal_node *) MemoryContextAlloc(intset->context,
+ sizeof(intset_internal_node));
+ intset->mem_used += GetMemoryChunkSpace(n);
+
+ n->level = 0; /* caller must set */
+ n->num_items = 0;
+
+ return n;
+}
+
+static intset_leaf_node *
+intset_new_leaf_node(IntegerSet *intset)
+{
+ intset_leaf_node *n;
+
+ n = (intset_leaf_node *) MemoryContextAlloc(intset->context,
+ sizeof(intset_leaf_node));
+ intset->mem_used += GetMemoryChunkSpace(n);
+
+ n->level = 0;
+ n->num_items = 0;
+ n->next = NULL;
+
+ return n;
+}
+
+/*
+ * Return the number of entries in the integer set.
+ */
+uint64
+intset_num_entries(IntegerSet *intset)
+{
+ return intset->num_entries;
+}
+
+/*
+ * Return the amount of memory used by the integer set.
+ */
+uint64
+intset_memory_usage(IntegerSet *intset)
+{
+ return intset->mem_used;
+}
+
+/*
+ * Add a value to the set.
+ *
+ * Values must be added in order.
+ */
+void
+intset_add_member(IntegerSet *intset, uint64 x)
+{
+ if (intset->iter_node)
+ elog(ERROR, "cannot add new values to integer set when iteration is in progress");
+
+ if (x <= intset->highest_value && intset->num_entries > 0)
+ elog(ERROR, "cannot add value to integer set out of order");
+
+ if (intset->num_buffered_values >= MAX_BUFFERED_VALUES)
+ {
+ /* Time to flush our buffer */
+ intset_flush_buffered_values(intset);
+ Assert(intset->num_buffered_values < MAX_BUFFERED_VALUES);
+ }
+
+ /* Add it to the buffer of newly-added values */
+ intset->buffered_values[intset->num_buffered_values] = x;
+ intset->num_buffered_values++;
+ intset->num_entries++;
+ intset->highest_value = x;
+}
+
+/*
+ * Take a batch of buffered values, and pack them into the B-tree.
+ */
+static void
+intset_flush_buffered_values(IntegerSet *intset)
+{
+ uint64 *values = intset->buffered_values;
+ uint64 num_values = intset->num_buffered_values;
+ int num_packed = 0;
+ intset_leaf_node *leaf;
+
+ leaf = (intset_leaf_node *) intset->rightmost_nodes[0];
+
+ /*
+ * If the tree is completely empty, create the first leaf page, which is
+ * also the root.
+ */
+ if (leaf == NULL)
+ {
+ /*
+ * This is the very first item in the set.
+ *
+ * Allocate root node. It's also a leaf.
+ */
+ leaf = intset_new_leaf_node(intset);
+
+ intset->root = (intset_node *) leaf;
+ intset->leftmost_leaf = leaf;
+ intset->rightmost_nodes[0] = (intset_node *) leaf;
+ intset->num_levels = 1;
+ }
+
+ /*
+ * If there are less than MAX_VALUES_PER_LEAF_ITEM values in the buffer,
+ * stop. In most cases, we cannot encode that many values in a single
+ * value, but this way, the encoder doesn't have to worry about running
+ * out of input.
+ */
+ while (num_values - num_packed >= MAX_VALUES_PER_LEAF_ITEM)
+ {
+ leaf_item item;
+ int num_encoded;
+
+ /*
+ * Construct the next leaf item, packing as many buffered values as
+ * possible.
+ */
+ item.first = values[num_packed];
+ item.codeword = simple8b_encode(&values[num_packed + 1],
+ &num_encoded,
+ item.first);
+
+ /*
+ * Add the item to the node, allocating a new node if the old one is
+ * full.
+ */
+ if (leaf->num_items >= MAX_LEAF_ITEMS)
+ {
+ /* Allocate new leaf and link it to the tree */
+ intset_leaf_node *old_leaf = leaf;
+
+ leaf = intset_new_leaf_node(intset);
+ old_leaf->next = leaf;
+ intset->rightmost_nodes[0] = (intset_node *) leaf;
+ intset_update_upper(intset, 1, (intset_node *) leaf, item.first);
+ }
+ leaf->items[leaf->num_items++] = item;
+
+ num_packed += 1 + num_encoded;
+ }
+
+ /*
+ * Move any remaining buffered values to the beginning of the array.
+ */
+ if (num_packed < intset->num_buffered_values)
+ {
+ memmove(&intset->buffered_values[0],
+ &intset->buffered_values[num_packed],
+ (intset->num_buffered_values - num_packed) * sizeof(uint64));
+ }
+ intset->num_buffered_values -= num_packed;
+}
+
+/*
+ * Insert a downlink into parent node, after creating a new node.
+ *
+ * Recurses if the parent node is full, too.
+ */
+static void
+intset_update_upper(IntegerSet *intset, int level, intset_node *child,
+ uint64 child_key)
+{
+ intset_internal_node *parent;
+
+ Assert(level > 0);
+
+ /*
+ * Create a new root node, if necessary.
+ */
+ if (level >= intset->num_levels)
+ {
+ intset_node *oldroot = intset->root;
+ uint64 downlink_key;
+
+ /* MAX_TREE_LEVELS should be more than enough, this shouldn't happen */
+ if (intset->num_levels == MAX_TREE_LEVELS)
+ elog(ERROR, "could not expand integer set, maximum number of levels reached");
+ intset->num_levels++;
+
+ /*
+ * Get the first value on the old root page, to be used as the
+ * downlink.
+ */
+ if (intset->root->level == 0)
+ downlink_key = ((intset_leaf_node *) oldroot)->items[0].first;
+ else
+ downlink_key = ((intset_internal_node *) oldroot)->values[0];
+
+ parent = intset_new_internal_node(intset);
+ parent->level = level;
+ parent->values[0] = downlink_key;
+ parent->downlinks[0] = oldroot;
+ parent->num_items = 1;
+
+ intset->root = (intset_node *) parent;
+ intset->rightmost_nodes[level] = (intset_node *) parent;
+ }
+
+ /*
+ * Place the downlink on the parent page.
+ */
+ parent = (intset_internal_node *) intset->rightmost_nodes[level];
+
+ if (parent->num_items < MAX_INTERNAL_ITEMS)
+ {
+ parent->values[parent->num_items] = child_key;
+ parent->downlinks[parent->num_items] = child;
+ parent->num_items++;
+ }
+ else
+ {
+ /*
+ * Doesn't fit. Allocate new parent, with the downlink as the first
+ * item on it, and recursively insert the downlink to the new parent
+ * to the grandparent.
+ */
+ parent = intset_new_internal_node(intset);
+ parent->level = level;
+ parent->values[0] = child_key;
+ parent->downlinks[0] = child;
+ parent->num_items = 1;
+
+ intset->rightmost_nodes[level] = (intset_node *) parent;
+
+ intset_update_upper(intset, level + 1, (intset_node *) parent, child_key);
+ }
+}
+
+/*
+ * Does the set contain the given value?
+ */
+bool
+intset_is_member(IntegerSet *intset, uint64 x)
+{
+ intset_node *node;
+ intset_leaf_node *leaf;
+ int level;
+ int itemno;
+ leaf_item *item;
+
+ /*
+ * The value might be in the buffer of newly-added values.
+ */
+ if (intset->num_buffered_values > 0 && x >= intset->buffered_values[0])
+ {
+ int itemno;
+
+ itemno = intset_binsrch_uint64(x,
+ intset->buffered_values,
+ intset->num_buffered_values,
+ false);
+ if (itemno >= intset->num_buffered_values)
+ return false;
+ else
+ return intset->buffered_values[itemno] == x;
+ }
+
+ /*
+ * Start from the root, and walk down the B-tree to find the right leaf
+ * node.
+ */
+ if (!intset->root)
+ return false;
+ node = intset->root;
+ for (level = intset->num_levels - 1; level > 0; level--)
+ {
+ intset_internal_node *n = (intset_internal_node *) node;
+
+ Assert(node->level == level);
+
+ itemno = intset_binsrch_uint64(x, n->values, n->num_items, true);
+ if (itemno == 0)
+ return false;
+ node = n->downlinks[itemno - 1];
+ }
+ Assert(node->level == 0);
+ leaf = (intset_leaf_node *) node;
+
+ /*
+ * Binary search the right item on the leaf page
+ */
+ itemno = intset_binsrch_leaf(x, leaf->items, leaf->num_items, true);
+ if (itemno == 0)
+ return false;
+ item = &leaf->items[itemno - 1];
+
+ /* Is this a match to the first value on the item? */
+ if (item->first == x)
+ return true;
+ Assert(x > item->first);
+
+ /* Is it in the packed codeword? */
+ if (simple8b_contains(item->codeword, x, item->first))
+ return true;
+
+ return false;
+}
+
+/*
+ * Begin in-order scan through all the values.
+ *
+ * While the iteration is in-progress, you cannot add new values to the set.
+ */
+void
+intset_begin_iterate(IntegerSet *intset)
+{
+ intset->iter_node = intset->leftmost_leaf;
+ intset->iter_itemno = 0;
+ intset->iter_valueno = 0;
+ intset->iter_num_values = 0;
+ intset->iter_values = intset->iter_values_buf;
+}
+
+/*
+ * Returns the next integer, when iterating.
+ *
+ * intset_begin_iterate() must be called first. intset_iterate_next() returns
+ * the next value in the set. Returns true, if there was another value, and
+ * stores the value in *next. Otherwise, returns false.
+ */
+bool
+intset_iterate_next(IntegerSet *intset, uint64 *next)
+{
+ for (;;)
+ {
+ if (intset->iter_valueno < intset->iter_num_values)
+ {
+ *next = intset->iter_values[intset->iter_valueno++];
+ return true;
+ }
+
+ /* Our queue is empty, decode next leaf item */
+ if (intset->iter_node && intset->iter_itemno < intset->iter_node->num_items)
+ {
+ /* We have reached end of this packed item. Step to the next one. */
+ leaf_item *item;
+ int num_decoded;
+
+ item = &intset->iter_node->items[intset->iter_itemno++];
+
+ intset->iter_values[0] = item->first;
+ num_decoded = simple8b_decode(item->codeword, &intset->iter_values[1], item->first);
+ intset->iter_num_values = num_decoded + 1;
+
+ intset->iter_valueno = 0;
+ continue;
+ }
+
+ /* No more items on this leaf, step to next node */
+ if (intset->iter_node)
+ {
+ /* No more matches on this bucket. Step to the next node. */
+ intset->iter_node = intset->iter_node->next;
+ intset->iter_itemno = 0;
+ intset->iter_valueno = 0;
+ intset->iter_num_values = 0;
+ continue;
+ }
+
+ /*
+ * We have reached the end of the B-tree. But we might still have
+ * some integers in the buffer of newly-added values.
+ */
+ if (intset->iter_values == intset->iter_values_buf)
+ {
+ intset->iter_values = intset->buffered_values;
+ intset->iter_num_values = intset->num_buffered_values;
+ continue;
+ }
+
+ break;
+ }
+
+ /* No more results. */
+ *next = 0;
+ return false;
+}
+
+/*
+ * intset_binsrch_uint64() -- search a sorted array of uint64s
+ *
+ * Returns the first position with key equal or less than the given key.
+ * The returned position would be the "insert" location for the given key,
+ * that is, the position where the new key should be inserted to.
+ *
+ * 'nextkey' affects the behavior on equal keys. If true, and there is an
+ * equal key in the array, this returns the position immediately after the
+ * equal key. If false, this returns the position of the equal key itself.
+ */
+static int
+intset_binsrch_uint64(uint64 item, uint64 *arr, int arr_elems, bool nextkey)
+{
+ int low,
+ high,
+ mid;
+
+ low = 0;
+ high = arr_elems;
+ while (high > low)
+ {
+ mid = low + (high - low) / 2;
+
+ if (nextkey)
+ {
+ if (item >= arr[mid])
+ low = mid + 1;
+ else
+ high = mid;
+ }
+ else
+ {
+ if (item > arr[mid])
+ low = mid + 1;
+ else
+ high = mid;
+ }
+ }
+
+ return low;
+}
+
+/* same, but for an array of leaf items */
+static int
+intset_binsrch_leaf(uint64 item, leaf_item *arr, int arr_elems, bool nextkey)
+{
+ int low,
+ high,
+ mid;
+
+ low = 0;
+ high = arr_elems;
+ while (high > low)
+ {
+ mid = low + (high - low) / 2;
+
+ if (nextkey)
+ {
+ if (item >= arr[mid].first)
+ low = mid + 1;
+ else
+ high = mid;
+ }
+ else
+ {
+ if (item > arr[mid].first)
+ low = mid + 1;
+ else
+ high = mid;
+ }
+ }
+
+ return low;
+}
+
+/*
+ * Simple-8b encoding.
+ *
+ * Simple-8b algorithm packs between 1 and 240 integers into 64-bit words,
+ * called "codewords". The number of integers packed into a single codeword
+ * depends on the integers being packed: small integers are encoded using
+ * fewer bits than large integers. A single codeword can store a single
+ * 60-bit integer, or two 30-bit integers, for example.
+ *
+ * Since we're storing a unique, sorted, set of integers, we actually encode
+ * the *differences* between consecutive integers. That way, clusters of
+ * integers that are close to each other are packed efficiently, regardless
+ * of the absolute values.
+ *
+ * In Simple-8b, each codeword consists of a 4-bit selector, which indicates
+ * how many integers are encoded in the codeword, and the encoded integers
+ * packed into the remaining 60 bits. The selector allows for 16 different
+ * ways of using the remaining 60 bits, "modes". The number of integers
+ * packed into a single codeword is listed in the simple8b_modes table below.
+ * For example, consider the following codeword:
+ *
+ * 20-bit integer 20-bit integer 20-bit integer
+ * 1101 00000000000000010010 01111010000100100000 00000000000000010100
+ * ^
+ * selector
+ *
+ * The selector 1101 is 13 in decimal. From the modes table below, we see
+ * that it means that the codeword encodes three 12-bit integers. In decimal,
+ * those integers are 18, 500000 and 20. Because we encode deltas rather than
+ * absolute values, the actual values that they represent are 18, 500018 and
+ * 500038.
+ *
+ * Modes 0 and 1 are a bit special; they encode a run of 240 or 120 zeros
+ * (which means 240 or 120 consecutive integers, since we're encoding the
+ * the deltas between integers), without using the rest of the codeword bits
+ * for anything.
+ *
+ * Simple-8b cannot encode integers larger than 60 bits. Values larger than
+ * that are always stored in the 'first' field of a leaf item, never in the
+ * packed codeword. If there is a sequence of integers that are more than
+ * 2^60 apart, the codeword will go unused on those items. To represent that,
+ * we use a magic EMPTY_CODEWORD codeword.
+ */
+static const struct
+{
+ uint8 bits_per_int;
+ uint8 num_ints;
+} simple8b_modes[17] =
+{
+ {0, 240}, /* mode 0: 240 zeros */
+ {0, 120}, /* mode 1: 120 zeros */
+ {1, 60}, /* mode 2: sixty 1-bit integers */
+ {2, 30}, /* mode 3: thirty 2-bit integers */
+ {3, 20}, /* mode 4: twenty 3-bit integers */
+ {4, 15}, /* mode 5: fifteen 4-bit integers */
+ {5, 12}, /* mode 6: twelve 5-bit integers */
+ {6, 10}, /* mode 7: ten 6-bit integers */
+ {7, 8}, /* mode 8: eight 7-bit integers (four bits
+ * are wasted) */
+ {8, 7}, /* mode 9: seven 8-bit integers (four bits
+ * are wasted) */
+ {10, 6}, /* mode 10: six 10-bit integers */
+ {12, 5}, /* mode 11: five 12-bit integers */
+ {15, 4}, /* mode 12: four 15-bit integers */
+ {20, 3}, /* mode 13: three 20-bit integers */
+ {30, 2}, /* mode 14: two 30-bit integers */
+ {60, 1}, /* mode 15: one 60-bit integer */
+ {0, 0} /* sentinel value */
+};
+
+/*
+ * EMPTY_CODEWORD is a special value, used to indicate "no values".
+ * It is used if the next value is too large to be encoded with Simple-8b.
+ *
+ * This value looks like a 0-mode codeword, but we check for it
+ * specifically. (In a real 0-mode codeword, all the unused bits are zero.)
+ */
+#define EMPTY_CODEWORD (0xFFFFFFFFFFFFFFF0)
+
+/*
+ * Encode a number of integers into a Simple-8b codeword.
+ *
+ * Returns the number of integers that were encoded.
+ */
+static uint64
+simple8b_encode(uint64 *ints, int *num_encoded, uint64 base)
+{
+ int selector;
+ int nints;
+ int bits;
+ uint64 diff;
+ uint64 last_val;
+ uint64 codeword;
+ uint64 diffs[60];
+ int i;
+
+ Assert(ints[0] > base);
+
+ /*
+ * Select the "mode" to use for the next codeword.
+ *
+ * In each iteration, check if the next value can be represented in the
+ * current mode we're considering. If it's too large, then step up the
+ * mode to a wider one, and repeat. If it fits, move on to the next
+ * integer. Repeat until the codeword is full, given the current mode.
+ *
+ * Note that we don't have any way to represent unused slots in the
+ * codeword, so we require each codeword to be "full".
+ */
+ selector = 0;
+ nints = simple8b_modes[0].num_ints;
+ bits = simple8b_modes[0].bits_per_int;
+ diff = ints[0] - base - 1;
+ last_val = ints[0];
+ i = 0;
+ for (;;)
+ {
+ if (diff >= (1L << bits))
+ {
+ /* too large, step up to next mode */
+ selector++;
+ nints = simple8b_modes[selector].num_ints;
+ bits = simple8b_modes[selector].bits_per_int;
+ if (i >= nints)
+ break;
+ }
+ else
+ {
+ if (i < 60)
+ diffs[i] = diff;
+ i++;
+ if (i >= nints)
+ break;
+
+ Assert(ints[i] > last_val);
+ diff = ints[i] - last_val - 1;
+ last_val = ints[i];
+ }
+ }
+
+ if (nints == 0)
+ {
+ /* The next value is too large and be encoded with Simple-8b */
+ Assert(i == 0);
+ *num_encoded = 0;
+ return EMPTY_CODEWORD;
+ }
+
+ /*
+ * Encode the integers using the selected mode. Note that we shift them
+ * into the codeword in reverse order, so that they will come out in the
+ * correct order in the decoder.
+ */
+ codeword = 0;
+ if (bits > 0)
+ {
+ for (i = nints - 1; i >= 0; i--)
+ {
+ codeword <<= bits;
+ codeword |= diffs[i];
+ }
+ }
+
+ /* add selector to the codeword, and return */
+ codeword <<= 4;
+ codeword |= selector;
+
+ *num_encoded = nints;
+ return codeword;
+}
+
+/*
+ * Decode a codeword into an array of integers.
+ */
+static int
+simple8b_decode(uint64 codeword, uint64 *decoded, uint64 base)
+{
+ int selector = codeword & 0x0f;
+ int nints = simple8b_modes[selector].num_ints;
+ uint64 bits = simple8b_modes[selector].bits_per_int;
+ uint64 mask = (1L << bits) - 1;
+ uint64 prev_value;
+
+ if (codeword == EMPTY_CODEWORD)
+ return 0;
+
+ codeword >>= 4; /* shift out the selector */
+
+ prev_value = base;
+ for (int i = 0; i < nints; i++)
+ {
+ uint64 diff = codeword & mask;
+
+ decoded[i] = prev_value + 1L + diff;
+ prev_value = decoded[i];
+ codeword >>= bits;
+ }
+ return nints;
+}
+
+/*
+ * This is very similar to simple8b_decode(), but instead of decoding all
+ * the values to an array, it just checks if the given integer is part of
+ * the codeword.
+ */
+static bool
+simple8b_contains(uint64 codeword, uint64 key, uint64 base)
+{
+ int selector = codeword & 0x0f;
+ int nints = simple8b_modes[selector].num_ints;
+ int bits = simple8b_modes[selector].bits_per_int;
+
+ if (codeword == EMPTY_CODEWORD)
+ return false;
+
+ codeword >>= 4; /* shift out the selector */
+
+ if (bits == 0)
+ {
+ /* Special handling for 0-bit cases. */
+ return key - base <= nints;
+ }
+ else
+ {
+ int mask = (1L << bits) - 1;
+ uint64 prev_value;
+
+ prev_value = base;
+ for (int i = 0; i < nints; i++)
+ {
+ uint64 diff = codeword & mask;
+ uint64 curr_value;
+
+ curr_value = prev_value + 1L + diff;
+
+ if (curr_value >= key)
+ {
+ if (curr_value == key)
+ return true;
+ else
+ return false;
+ }
+
+ codeword >>= bits;
+ prev_value = curr_value;
+ }
+ }
+ return false;
+}
--- /dev/null
+/*--------------------------------------------------------------------------
+ *
+ * test_integerset.c
+ * Test integer set data structure.
+ *
+ * Copyright (c) 2019, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/test/modules/test_integerset/test_integerset.c
+ *
+ * -------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "fmgr.h"
+#include "lib/integerset.h"
+#include "nodes/bitmapset.h"
+#include "utils/memutils.h"
+#include "utils/timestamp.h"
+#include "storage/block.h"
+#include "storage/itemptr.h"
+#include "miscadmin.h"
+
+/*
+ * If you enable this, the "pattern" tests will print information about
+ * how long populating, probing, and iterating the test set takes, and
+ * how much memory the test set consumed. That can be used as
+ * micro-benchmark of various operations and input patterns (you might
+ * want to increase the number of values used in each of the test, if
+ * you do that, to reduce noise)
+ *
+ * The information is printed to the server's stderr, mostly because
+ * that's where MemoryContextStats() output goes.
+ */
+static const bool intset_test_stats = false;
+
+PG_MODULE_MAGIC;
+
+PG_FUNCTION_INFO_V1(test_integerset);
+
+/*
+ * A struct to define a pattern of integers, for use with test_pattern()
+ * function.
+ */
+typedef struct
+{
+ char *test_name; /* short name of the test, for humans */
+ char *pattern_str; /* a bit pattern */
+ uint64 spacing; /* pattern repeats at this interval */
+ uint64 num_values; /* number of integers to set in total */
+} test_spec;
+
+static const test_spec test_specs[] = {
+ {
+ "all ones", "1111111111",
+ 10, 10000000
+ },
+ {
+ "alternating bits", "0101010101",
+ 10, 10000000
+ },
+ {
+ "clusters of ten", "1111111111",
+ 10000, 10000000
+ },
+ {
+ "clusters of hundred",
+ "1111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111",
+ 10000, 100000000
+ },
+ {
+ "one-every-64k", "1",
+ 65536, 10000000
+ },
+ {
+ "sparse", "100000000000000000000000000000001",
+ 10000000, 10000000
+ },
+ {
+ "single values, distance > 2^32", "1",
+ 10000000000L, 1000000
+ },
+ {
+ "clusters, distance > 2^32", "10101010",
+ 10000000000L, 10000000
+ },
+ {
+ "clusters, distance > 2^60", "10101010",
+ 2000000000000000000L,
+ 23 /* can't be much higher than this, or we
+ * overflow uint64 */
+ }
+};
+
+static void test_pattern(const test_spec *spec);
+static void test_empty(void);
+static void test_single_value(uint64 value);
+static void check_with_filler(IntegerSet *intset, uint64 x, uint64 value, uint64 filler_min, uint64 filler_max);
+static void test_single_value_and_filler(uint64 value, uint64 filler_min, uint64 filler_max);
+static void test_huge_distances(void);
+
+/*
+ * SQL-callable entry point to perform all tests.
+ */
+Datum
+test_integerset(PG_FUNCTION_ARGS)
+{
+ MemoryContext test_ctx;
+
+ test_ctx = AllocSetContextCreate(CurrentMemoryContext,
+ "test_integerset context",
+ ALLOCSET_DEFAULT_SIZES);
+
+ /* Tests for various corner cases */
+ test_empty();
+ test_huge_distances();
+ test_single_value(0);
+ test_single_value(1);
+ test_single_value(PG_UINT64_MAX - 1);
+ test_single_value(PG_UINT64_MAX);
+ test_single_value_and_filler(0, 1000, 2000);
+ test_single_value_and_filler(1, 1000, 2000);
+ test_single_value_and_filler(1, 1000, 2000000);
+ test_single_value_and_filler(PG_UINT64_MAX - 1, 1000, 2000);
+ test_single_value_and_filler(PG_UINT64_MAX, 1000, 2000);
+
+ /* Test different test patterns, with lots of entries */
+ for (int i = 0; i < lengthof(test_specs); i++)
+ {
+ MemoryContextReset(test_ctx);
+ test_pattern(&test_specs[i]);
+ }
+
+ MemoryContextDelete(test_ctx);
+
+ PG_RETURN_VOID();
+}
+
+/*
+ * Test with a repeating pattern, defined by the 'spec'.
+ */
+static void
+test_pattern(const test_spec *spec)
+{
+ IntegerSet *intset;
+ MemoryContext intset_ctx;
+ MemoryContext old_ctx;
+ TimestampTz starttime;
+ TimestampTz endtime;
+ uint64 n;
+ uint64 last_int;
+ int patternlen;
+ uint64 *pattern_values;
+ uint64 pattern_num_values;
+
+ elog(NOTICE, "testing intset with pattern \"%s\"", spec->test_name);
+ if (intset_test_stats)
+ fprintf(stderr, "-----\ntesting intset with pattern \"%s\"\n", spec->test_name);
+
+ /* Pre-process the pattern, creating an array of integers from it. */
+ patternlen = strlen(spec->pattern_str);
+ pattern_values = palloc(patternlen * sizeof(uint64));
+ pattern_num_values = 0;
+ for (int i = 0; i < patternlen; i++)
+ {
+ if (spec->pattern_str[i] == '1')
+ pattern_values[pattern_num_values++] = i;
+ }
+
+ /*
+ * Allocate the integer set.
+ *
+ * Allocate it in a separate memory context, so that we can print its
+ * memory usage easily. (intset_create() creates a memory context of its
+ * own, too, but we don't have direct access to it, so we cannot call
+ * MemoryContextStats() on it directly).
+ */
+ intset_ctx = AllocSetContextCreate(CurrentMemoryContext,
+ "intset test",
+ ALLOCSET_SMALL_SIZES);
+ MemoryContextSetIdentifier(intset_ctx, spec->test_name);
+ old_ctx = MemoryContextSwitchTo(intset_ctx);
+ intset = intset_create();
+ MemoryContextSwitchTo(old_ctx);
+
+ /*
+ * Add values to the set.
+ */
+ starttime = GetCurrentTimestamp();
+
+ n = 0;
+ last_int = 0;
+ while (n < spec->num_values)
+ {
+ uint64 x = 0;
+
+ for (int i = 0; i < pattern_num_values && n < spec->num_values; i++)
+ {
+ x = last_int + pattern_values[i];
+
+ intset_add_member(intset, x);
+ n++;
+ }
+ last_int += spec->spacing;
+ }
+
+ endtime = GetCurrentTimestamp();
+
+ if (intset_test_stats)
+ fprintf(stderr, "added %lu values in %lu ms\n",
+ spec->num_values, (endtime - starttime) / 1000);
+
+ /*
+ * Print stats on the amount of memory used.
+ *
+ * We print the usage reported by intset_memory_usage(), as well as the
+ * stats from the memory context. They should be in the same ballpark,
+ * but it's hard to automate testing that, so if you're making changes to
+ * the implementation, just observe that manually.
+ */
+ if (intset_test_stats)
+ {
+ uint64 mem_usage;
+
+ /*
+ * Also print memory usage as reported by intset_memory_usage(). It
+ * should be in the same ballpark as the usage reported by
+ * MemoryContextStats().
+ */
+ mem_usage = intset_memory_usage(intset);
+ fprintf(stderr, "intset_memory_usage() reported %lu (%0.2f bytes / integer)\n",
+ mem_usage, (double) mem_usage / spec->num_values);
+
+ MemoryContextStats(intset_ctx);
+ }
+
+ /* Check that intset_get_num_entries works */
+ n = intset_num_entries(intset);
+ if (n != spec->num_values)
+ elog(ERROR, "intset_num_entries returned %lu, expected %lu", n, spec->num_values);
+
+ /*
+ * Test random-access probes with intset_is_member()
+ */
+ starttime = GetCurrentTimestamp();
+
+ for (n = 0; n < 100000; n++)
+ {
+ bool b;
+ bool expected;
+ uint64 x;
+
+ /*
+ * Pick next value to probe at random. We limit the probes to the
+ * last integer that we added to the set, plus an arbitrary constant
+ * (1000). There's no point in probing the whole 0 - 2^64 range, if
+ * only a small part of the integer space is used. We would very
+ * rarely hit hit values that are actually in the set.
+ */
+ x = (pg_lrand48() << 31) | pg_lrand48();
+ x = x % (last_int + 1000);
+
+ /* Do we expect this value to be present in the set? */
+ if (x >= last_int)
+ expected = false;
+ else
+ {
+ uint64 idx = x % spec->spacing;
+
+ if (idx >= patternlen)
+ expected = false;
+ else if (spec->pattern_str[idx] == '1')
+ expected = true;
+ else
+ expected = false;
+ }
+
+ /* Is it present according to intset_is_member() ? */
+ b = intset_is_member(intset, x);
+
+ if (b != expected)
+ elog(ERROR, "mismatch at %lu: %d vs %d", x, b, expected);
+ }
+ endtime = GetCurrentTimestamp();
+ if (intset_test_stats)
+ fprintf(stderr, "probed %lu values in %lu ms\n", n, (endtime - starttime) / 1000);
+
+ /*
+ * Test iterator
+ */
+ starttime = GetCurrentTimestamp();
+
+ intset_begin_iterate(intset);
+ n = 0;
+ last_int = 0;
+ while (n < spec->num_values)
+ {
+ for (int i = 0; i < pattern_num_values && n < spec->num_values; i++)
+ {
+ uint64 expected = last_int + pattern_values[i];
+ uint64 x;
+
+ if (!intset_iterate_next(intset, &x))
+ break;
+
+ if (x != expected)
+ elog(ERROR, "iterate returned wrong value; got %lu, expected %lu", x, expected);
+ n++;
+ }
+ last_int += spec->spacing;
+ }
+ endtime = GetCurrentTimestamp();
+ if (intset_test_stats)
+ fprintf(stderr, "iterated %lu values in %lu ms\n", n, (endtime - starttime) / 1000);
+
+ if (n < spec->num_values)
+ elog(ERROR, "iterator stopped short after %lu entries, expected %lu", n, spec->num_values);
+ if (n > spec->num_values)
+ elog(ERROR, "iterator returned %lu entries, %lu was expected", n, spec->num_values);
+
+ MemoryContextDelete(intset_ctx);
+}
+
+/*
+ * Test with a set containing a single integer.
+ */
+static void
+test_single_value(uint64 value)
+{
+ IntegerSet *intset;
+ uint64 x;
+ uint64 num_entries;
+ bool found;
+
+ elog(NOTICE, "testing intset with single value %lu", value);
+
+ /* Create the set. */
+ intset = intset_create();
+ intset_add_member(intset, value);
+
+ /* Test intset_get_num_entries() */
+ num_entries = intset_num_entries(intset);
+ if (num_entries != 1)
+ elog(ERROR, "intset_num_entries returned %lu, expected %lu", num_entries, 1L);
+
+ /*
+ * Test intset_is_member() at various special values, like 0 and and
+ * maximum possible 64-bit integer, as well as the value itself.
+ */
+ if (intset_is_member(intset, 0) != (value == 0))
+ elog(ERROR, "intset_is_member failed for 0");
+ if (intset_is_member(intset, 1) != (value == 1))
+ elog(ERROR, "intset_is_member failed for 1");
+ if (intset_is_member(intset, PG_UINT64_MAX) != (value == PG_UINT64_MAX))
+ elog(ERROR, "intset_is_member failed for PG_UINT64_MAX");
+ if (intset_is_member(intset, value) != true)
+ elog(ERROR, "intset_is_member failed for the tested value");
+
+ /*
+ * Test iterator
+ */
+ intset_begin_iterate(intset);
+ found = intset_iterate_next(intset, &x);
+ if (!found || x != value)
+ elog(ERROR, "intset_iterate_next failed for %lu", x);
+
+ found = intset_iterate_next(intset, &x);
+ if (found)
+ elog(ERROR, "intset_iterate_next failed %lu", x);
+}
+
+/*
+ * Test with an integer set that contains:
+ *
+ * - a given single 'value', and
+ * - all integers between 'filler_min' and 'filler_max'.
+ *
+ * This exercises different codepaths than testing just with a single value,
+ * because the implementation buffers newly-added values. If we add just
+ * single value to the set, we won't test the internal B-tree code at all,
+ * just the code that deals with the buffer.
+ */
+static void
+test_single_value_and_filler(uint64 value, uint64 filler_min, uint64 filler_max)
+{
+ IntegerSet *intset;
+ uint64 x;
+ bool found;
+ uint64 *iter_expected;
+ uint64 n = 0;
+ uint64 num_entries = 0;
+ uint64 mem_usage;
+
+ elog(NOTICE, "testing intset with value %lu, and all between %lu and %lu",
+ value, filler_min, filler_max);
+
+ intset = intset_create();
+
+ iter_expected = palloc(sizeof(uint64) * (filler_max - filler_min + 1));
+ if (value < filler_min)
+ {
+ intset_add_member(intset, value);
+ iter_expected[n++] = value;
+ }
+
+ for (x = filler_min; x < filler_max; x++)
+ {
+ intset_add_member(intset, x);
+ iter_expected[n++] = x;
+ }
+
+ if (value >= filler_max)
+ {
+ intset_add_member(intset, value);
+ iter_expected[n++] = value;
+ }
+
+ /* Test intset_get_num_entries() */
+ num_entries = intset_num_entries(intset);
+ if (num_entries != n)
+ elog(ERROR, "intset_num_entries returned %lu, expected %lu", num_entries, n);
+
+ /*
+ * Test intset_is_member() at various spots, at and around the values that
+ * we expect to be set, as well as 0 and the maximum possible value.
+ */
+ check_with_filler(intset, 0,
+ value, filler_min, filler_max);
+ check_with_filler(intset, 1,
+ value, filler_min, filler_max);
+ check_with_filler(intset, filler_min - 1,
+ value, filler_min, filler_max);
+ check_with_filler(intset, filler_min,
+ value, filler_min, filler_max);
+ check_with_filler(intset, filler_min + 1,
+ value, filler_min, filler_max);
+ check_with_filler(intset, value - 1,
+ value, filler_min, filler_max);
+ check_with_filler(intset, value,
+ value, filler_min, filler_max);
+ check_with_filler(intset, value + 1,
+ value, filler_min, filler_max);
+ check_with_filler(intset, filler_max - 1,
+ value, filler_min, filler_max);
+ check_with_filler(intset, filler_max,
+ value, filler_min, filler_max);
+ check_with_filler(intset, filler_max + 1,
+ value, filler_min, filler_max);
+ check_with_filler(intset, PG_UINT64_MAX - 1,
+ value, filler_min, filler_max);
+ check_with_filler(intset, PG_UINT64_MAX,
+ value, filler_min, filler_max);
+
+ intset_begin_iterate(intset);
+ for (uint64 i = 0; i < n; i++)
+ {
+ found = intset_iterate_next(intset, &x);
+ if (!found || x != iter_expected[i])
+ elog(ERROR, "intset_iterate_next failed for %lu", x);
+ }
+ found = intset_iterate_next(intset, &x);
+ if (found)
+ elog(ERROR, "intset_iterate_next failed %lu", x);
+
+ mem_usage = intset_memory_usage(intset);
+ if (mem_usage < 5000 || mem_usage > 500000000)
+ elog(ERROR, "intset_memory_usage() reported suspicous value: %lu", mem_usage);
+}
+
+/*
+ * Helper function for test_single_value_and_filler.
+ *
+ * Calls intset_is_member() for value 'x', and checks that the result is what
+ * we expect.
+ */
+static void
+check_with_filler(IntegerSet *intset, uint64 x,
+ uint64 value, uint64 filler_min, uint64 filler_max)
+{
+ bool expected;
+ bool actual;
+
+ expected = (x == value || (filler_min <= x && x < filler_max));
+
+ actual = intset_is_member(intset, x);
+
+ if (actual != expected)
+ elog(ERROR, "intset_is_member failed for %lu", x);
+}
+
+/*
+ * Test empty set
+ */
+static void
+test_empty(void)
+{
+ IntegerSet *intset;
+ uint64 x;
+
+ elog(NOTICE, "testing intset with empty set");
+
+ intset = intset_create();
+
+ /* Test intset_is_member() */
+ if (intset_is_member(intset, 0) != false)
+ elog(ERROR, "intset_is_member on empty set returned true");
+ if (intset_is_member(intset, 1) != false)
+ elog(ERROR, "intset_is_member on empty set returned true");
+ if (intset_is_member(intset, PG_UINT64_MAX) != false)
+ elog(ERROR, "intset_is_member on empty set returned true");
+
+ /* Test iterator */
+ intset_begin_iterate(intset);
+ if (intset_iterate_next(intset, &x))
+ elog(ERROR, "intset_iterate_next on empty set returned a value (%lu)", x);
+}
+
+/*
+ * Test with integers that are more than 2^60 apart.
+ *
+ * The Simple-8b encoding used by the set implementation can only encode
+ * values up to 2^60. That makes large differences like this interesting
+ * to test.
+ */
+static void
+test_huge_distances(void)
+{
+ IntegerSet *intset;
+ uint64 values[1000];
+ int num_values = 0;
+ uint64 val = 0;
+ bool found;
+ uint64 x;
+
+ elog(NOTICE, "testing intset with distances > 2^60 between values");
+
+ val = 0;
+ values[num_values++] = val;
+
+ val += 1152921504606846976L - 1; /* 2^60 - 1 */
+ values[num_values++] = val;
+
+ val += 1152921504606846976L - 1; /* 2^60 - 1 */
+ values[num_values++] = val;
+
+ val += 1152921504606846976L; /* 2^60 */
+ values[num_values++] = val;
+
+ val += 1152921504606846976L; /* 2^60 */
+ values[num_values++] = val;
+
+ val += 1152921504606846976L; /* 2^60 */
+ values[num_values++] = val;
+
+ val += 1152921504606846976L + 1; /* 2^60 + 1 */
+ values[num_values++] = val;
+
+ val += 1152921504606846976L + 1; /* 2^60 + 1 */
+ values[num_values++] = val;
+
+ val += 1152921504606846976L + 1; /* 2^60 + 1 */
+ values[num_values++] = val;
+
+ val += 1152921504606846976L; /* 2^60 */
+ values[num_values++] = val;
+
+ /* we're now very close to 2^64, so can't add large values anymore */
+
+ intset = intset_create();
+
+ /*
+ * Add many more values to the end, to make sure that all the above values
+ * get flushed and packed into the tree structure.
+ */
+ while (num_values < 1000)
+ {
+ val += pg_lrand48();
+ values[num_values++] = val;
+ }
+
+ /* Add these numbers to the set */
+ for (int i = 0; i < num_values; i++)
+ intset_add_member(intset, values[i]);
+
+ /*
+ * Test iterset_is_member() around each of these values
+ */
+ for (int i = 0; i < num_values; i++)
+ {
+ uint64 x = values[i];
+ bool result;
+
+ if (x > 0)
+ {
+ result = intset_is_member(intset, x - 1);
+ if (result != false)
+ elog(ERROR, "intset_is_member failed for %lu", x - 1);
+ }
+
+ result = intset_is_member(intset, x);
+ if (result != true)
+ elog(ERROR, "intset_is_member failed for %lu", x);
+
+ result = intset_is_member(intset, x + 1);
+ if (result != false)
+ elog(ERROR, "intset_is_member failed for %lu", x + 1);
+ }
+
+ /*
+ * Test iterator
+ */
+ intset_begin_iterate(intset);
+ for (int i = 0; i < num_values; i++)
+ {
+ found = intset_iterate_next(intset, &x);
+ if (!found || x != values[i])
+ elog(ERROR, "intset_iterate_next failed for %lu", x);
+ }
+ found = intset_iterate_next(intset, &x);
+ if (found)
+ elog(ERROR, "intset_iterate_next failed %lu", x);
+}