Skip to content

Commit 7f5b198

Browse files
Support connection load balancing in libpq
This adds support for load balancing connections with libpq using a connection parameter: load_balance_hosts=<string>. When the parameter is set to random, hosts and addresses will be connected to in random order. This then results in load balancing across these addresses and hosts when multiple clients or frequent connection setups are used. The randomization employed performs two levels of shuffling: 1. The given hosts are randomly shuffled, before resolving them one-by-one. 2. Once a host's addresses have been resolved, the returned addresses are shuffled, before trying to connect to them one-by-one. Author: Jelte Fennema <[email protected]> Reviewed-by: Aleksander Alekseev <[email protected]> Reviewed-by: Michael Banck <[email protected]> Reviewed-by: Andrey Borodin <[email protected]> Discussion: https://postgr.es/m/PR3PR83MB04768E2FF04818EEB2179949F7A69@PR3PR83MB0476.EURPRD83.prod.outlook.com
1 parent 44d85ba commit 7f5b198

File tree

10 files changed

+431
-3
lines changed

10 files changed

+431
-3
lines changed

‎.cirrus.yml

+15-1
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ env:
2525
MTEST_ARGS: --print-errorlogs --no-rebuild -C build
2626
PGCTLTIMEOUT: 120 # avoids spurious failures during parallel tests
2727
TEMP_CONFIG: ${CIRRUS_WORKING_DIR}/src/tools/ci/pg_ci_base.conf
28-
PG_TEST_EXTRA: kerberos ldap ssl
28+
PG_TEST_EXTRA: kerberos ldap ssl load_balance
2929

3030

3131
# What files to preserve in case tests fail
@@ -313,6 +313,14 @@ task:
313313
mkdir -m 770 /tmp/cores
314314
chown root:postgres /tmp/cores
315315
sysctl kernel.core_pattern='/tmp/cores/%e-%s-%p.core'
316+
317+
setup_hosts_file_script: |
318+
cat >> /etc/hosts <<-EOF
319+
127.0.0.1 pg-loadbalancetest
320+
127.0.0.2 pg-loadbalancetest
321+
127.0.0.3 pg-loadbalancetest
322+
EOF
323+
316324
setup_additional_packages_script: |
317325
#apt-get update
318326
#DEBIAN_FRONTEND=noninteractive apt-get -y install ...
@@ -564,6 +572,12 @@ task:
564572
setup_additional_packages_script: |
565573
REM choco install -y --no-progress ...
566574
575+
setup_hosts_file_script: |
576+
echo 127.0.0.1 pg-loadbalancetest >> c:\Windows\System32\Drivers\etc\hosts
577+
echo 127.0.0.2 pg-loadbalancetest >> c:\Windows\System32\Drivers\etc\hosts
578+
echo 127.0.0.3 pg-loadbalancetest >> c:\Windows\System32\Drivers\etc\hosts
579+
type c:\Windows\System32\Drivers\etc\hosts
580+
567581
# Use /DEBUG:FASTLINK to avoid high memory usage during linking
568582
configure_script: |
569583
vcvarsall x64

‎doc/src/sgml/libpq.sgml

+61
Original file line numberDiff line numberDiff line change
@@ -2115,6 +2115,67 @@ postgresql://%2Fvar%2Flib%2Fpostgresql/dbname
21152115
</para>
21162116
</listitem>
21172117
</varlistentry>
2118+
2119+
<varlistentry id="libpq-connect-load-balance-hosts" xreflabel="load_balance_hosts">
2120+
<term><literal>load_balance_hosts</literal></term>
2121+
<listitem>
2122+
<para>
2123+
Controls the order in which the client tries to connect to the available
2124+
hosts and addresses. Once a connection attempt is successful no other
2125+
hosts and addresses will be tried. This parameter is typically used in
2126+
combination with multiple host names or a DNS record that returns
2127+
multiple IPs. This parameter can be used in combination with
2128+
<xref linkend="libpq-connect-target-session-attrs"/>
2129+
to, for example, load balance over standby servers only. Once successfully
2130+
connected, subsequent queries on the returned connection will all be
2131+
sent to the same server. There are currently two modes:
2132+
<variablelist>
2133+
<varlistentry>
2134+
<term><literal>disable</literal> (default)</term>
2135+
<listitem>
2136+
<para>
2137+
No load balancing across hosts is performed. Hosts are tried in
2138+
the order in which they are provided and addresses are tried in
2139+
the order they are received from DNS or a hosts file.
2140+
</para>
2141+
</listitem>
2142+
</varlistentry>
2143+
2144+
<varlistentry>
2145+
<term><literal>random</literal></term>
2146+
<listitem>
2147+
<para>
2148+
Hosts and addresses are tried in random order. This value is mostly
2149+
useful when opening multiple connections at the same time, possibly
2150+
from different machines. This way connections can be load balanced
2151+
across multiple <productname>PostgreSQL</productname> servers.
2152+
</para>
2153+
<para>
2154+
While random load balancing, due to its random nature, will almost
2155+
never result in a completely uniform distribution, it statistically
2156+
gets quite close. One important aspect here is that this algorithm
2157+
uses two levels of random choices: First the hosts
2158+
will be resolved in random order. Then secondly, before resolving
2159+
the next host, all resolved addresses for the current host will be
2160+
tried in random order. This behavior can skew the number of
2161+
connections each node gets greatly in certain cases, for instance
2162+
when some hosts resolve to more addresses than others. But such a
2163+
skew can also be used on purpose, e.g. to increase the number of
2164+
connections a larger server gets by providing its hostname multiple
2165+
times in the host string.
2166+
</para>
2167+
<para>
2168+
When using this value it's recommended to also configure a reasonable
2169+
value for <xref linkend="libpq-connect-connect-timeout"/>, so that
2170+
if one of the nodes that are used for load balancing is not responding,
2171+
a new node will be tried.
2172+
</para>
2173+
</listitem>
2174+
</varlistentry>
2175+
</variablelist>
2176+
</para>
2177+
</listitem>
2178+
</varlistentry>
21182179
</variablelist>
21192180
</para>
21202181
</sect2>

‎doc/src/sgml/regress.sgml

+12-1
Original file line numberDiff line numberDiff line change
@@ -256,7 +256,7 @@ make check-world -j8 >/dev/null
256256
<varname>PG_TEST_EXTRA</varname> to a whitespace-separated list, for
257257
example:
258258
<programlisting>
259-
make check-world PG_TEST_EXTRA='kerberos ldap ssl'
259+
make check-world PG_TEST_EXTRA='kerberos ldap ssl load_balance'
260260
</programlisting>
261261
The following values are currently supported:
262262
<variablelist>
@@ -290,6 +290,17 @@ make check-world PG_TEST_EXTRA='kerberos ldap ssl'
290290
</listitem>
291291
</varlistentry>
292292

293+
<varlistentry>
294+
<term><literal>load_balance</literal></term>
295+
<listitem>
296+
<para>
297+
Runs the test <filename>src/interfaces/libpq/t/004_load_balance_dns.pl</filename>.
298+
This requires editing the system <filename>hosts</filename> file and
299+
opens TCP/IP listen sockets.
300+
</para>
301+
</listitem>
302+
</varlistentry>
303+
293304
<varlistentry>
294305
<term><literal>wal_consistency_checking</literal></term>
295306
<listitem>

‎src/interfaces/libpq/fe-connect.c

+103
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,7 @@ static int ldapServiceLookup(const char *purl, PQconninfoOption *options,
123123
#define DefaultChannelBinding "disable"
124124
#endif
125125
#define DefaultTargetSessionAttrs "any"
126+
#define DefaultLoadBalanceHosts "disable"
126127
#ifdef USE_SSL
127128
#define DefaultSSLMode "prefer"
128129
#define DefaultSSLCertMode "allow"
@@ -351,6 +352,11 @@ static const internalPQconninfoOption PQconninfoOptions[] = {
351352
"Target-Session-Attrs", "", 15, /* sizeof("prefer-standby") = 15 */
352353
offsetof(struct pg_conn, target_session_attrs)},
353354

355+
{"load_balance_hosts", "PGLOADBALANCEHOSTS",
356+
DefaultLoadBalanceHosts, NULL,
357+
"Load-Balance-Hosts", "", 8, /* sizeof("disable") = 8 */
358+
offsetof(struct pg_conn, load_balance_hosts)},
359+
354360
/* Terminating entry --- MUST BE LAST */
355361
{NULL, NULL, NULL, NULL,
356362
NULL, NULL, 0}
@@ -435,6 +441,8 @@ static void pgpassfileWarning(PGconn *conn);
435441
static void default_threadlock(int acquire);
436442
static bool sslVerifyProtocolVersion(const char *version);
437443
static bool sslVerifyProtocolRange(const char *min, const char *max);
444+
static bool parse_int_param(const char *value, int *result, PGconn *conn,
445+
const char *context);
438446

439447

440448
/* global variable because fe-auth.c needs to access it */
@@ -1020,6 +1028,31 @@ parse_comma_separated_list(char **startptr, bool *more)
10201028
return p;
10211029
}
10221030

1031+
/*
1032+
* Initializes the prng_state field of the connection. We want something
1033+
* unpredictable, so if possible, use high-quality random bits for the
1034+
* seed. Otherwise, fall back to a seed built by XORing the connection's
1035+
* address, the current time (seconds and microseconds) and the PID.
1036+
*/
1037+
static void
1038+
libpq_prng_init(PGconn *conn)
1039+
{
1040+
uint64 rseed;
1041+
struct timeval tval = {0};
1042+
1043+
if (pg_prng_strong_seed(&conn->prng_state))
1044+
return; /* seeded from the strong source; no fallback needed */
1045+
1046+
gettimeofday(&tval, NULL);
1047+
1048+
rseed = ((uint64) (uintptr_t) conn) ^ /* via uintptr_t: pointers need not be 64 bits wide */
1049+
((uint64) getpid()) ^
1050+
((uint64) tval.tv_usec) ^
1051+
((uint64) tval.tv_sec);
1052+
1053+
pg_prng_seed(&conn->prng_state, rseed);
1054+
}
1055+
10231056
/*
10241057
* connectOptions2
10251058
*
@@ -1619,6 +1652,49 @@ connectOptions2(PGconn *conn)
16191652
else
16201653
conn->target_server_type = SERVER_TYPE_ANY;
16211654

1655+
/*
1656+
* validate load_balance_hosts option, and set load_balance_type
1657+
*/
1658+
if (conn->load_balance_hosts)
1659+
{
1660+
if (strcmp(conn->load_balance_hosts, "disable") == 0)
1661+
conn->load_balance_type = LOAD_BALANCE_DISABLE;
1662+
else if (strcmp(conn->load_balance_hosts, "random") == 0)
1663+
conn->load_balance_type = LOAD_BALANCE_RANDOM;
1664+
else
1665+
{
1666+
conn->status = CONNECTION_BAD;
1667+
libpq_append_conn_error(conn, "invalid %s value: \"%s\"",
1668+
"load_balance_hosts",
1669+
conn->load_balance_hosts);
1670+
return false;
1671+
}
1672+
}
1673+
else
1674+
conn->load_balance_type = LOAD_BALANCE_DISABLE;
1675+
1676+
if (conn->load_balance_type == LOAD_BALANCE_RANDOM)
1677+
{
1678+
libpq_prng_init(conn);
1679+
1680+
/*
1681+
* This is the "inside-out" variant of the Fisher-Yates shuffle
1682+
* algorithm. Notionally, we append each new value to the array and
1683+
* then swap it with a randomly-chosen array element (possibly
1684+
* including itself, else we fail to generate permutations with the
1685+
* last integer last). The swap step can be optimized by combining it
1686+
* with the insertion.
1687+
*/
1688+
for (i = 1; i < conn->nconnhost; i++)
1689+
{
1690+
int j = pg_prng_uint64_range(&conn->prng_state, 0, i);
1691+
pg_conn_host temp = conn->connhost[j];
1692+
1693+
conn->connhost[j] = conn->connhost[i];
1694+
conn->connhost[i] = temp;
1695+
}
1696+
}
1697+
16221698
/*
16231699
* Resolve special "auto" client_encoding from the locale
16241700
*/
@@ -2626,6 +2702,32 @@ PQconnectPoll(PGconn *conn)
26262702
if (ret)
26272703
goto error_return; /* message already logged */
26282704

2705+
/*
2706+
* If random load balancing is enabled we shuffle the addresses.
2707+
*/
2708+
if (conn->load_balance_type == LOAD_BALANCE_RANDOM)
2709+
{
2710+
/*
2711+
* This is the "inside-out" variant of the Fisher-Yates shuffle
2712+
* algorithm. Notionally, we append each new value to the array
2713+
* and then swap it with a randomly-chosen array element (possibly
2714+
* including itself, else we fail to generate permutations with
2715+
* the last integer last). The swap step can be optimized by
2716+
* combining it with the insertion.
2717+
*
2718+
* We don't need to initialize conn->prng_state here, because that
2719+
* already happened in connectOptions2.
2720+
*/
2721+
for (int i = 1; i < conn->naddr; i++)
2722+
{
2723+
int j = pg_prng_uint64_range(&conn->prng_state, 0, i);
2724+
AddrInfo temp = conn->addr[j];
2725+
2726+
conn->addr[j] = conn->addr[i];
2727+
conn->addr[i] = temp;
2728+
}
2729+
}
2730+
26292731
reset_connection_state_machine = true;
26302732
conn->try_next_host = false;
26312733
}
@@ -4320,6 +4422,7 @@ freePGconn(PGconn *conn)
43204422
free(conn->outBuffer);
43214423
free(conn->rowBuf);
43224424
free(conn->target_session_attrs);
4425+
free(conn->load_balance_hosts);
43234426
termPQExpBuffer(&conn->errorMessage);
43244427
termPQExpBuffer(&conn->workBuffer);
43254428

‎src/interfaces/libpq/libpq-int.h

+16-1
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,8 @@
2626
#include <netdb.h>
2727
#include <sys/socket.h>
2828
#include <time.h>
29-
#ifndef WIN32
29+
/* MinGW has sys/time.h, but MSVC doesn't */
30+
#ifndef _MSC_VER
3031
#include <sys/time.h>
3132
#endif
3233

@@ -82,6 +83,8 @@ typedef struct
8283
#endif
8384
#endif /* USE_OPENSSL */
8485

86+
#include "common/pg_prng.h"
87+
8588
/*
8689
* POSTGRES backend dependent Constants.
8790
*/
@@ -242,6 +245,13 @@ typedef enum
242245
SERVER_TYPE_PREFER_STANDBY_PASS2 /* second pass - behaves same as ANY */
243246
} PGTargetServerType;
244247

248+
/* Load balancing type (decoded value of load_balance_hosts) */
249+
typedef enum
250+
{
251+
LOAD_BALANCE_DISABLE = 0, /* Use the existing host order (default) */
252+
LOAD_BALANCE_RANDOM, /* Randomly shuffle the hosts and their addresses */
253+
} PGLoadBalanceType;
254+
245255
/* Boolean value plus a not-known state, for GUCs we might have to fetch */
246256
typedef enum
247257
{
@@ -398,6 +408,7 @@ struct pg_conn
398408
char *ssl_max_protocol_version; /* maximum TLS protocol version */
399409
char *target_session_attrs; /* desired session properties */
400410
char *require_auth; /* name of the expected auth method */
411+
char *load_balance_hosts; /* load balance over hosts */
401412

402413
/* Optional file to write trace info to */
403414
FILE *Pfdebug;
@@ -469,6 +480,8 @@ struct pg_conn
469480

470481
/* Transient state needed while establishing connection */
471482
PGTargetServerType target_server_type; /* desired session properties */
483+
PGLoadBalanceType load_balance_type; /* desired load balancing
484+
* algorithm */
472485
bool try_next_addr; /* time to advance to next address/host? */
473486
bool try_next_host; /* time to advance to next connhost[]? */
474487
int naddr; /* number of addresses returned by getaddrinfo */
@@ -488,6 +501,8 @@ struct pg_conn
488501
PGVerbosity verbosity; /* error/notice message verbosity */
489502
PGContextVisibility show_context; /* whether to show CONTEXT field */
490503
PGlobjfuncs *lobjfuncs; /* private state for large-object access fns */
504+
pg_prng_state prng_state; /* prng state for load balancing connections */
505+
491506

492507
/* Buffer for data received from backend and not yet processed */
493508
char *inBuffer; /* currently allocated buffer */

‎src/interfaces/libpq/meson.build

+2
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,8 @@ tests += {
116116
'tests': [
117117
't/001_uri.pl',
118118
't/002_api.pl',
119+
't/003_load_balance_host_list.pl',
120+
't/004_load_balance_dns.pl',
119121
],
120122
'env': {'with_ssl': ssl_library},
121123
},

0 commit comments

Comments
 (0)