# Check if the compiler supports the XSAVE instructions using the _xgetbv
# intrinsic function.
#
-# An optional compiler flag can be passed as argument (e.g., -mxsave). If the
-# intrinsic is supported, sets pgac_xsave_intrinsics and CFLAGS_XSAVE.
+# If the intrinsics are supported, sets pgac_xsave_intrinsics.
AC_DEFUN([PGAC_XSAVE_INTRINSICS],
-[define([Ac_cachevar], [AS_TR_SH([pgac_cv_xsave_intrinsics_$1])])dnl
-AC_CACHE_CHECK([for _xgetbv with CFLAGS=$1], [Ac_cachevar],
-[pgac_save_CFLAGS=$CFLAGS
-CFLAGS="$pgac_save_CFLAGS $1"
-AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <immintrin.h>],
- [return _xgetbv(0) & 0xe0;])],
+[define([Ac_cachevar], [AS_TR_SH([pgac_cv_xsave_intrinsics])])dnl
+AC_CACHE_CHECK([for _xgetbv], [Ac_cachevar],
+[AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <immintrin.h>
+ #if defined(__has_attribute) && __has_attribute (target)
+ __attribute__((target("xsave")))
+ #endif
+ static int xsave_test(void)
+ {
+ return _xgetbv(0) & 0xe0;
+ }],
+ [return xsave_test();])],
[Ac_cachevar=yes],
- [Ac_cachevar=no])
-CFLAGS="$pgac_save_CFLAGS"])
+ [Ac_cachevar=no])])
if test x"$Ac_cachevar" = x"yes"; then
- CFLAGS_XSAVE="$1"
pgac_xsave_intrinsics=yes
fi
undefine([Ac_cachevar])dnl
# _mm512_setzero_si512, _mm512_maskz_loadu_epi8, _mm512_popcnt_epi64,
# _mm512_add_epi64, and _mm512_reduce_add_epi64 intrinsic functions.
#
-# Optional compiler flags can be passed as argument (e.g., -mavx512vpopcntdq
-# -mavx512bw). If the intrinsics are supported, sets
-# pgac_avx512_popcnt_intrinsics and CFLAGS_POPCNT.
+# If the intrinsics are supported, sets pgac_avx512_popcnt_intrinsics.
AC_DEFUN([PGAC_AVX512_POPCNT_INTRINSICS],
-[define([Ac_cachevar], [AS_TR_SH([pgac_cv_avx512_popcnt_intrinsics_$1])])dnl
-AC_CACHE_CHECK([for _mm512_popcnt_epi64 with CFLAGS=$1], [Ac_cachevar],
-[pgac_save_CFLAGS=$CFLAGS
-CFLAGS="$pgac_save_CFLAGS $1"
-AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <immintrin.h>],
- [const char buf@<:@sizeof(__m512i)@:>@;
- PG_INT64_TYPE popcnt = 0;
- __m512i accum = _mm512_setzero_si512();
- const __m512i val = _mm512_maskz_loadu_epi8((__mmask64) 0xf0f0f0f0f0f0f0f0, (const __m512i *) buf);
- const __m512i cnt = _mm512_popcnt_epi64(val);
- accum = _mm512_add_epi64(accum, cnt);
- popcnt = _mm512_reduce_add_epi64(accum);
- /* return computed value, to prevent the above being optimized away */
- return popcnt == 0;])],
+[define([Ac_cachevar], [AS_TR_SH([pgac_cv_avx512_popcnt_intrinsics])])dnl
+AC_CACHE_CHECK([for _mm512_popcnt_epi64], [Ac_cachevar],
+[AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <immintrin.h>
+ #if defined(__has_attribute) && __has_attribute (target)
+ __attribute__((target("avx512vpopcntdq","avx512bw")))
+ #endif
+ static int popcount_test(void)
+ {
+ const char buf@<:@sizeof(__m512i)@:>@;
+ PG_INT64_TYPE popcnt = 0;
+ __m512i accum = _mm512_setzero_si512();
+ const __m512i val = _mm512_maskz_loadu_epi8((__mmask64) 0xf0f0f0f0f0f0f0f0, (const __m512i *) buf);
+ const __m512i cnt = _mm512_popcnt_epi64(val);
+ accum = _mm512_add_epi64(accum, cnt);
+ popcnt = _mm512_reduce_add_epi64(accum);
+ return (int) popcnt;
+ }],
+ [return popcount_test();])],
[Ac_cachevar=yes],
- [Ac_cachevar=no])
-CFLAGS="$pgac_save_CFLAGS"])
+ [Ac_cachevar=no])])
if test x"$Ac_cachevar" = x"yes"; then
- CFLAGS_POPCNT="$1"
pgac_avx512_popcnt_intrinsics=yes
fi
undefine([Ac_cachevar])dnl
MSGFMT
PG_CRC32C_OBJS
CFLAGS_CRC
-PG_POPCNT_OBJS
-CFLAGS_POPCNT
-CFLAGS_XSAVE
LIBOBJS
OPENSSL
ZSTD
# Check for XSAVE intrinsics
#
-CFLAGS_XSAVE=""
-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for _xgetbv with CFLAGS=" >&5
-$as_echo_n "checking for _xgetbv with CFLAGS=... " >&6; }
-if ${pgac_cv_xsave_intrinsics_+:} false; then :
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for _xgetbv" >&5
+$as_echo_n "checking for _xgetbv... " >&6; }
+if ${pgac_cv_xsave_intrinsics+:} false; then :
$as_echo_n "(cached) " >&6
else
- pgac_save_CFLAGS=$CFLAGS
-CFLAGS="$pgac_save_CFLAGS "
-cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-#include <immintrin.h>
-int
-main ()
-{
-return _xgetbv(0) & 0xe0;
- ;
- return 0;
-}
-_ACEOF
-if ac_fn_c_try_link "$LINENO"; then :
- pgac_cv_xsave_intrinsics_=yes
-else
- pgac_cv_xsave_intrinsics_=no
-fi
-rm -f core conftest.err conftest.$ac_objext \
- conftest$ac_exeext conftest.$ac_ext
-CFLAGS="$pgac_save_CFLAGS"
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_xsave_intrinsics_" >&5
-$as_echo "$pgac_cv_xsave_intrinsics_" >&6; }
-if test x"$pgac_cv_xsave_intrinsics_" = x"yes"; then
- CFLAGS_XSAVE=""
- pgac_xsave_intrinsics=yes
-fi
-
-if test x"$pgac_xsave_intrinsics" != x"yes"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for _xgetbv with CFLAGS=-mxsave" >&5
-$as_echo_n "checking for _xgetbv with CFLAGS=-mxsave... " >&6; }
-if ${pgac_cv_xsave_intrinsics__mxsave+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- pgac_save_CFLAGS=$CFLAGS
-CFLAGS="$pgac_save_CFLAGS -mxsave"
-cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
#include <immintrin.h>
+ #if defined(__has_attribute) && __has_attribute (target)
+ __attribute__((target("xsave")))
+ #endif
+ static int xsave_test(void)
+ {
+ return _xgetbv(0) & 0xe0;
+ }
int
main ()
{
-return _xgetbv(0) & 0xe0;
+return xsave_test();
;
return 0;
}
_ACEOF
if ac_fn_c_try_link "$LINENO"; then :
- pgac_cv_xsave_intrinsics__mxsave=yes
+ pgac_cv_xsave_intrinsics=yes
else
- pgac_cv_xsave_intrinsics__mxsave=no
+ pgac_cv_xsave_intrinsics=no
fi
rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
-CFLAGS="$pgac_save_CFLAGS"
fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_xsave_intrinsics__mxsave" >&5
-$as_echo "$pgac_cv_xsave_intrinsics__mxsave" >&6; }
-if test x"$pgac_cv_xsave_intrinsics__mxsave" = x"yes"; then
- CFLAGS_XSAVE="-mxsave"
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_xsave_intrinsics" >&5
+$as_echo "$pgac_cv_xsave_intrinsics" >&6; }
+if test x"$pgac_cv_xsave_intrinsics" = x"yes"; then
pgac_xsave_intrinsics=yes
fi
-fi
if test x"$pgac_xsave_intrinsics" = x"yes"; then
$as_echo "#define HAVE_XSAVE_INTRINSICS 1" >>confdefs.h
fi
-
# Check for AVX-512 popcount intrinsics
#
-CFLAGS_POPCNT=""
-PG_POPCNT_OBJS=""
if test x"$host_cpu" = x"x86_64"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for _mm512_popcnt_epi64 with CFLAGS=" >&5
-$as_echo_n "checking for _mm512_popcnt_epi64 with CFLAGS=... " >&6; }
-if ${pgac_cv_avx512_popcnt_intrinsics_+:} false; then :
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking for _mm512_popcnt_epi64" >&5
+$as_echo_n "checking for _mm512_popcnt_epi64... " >&6; }
+if ${pgac_cv_avx512_popcnt_intrinsics+:} false; then :
$as_echo_n "(cached) " >&6
else
- pgac_save_CFLAGS=$CFLAGS
-CFLAGS="$pgac_save_CFLAGS "
-cat confdefs.h - <<_ACEOF >conftest.$ac_ext
-/* end confdefs.h. */
-#include <immintrin.h>
-int
-main ()
-{
-const char buf[sizeof(__m512i)];
- PG_INT64_TYPE popcnt = 0;
- __m512i accum = _mm512_setzero_si512();
- const __m512i val = _mm512_maskz_loadu_epi8((__mmask64) 0xf0f0f0f0f0f0f0f0, (const __m512i *) buf);
- const __m512i cnt = _mm512_popcnt_epi64(val);
- accum = _mm512_add_epi64(accum, cnt);
- popcnt = _mm512_reduce_add_epi64(accum);
- /* return computed value, to prevent the above being optimized away */
- return popcnt == 0;
- ;
- return 0;
-}
-_ACEOF
-if ac_fn_c_try_link "$LINENO"; then :
- pgac_cv_avx512_popcnt_intrinsics_=yes
-else
- pgac_cv_avx512_popcnt_intrinsics_=no
-fi
-rm -f core conftest.err conftest.$ac_objext \
- conftest$ac_exeext conftest.$ac_ext
-CFLAGS="$pgac_save_CFLAGS"
-fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_avx512_popcnt_intrinsics_" >&5
-$as_echo "$pgac_cv_avx512_popcnt_intrinsics_" >&6; }
-if test x"$pgac_cv_avx512_popcnt_intrinsics_" = x"yes"; then
- CFLAGS_POPCNT=""
- pgac_avx512_popcnt_intrinsics=yes
-fi
-
- if test x"$pgac_avx512_popcnt_intrinsics" != x"yes"; then
- { $as_echo "$as_me:${as_lineno-$LINENO}: checking for _mm512_popcnt_epi64 with CFLAGS=-mavx512vpopcntdq -mavx512bw" >&5
-$as_echo_n "checking for _mm512_popcnt_epi64 with CFLAGS=-mavx512vpopcntdq -mavx512bw... " >&6; }
-if ${pgac_cv_avx512_popcnt_intrinsics__mavx512vpopcntdq__mavx512bw+:} false; then :
- $as_echo_n "(cached) " >&6
-else
- pgac_save_CFLAGS=$CFLAGS
-CFLAGS="$pgac_save_CFLAGS -mavx512vpopcntdq -mavx512bw"
-cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+ cat confdefs.h - <<_ACEOF >conftest.$ac_ext
/* end confdefs.h. */
#include <immintrin.h>
+ #if defined(__has_attribute) && __has_attribute (target)
+ __attribute__((target("avx512vpopcntdq","avx512bw")))
+ #endif
+ static int popcount_test(void)
+ {
+ const char buf[sizeof(__m512i)];
+ PG_INT64_TYPE popcnt = 0;
+ __m512i accum = _mm512_setzero_si512();
+ const __m512i val = _mm512_maskz_loadu_epi8((__mmask64) 0xf0f0f0f0f0f0f0f0, (const __m512i *) buf);
+ const __m512i cnt = _mm512_popcnt_epi64(val);
+ accum = _mm512_add_epi64(accum, cnt);
+ popcnt = _mm512_reduce_add_epi64(accum);
+ return (int) popcnt;
+ }
int
main ()
{
-const char buf[sizeof(__m512i)];
- PG_INT64_TYPE popcnt = 0;
- __m512i accum = _mm512_setzero_si512();
- const __m512i val = _mm512_maskz_loadu_epi8((__mmask64) 0xf0f0f0f0f0f0f0f0, (const __m512i *) buf);
- const __m512i cnt = _mm512_popcnt_epi64(val);
- accum = _mm512_add_epi64(accum, cnt);
- popcnt = _mm512_reduce_add_epi64(accum);
- /* return computed value, to prevent the above being optimized away */
- return popcnt == 0;
+return popcount_test();
;
return 0;
}
_ACEOF
if ac_fn_c_try_link "$LINENO"; then :
- pgac_cv_avx512_popcnt_intrinsics__mavx512vpopcntdq__mavx512bw=yes
+ pgac_cv_avx512_popcnt_intrinsics=yes
else
- pgac_cv_avx512_popcnt_intrinsics__mavx512vpopcntdq__mavx512bw=no
+ pgac_cv_avx512_popcnt_intrinsics=no
fi
rm -f core conftest.err conftest.$ac_objext \
conftest$ac_exeext conftest.$ac_ext
-CFLAGS="$pgac_save_CFLAGS"
fi
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_avx512_popcnt_intrinsics__mavx512vpopcntdq__mavx512bw" >&5
-$as_echo "$pgac_cv_avx512_popcnt_intrinsics__mavx512vpopcntdq__mavx512bw" >&6; }
-if test x"$pgac_cv_avx512_popcnt_intrinsics__mavx512vpopcntdq__mavx512bw" = x"yes"; then
- CFLAGS_POPCNT="-mavx512vpopcntdq -mavx512bw"
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_avx512_popcnt_intrinsics" >&5
+$as_echo "$pgac_cv_avx512_popcnt_intrinsics" >&6; }
+if test x"$pgac_cv_avx512_popcnt_intrinsics" = x"yes"; then
pgac_avx512_popcnt_intrinsics=yes
fi
- fi
if test x"$pgac_avx512_popcnt_intrinsics" = x"yes"; then
- PG_POPCNT_OBJS="pg_popcount_avx512.o pg_popcount_avx512_choose.o"
$as_echo "#define USE_AVX512_POPCNT_WITH_RUNTIME_CHECK 1" >>confdefs.h
fi
fi
-
-
# Check for Intel SSE 4.2 intrinsics to do CRC calculations.
#
# First check if the _mm_crc32_u8 and _mm_crc32_u64 intrinsics can be used
# Check for XSAVE intrinsics
#
-CFLAGS_XSAVE=""
-PGAC_XSAVE_INTRINSICS([])
-if test x"$pgac_xsave_intrinsics" != x"yes"; then
- PGAC_XSAVE_INTRINSICS([-mxsave])
-fi
+PGAC_XSAVE_INTRINSICS()
if test x"$pgac_xsave_intrinsics" = x"yes"; then
AC_DEFINE(HAVE_XSAVE_INTRINSICS, 1, [Define to 1 if you have XSAVE intrinsics.])
fi
-AC_SUBST(CFLAGS_XSAVE)
# Check for AVX-512 popcount intrinsics
#
-CFLAGS_POPCNT=""
-PG_POPCNT_OBJS=""
if test x"$host_cpu" = x"x86_64"; then
- PGAC_AVX512_POPCNT_INTRINSICS([])
- if test x"$pgac_avx512_popcnt_intrinsics" != x"yes"; then
- PGAC_AVX512_POPCNT_INTRINSICS([-mavx512vpopcntdq -mavx512bw])
- fi
+ PGAC_AVX512_POPCNT_INTRINSICS()
if test x"$pgac_avx512_popcnt_intrinsics" = x"yes"; then
- PG_POPCNT_OBJS="pg_popcount_avx512.o pg_popcount_avx512_choose.o"
AC_DEFINE(USE_AVX512_POPCNT_WITH_RUNTIME_CHECK, 1, [Define to 1 to use AVX-512 popcount instructions with a runtime check.])
fi
fi
-AC_SUBST(CFLAGS_POPCNT)
-AC_SUBST(PG_POPCNT_OBJS)
# Check for Intel SSE 4.2 intrinsics to do CRC calculations.
#
# Check for the availability of XSAVE intrinsics.
###############################################################
-cflags_xsave = []
if host_cpu == 'x86' or host_cpu == 'x86_64'
prog = '''
#include <immintrin.h>
+#if defined(__has_attribute) && __has_attribute (target)
+__attribute__((target("xsave")))
+#endif
int main(void)
{
return _xgetbv(0) & 0xe0;
}
'''
- if cc.links(prog, name: 'XSAVE intrinsics without -mxsave',
- args: test_c_args)
- cdata.set('HAVE_XSAVE_INTRINSICS', 1)
- elif cc.links(prog, name: 'XSAVE intrinsics with -mxsave',
- args: test_c_args + ['-mxsave'])
+ if cc.links(prog, name: 'XSAVE intrinsics', args: test_c_args)
cdata.set('HAVE_XSAVE_INTRINSICS', 1)
- cflags_xsave += '-mxsave'
endif
endif
# Check for the availability of AVX-512 popcount intrinsics.
###############################################################
-cflags_popcnt = []
if host_cpu == 'x86_64'
prog = '''
#include <immintrin.h>
+#if defined(__has_attribute) && __has_attribute (target)
+__attribute__((target("avx512vpopcntdq","avx512bw")))
+#endif
int main(void)
{
const char buf[sizeof(__m512i)];
}
'''
- if cc.links(prog, name: 'AVX-512 popcount without -mavx512vpopcntdq -mavx512bw',
+ if cc.links(prog, name: 'AVX-512 popcount',
args: test_c_args + ['-DINT64=@0@'.format(cdata.get('PG_INT64_TYPE'))])
cdata.set('USE_AVX512_POPCNT_WITH_RUNTIME_CHECK', 1)
- elif cc.links(prog, name: 'AVX-512 popcount with -mavx512vpopcntdq -mavx512bw',
- args: test_c_args + ['-DINT64=@0@'.format(cdata.get('PG_INT64_TYPE'))] + ['-mavx512vpopcntdq'] + ['-mavx512bw'])
- cdata.set('USE_AVX512_POPCNT_WITH_RUNTIME_CHECK', 1)
- cflags_popcnt += ['-mavx512vpopcntdq'] + ['-mavx512bw']
endif
endif
CXXFLAGS_SL_MODULE = @CXXFLAGS_SL_MODULE@
CFLAGS_UNROLL_LOOPS = @CFLAGS_UNROLL_LOOPS@
CFLAGS_VECTORIZE = @CFLAGS_VECTORIZE@
-CFLAGS_POPCNT = @CFLAGS_POPCNT@
CFLAGS_CRC = @CFLAGS_CRC@
-CFLAGS_XSAVE = @CFLAGS_XSAVE@
PERMIT_DECLARATION_AFTER_STATEMENT = @PERMIT_DECLARATION_AFTER_STATEMENT@
PERMIT_MISSING_VARIABLE_DECLARATIONS = @PERMIT_MISSING_VARIABLE_DECLARATIONS@
CXXFLAGS = @CXXFLAGS@
# files needed for the chosen CRC-32C implementation
PG_CRC32C_OBJS = @PG_CRC32C_OBJS@
-# files needed for the chosen popcount implementation
-PG_POPCNT_OBJS = @PG_POPCNT_OBJS@
-
LIBS := -lpgcommon -lpgport $(LIBS)
# to make ws2_32.lib the last library
#define pg_attribute_nonnull(...)
#endif
+/*
+ * pg_attribute_target allows specifying different target options that the
+ * function should be compiled with (e.g., for using special CPU instructions).
+ * Note that there still needs to be a configure-time check to verify that a
+ * specific target is understood by the compiler.
+ */
+#if __has_attribute (target)
+#define pg_attribute_target(...) __attribute__((target(__VA_ARGS__)))
+#else
+#define pg_attribute_target(...)
+#endif
+
/*
* Append PG_USED_FOR_ASSERTS_ONLY to definitions of variables that are only
* used in assert-enabled builds, to avoid compiler warnings about unused
' '.join(cflags_no_missing_var_decls),
'CFLAGS_CRC': ' '.join(cflags_crc),
- 'CFLAGS_POPCNT': ' '.join(cflags_popcnt),
'CFLAGS_UNROLL_LOOPS': ' '.join(unroll_loops_cflags),
'CFLAGS_VECTORIZE': ' '.join(vectorize_cflags),
- 'CFLAGS_XSAVE': ' '.join(cflags_xsave),
'LDFLAGS': var_ldflags,
'LDFLAGS_EX': var_ldflags_ex,
'WANTED_LANGUAGES',
# Not needed because we don't build the server / PLs with the generated makefile
- 'LIBOBJS', 'PG_CRC32C_OBJS', 'PG_POPCNT_OBJS', 'TAS',
+ 'LIBOBJS', 'PG_CRC32C_OBJS', 'TAS',
'PG_TEST_EXTRA',
'DTRACEFLAGS', # only server has dtrace probes
OBJS = \
$(LIBOBJS) \
$(PG_CRC32C_OBJS) \
- $(PG_POPCNT_OBJS) \
bsearch_arg.o \
chklocale.o \
inet_net_ntop.o \
noblock.o \
path.o \
pg_bitutils.o \
+ pg_popcount_avx512.o \
pg_strong_random.o \
pgcheckdir.o \
pgmkdirp.o \
pg_crc32c_armv8_shlib.o: CFLAGS+=$(CFLAGS_CRC)
pg_crc32c_armv8_srv.o: CFLAGS+=$(CFLAGS_CRC)
-# all versions of pg_popcount_avx512_choose.o need CFLAGS_XSAVE
-pg_popcount_avx512_choose.o: CFLAGS+=$(CFLAGS_XSAVE)
-pg_popcount_avx512_choose_shlib.o: CFLAGS+=$(CFLAGS_XSAVE)
-pg_popcount_avx512_choose_srv.o: CFLAGS+=$(CFLAGS_XSAVE)
-
-# all versions of pg_popcount_avx512.o need CFLAGS_POPCNT
-pg_popcount_avx512.o: CFLAGS+=$(CFLAGS_POPCNT)
-pg_popcount_avx512_shlib.o: CFLAGS+=$(CFLAGS_POPCNT)
-pg_popcount_avx512_srv.o: CFLAGS+=$(CFLAGS_POPCNT)
-
#
# Shared library versions of object files
#
'noblock.c',
'path.c',
'pg_bitutils.c',
+ 'pg_popcount_avx512.c',
'pg_strong_random.c',
'pgcheckdir.c',
'pgmkdirp.c',
['pg_crc32c_sse42', 'USE_SSE42_CRC32C_WITH_RUNTIME_CHECK', 'crc'],
['pg_crc32c_sse42_choose', 'USE_SSE42_CRC32C_WITH_RUNTIME_CHECK'],
['pg_crc32c_sb8', 'USE_SSE42_CRC32C_WITH_RUNTIME_CHECK'],
- ['pg_popcount_avx512', 'USE_AVX512_POPCNT_WITH_RUNTIME_CHECK', 'popcnt'],
- ['pg_popcount_avx512_choose', 'USE_AVX512_POPCNT_WITH_RUNTIME_CHECK', 'xsave'],
# arm / aarch64
['pg_crc32c_armv8', 'USE_ARMV8_CRC32C'],
['pg_crc32c_sb8', 'USE_SLICING_BY_8_CRC32C'],
]
-pgport_cflags = {'crc': cflags_crc, 'popcnt': cflags_popcnt, 'xsave': cflags_xsave}
-pgport_sources_cflags = {'crc': [], 'popcnt': [], 'xsave': []}
+pgport_cflags = {'crc': cflags_crc}
+pgport_sources_cflags = {'crc': []}
foreach f : replace_funcs_neg
func = f.get(0)
*/
#include "c.h"
+#if defined(HAVE__GET_CPUID) || defined(HAVE__GET_CPUID_COUNT)
+#include <cpuid.h>
+#endif
+
+#ifdef USE_AVX512_POPCNT_WITH_RUNTIME_CHECK
#include <immintrin.h>
+#endif
+
+#if defined(HAVE__CPUID) || defined(HAVE__CPUIDEX)
+#include <intrin.h>
+#endif
#include "port/pg_bitutils.h"
* use AVX-512 intrinsics, but we check it anyway to be sure. We piggy-back on
* the function pointers that are only used when TRY_POPCNT_FAST is set.
*/
-#ifdef TRY_POPCNT_FAST
+#if defined(TRY_POPCNT_FAST) && defined(USE_AVX512_POPCNT_WITH_RUNTIME_CHECK)
+
+/*
+ * Does CPUID say there's support for XSAVE instructions?
+ */
+static inline bool
+xsave_available(void)
+{
+ unsigned int exx[4] = {0, 0, 0, 0};
+
+#if defined(HAVE__GET_CPUID)
+ __get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]);
+#elif defined(HAVE__CPUID)
+ __cpuid(exx, 1);
+#else
+#error cpuid instruction not available
+#endif
+ return (exx[2] & (1 << 27)) != 0; /* osxsave */
+}
+
+/*
+ * Does XGETBV say the ZMM registers are enabled?
+ *
+ * NB: Caller is responsible for verifying that xsave_available() returns true
+ * before calling this.
+ */
+#ifdef HAVE_XSAVE_INTRINSICS
+pg_attribute_target("xsave")
+#endif
+static inline bool
+zmm_regs_available(void)
+{
+#ifdef HAVE_XSAVE_INTRINSICS
+ return (_xgetbv(0) & 0xe6) == 0xe6;
+#else
+ return false;
+#endif
+}
+
+/*
+ * Does CPUID say there's support for AVX-512 popcount and byte-and-word
+ * instructions?
+ */
+static inline bool
+avx512_popcnt_available(void)
+{
+ unsigned int exx[4] = {0, 0, 0, 0};
+
+#if defined(HAVE__GET_CPUID_COUNT)
+ __get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]);
+#elif defined(HAVE__CPUIDEX)
+ __cpuidex(exx, 7, 0);
+#else
+#error cpuid instruction not available
+#endif
+ return (exx[2] & (1 << 14)) != 0 && /* avx512-vpopcntdq */
+ (exx[1] & (1 << 30)) != 0; /* avx512-bw */
+}
+
+/*
+ * Returns true if the CPU supports the instructions required for the AVX-512
+ * pg_popcount() implementation.
+ */
+bool
+pg_popcount_avx512_available(void)
+{
+ return xsave_available() &&
+ zmm_regs_available() &&
+ avx512_popcnt_available();
+}
/*
* pg_popcount_avx512
* Returns the number of 1-bits in buf
*/
+pg_attribute_target("avx512vpopcntdq", "avx512bw")
uint64
pg_popcount_avx512(const char *buf, int bytes)
{
* pg_popcount_masked_avx512
* Returns the number of 1-bits in buf after applying the mask to each byte
*/
+pg_attribute_target("avx512vpopcntdq", "avx512bw")
uint64
pg_popcount_masked_avx512(const char *buf, int bytes, bits8 mask)
{
return _mm512_reduce_add_epi64(accum);
}
-#endif /* TRY_POPCNT_FAST */
+#endif /* TRY_POPCNT_FAST &&
+ * USE_AVX512_POPCNT_WITH_RUNTIME_CHECK */
+++ /dev/null
-/*-------------------------------------------------------------------------
- *
- * pg_popcount_avx512_choose.c
- * Test whether we can use the AVX-512 pg_popcount() implementation.
- *
- * Copyright (c) 2024, PostgreSQL Global Development Group
- *
- * IDENTIFICATION
- * src/port/pg_popcount_avx512_choose.c
- *
- *-------------------------------------------------------------------------
- */
-#include "c.h"
-
-#if defined(HAVE__GET_CPUID) || defined(HAVE__GET_CPUID_COUNT)
-#include <cpuid.h>
-#endif
-
-#ifdef HAVE_XSAVE_INTRINSICS
-#include <immintrin.h>
-#endif
-
-#if defined(HAVE__CPUID) || defined(HAVE__CPUIDEX)
-#include <intrin.h>
-#endif
-
-#include "port/pg_bitutils.h"
-
-/*
- * It's probably unlikely that TRY_POPCNT_FAST won't be set if we are able to
- * use AVX-512 intrinsics, but we check it anyway to be sure. We piggy-back on
- * the function pointers that are only used when TRY_POPCNT_FAST is set.
- */
-#ifdef TRY_POPCNT_FAST
-
-/*
- * Does CPUID say there's support for XSAVE instructions?
- */
-static inline bool
-xsave_available(void)
-{
- unsigned int exx[4] = {0, 0, 0, 0};
-
-#if defined(HAVE__GET_CPUID)
- __get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]);
-#elif defined(HAVE__CPUID)
- __cpuid(exx, 1);
-#else
-#error cpuid instruction not available
-#endif
- return (exx[2] & (1 << 27)) != 0; /* osxsave */
-}
-
-/*
- * Does XGETBV say the ZMM registers are enabled?
- *
- * NB: Caller is responsible for verifying that xsave_available() returns true
- * before calling this.
- */
-static inline bool
-zmm_regs_available(void)
-{
-#ifdef HAVE_XSAVE_INTRINSICS
- return (_xgetbv(0) & 0xe6) == 0xe6;
-#else
- return false;
-#endif
-}
-
-/*
- * Does CPUID say there's support for AVX-512 popcount and byte-and-word
- * instructions?
- */
-static inline bool
-avx512_popcnt_available(void)
-{
- unsigned int exx[4] = {0, 0, 0, 0};
-
-#if defined(HAVE__GET_CPUID_COUNT)
- __get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]);
-#elif defined(HAVE__CPUIDEX)
- __cpuidex(exx, 7, 0);
-#else
-#error cpuid instruction not available
-#endif
- return (exx[2] & (1 << 14)) != 0 && /* avx512-vpopcntdq */
- (exx[1] & (1 << 30)) != 0; /* avx512-bw */
-}
-
-/*
- * Returns true if the CPU supports the instructions required for the AVX-512
- * pg_popcount() implementation.
- */
-bool
-pg_popcount_avx512_available(void)
-{
- return xsave_available() &&
- zmm_regs_available() &&
- avx512_popcnt_available();
-}
-
-#endif /* TRY_POPCNT_FAST */