diff --git a/ext/json/ext/json.h b/ext/json/ext/json.h
index 9379d7ae..ea12aedb 100644
--- a/ext/json/ext/json.h
+++ b/ext/json/ext/json.h
@@ -94,6 +94,8 @@ typedef unsigned char _Bool;
 
 #if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ && INTPTR_MAX == INT64_MAX
 #define JSON_CPU_LITTLE_ENDIAN_64BITS 1
+#elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_AMD64))
+#define JSON_CPU_LITTLE_ENDIAN_64BITS 1
 #else
 #define JSON_CPU_LITTLE_ENDIAN_64BITS 0
 #endif
diff --git a/ext/json/ext/parser/parser.c b/ext/json/ext/parser/parser.c
index f1ea1b6a..aff5f79b 100644
--- a/ext/json/ext/parser/parser.c
+++ b/ext/json/ext/parser/parser.c
@@ -86,7 +86,16 @@ static void rvalue_cache_insert_at(rvalue_cache *cache, int index, VALUE rstring
 #define rstring_cache_memcmp memcmp
 
 #if JSON_CPU_LITTLE_ENDIAN_64BITS
-#if __has_builtin(__builtin_bswap64)
+
+#if defined(JSON_SIMD_WINDOWS)
+#define HAVE_BYTE_SWAP64 1
+#define byte_swap64(x) _byteswap_uint64(x)
+#elif __has_builtin(__builtin_bswap64)
+#define HAVE_BYTE_SWAP64 1
+#define byte_swap64(x) __builtin_bswap64(x)
+#endif
+
+#if defined(HAVE_BYTE_SWAP64) && (HAVE_BYTE_SWAP64 == 1)
 #undef rstring_cache_memcmp
 ALWAYS_INLINE(static) int rstring_cache_memcmp(const char *str, const char *rptr, const long length)
 {
@@ -100,8 +109,8 @@ ALWAYS_INLINE(static) int rstring_cache_memcmp(const char *str, const char *rptr
         memcpy(&a, str + i, 8);
         memcpy(&b, rptr + i, 8);
         if (a != b) {
-            a = __builtin_bswap64(a);
-            b = __builtin_bswap64(b);
+            a = byte_swap64(a);
+            b = byte_swap64(b);
             return (a < b) ? -1 : 1;
         }
     }
@@ -114,6 +123,7 @@ ALWAYS_INLINE(static) int rstring_cache_memcmp(const char *str, const char *rptr
 
     return 0;
 }
+#undef byte_swap64
 #endif
 #endif
 
diff --git a/ext/json/ext/simd/conf.rb b/ext/json/ext/simd/conf.rb
index 76f774bc..e5d0b963 100644
--- a/ext/json/ext/simd/conf.rb
+++ b/ext/json/ext/simd/conf.rb
@@ -5,6 +5,7 @@ when /^(x86_64|x64)/
   header, type, init, extra = 'x86intrin.h', '__m128i', '_mm_set1_epi8(32)', 'if (__builtin_cpu_supports("sse2")) { printf("OK"); }'
 end
 
+# On mswin hosts the fallback branch below probes intrin.h instead of the header selected above.
 if header
   if have_header(header) && try_compile(<<~SRC, '-Werror=implicit-function-declaration')
     #{cpp_include(header)}
@@ -17,7 +18,16 @@
   SRC
     $defs.push("-DJSON_ENABLE_SIMD")
   else
-    puts "Disable SIMD"
+    if RbConfig::CONFIG['host_os'] =~ /mswin/i
+      if have_header('intrin.h') && try_compile(<<~SRC, '-arch:SSE2')
+        #include <intrin.h>
+        int main() { __m128i x = _mm_setzero_si128(); return 0; }
+      SRC
+        $defs.push("-DJSON_ENABLE_SIMD")
+      end
+    else
+      puts "Disable SIMD"
+    end
   end
 end
 
diff --git a/ext/json/ext/simd/simd.h b/ext/json/ext/simd/simd.h
index 3bb86acd..5e658f76 100644
--- a/ext/json/ext/simd/simd.h
+++ b/ext/json/ext/simd/simd.h
@@ -22,12 +22,24 @@ typedef enum {
 # define HAVE_BUILTIN_CTZLL 0
 #endif
 
+#if defined(_MSC_VER) && defined(HAVE_INTRIN_H)
+#define JSON_SIMD_WINDOWS 1
+#include <intrin.h>
+#endif
+
 static inline uint32_t trailing_zeros64(uint64_t input)
 {
     JSON_ASSERT(input > 0); // __builtin_ctz(0) is undefined behavior
 
 #if HAVE_BUILTIN_CTZLL
     return __builtin_ctzll(input);
+#elif JSON_SIMD_WINDOWS
+    unsigned long index; // _BitScanForward64 stores the bit position through an unsigned long *
+    if (_BitScanForward64(&index, input)) {
+        return (uint32_t)index;
+    } else {
+        return 64; // _BitScanForward64 returns 0 if input is 0, so there are 64 zeros.
+    }
 #else
     uint32_t trailing_zeros = 0;
     uint64_t temp = input;
@@ -45,6 +57,13 @@ static inline int trailing_zeros(int input)
 
 #if HAVE_BUILTIN_CTZLL
     return __builtin_ctz(input);
+#elif JSON_SIMD_WINDOWS
+    unsigned long index; // _BitScanForward stores the bit position through an unsigned long *
+    if (_BitScanForward(&index, (unsigned long)input)) {
+        return (int)index;
+    } else {
+        return 32; // _BitScanForward returns 0 if input is 0, so there are 32 zeros.
+    }
 #else
     int trailing_zeros = 0;
     int temp = input;
@@ -147,13 +166,16 @@ static inline uint8x16x4_t load_uint8x16_4(const unsigned char *table)
 
 #if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64)
 
+#if defined(HAVE_X86INTRIN_H) || defined(JSON_SIMD_WINDOWS)
+
+// HAVE_X86INTRIN_H is checked again before including that header; when only 'intrin.h' is available it has already been included.
 #ifdef HAVE_X86INTRIN_H
 #include <x86intrin.h>
+#endif
 
 #define HAVE_SIMD 1
 #define HAVE_SIMD_SSE2 1
 
-#ifdef HAVE_CPUID_H
 #define FIND_SIMD_IMPLEMENTATION_DEFINED 1
 
 #if defined(__clang__) || defined(__GNUC__)
@@ -192,15 +214,27 @@ ALWAYS_INLINE(static) TARGET_SSE2 int string_scan_simd_sse2(const char **ptr, co
     return 0;
 }
 
+#ifdef HAVE_CPUID_H
 #include <cpuid.h>
 #endif /* HAVE_CPUID_H */
 
 static inline SIMD_Implementation find_simd_implementation(void)
 {
     // TODO Revisit. I think the SSE version now only uses SSE2 instructions.
+#if __has_builtin(__builtin_cpu_supports)
     if (__builtin_cpu_supports("sse2")) {
         return SIMD_SSE2;
     }
+#endif
+
+#if defined(JSON_SIMD_WINDOWS)
+    // https://learn.microsoft.com/en-us/cpp/intrinsics/cpuid-cpuidex?view=msvc-170
+    int cpuInfo[4] = {0};
+    __cpuid(cpuInfo, 1);
+    if ((cpuInfo[3] & (1 << 26)) != 0) {
+        return SIMD_SSE2;
+    }
+#endif
 
     return SIMD_NONE;
 }