Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions ext/json/ext/json.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,8 @@ typedef unsigned char _Bool;

/*
 * JSON_CPU_LITTLE_ENDIAN_64BITS is 1 when the target is known to be a
 * little-endian platform with 64-bit pointers, and 0 otherwise.
 *
 * Two independent detections are accepted:
 *   - compilers that expose __BYTE_ORDER__ and report little-endian byte
 *     order with INTPTR_MAX equal to INT64_MAX;
 *   - MSVC (_MSC_VER) targeting x64 (_M_X64 or _M_AMD64).
 */
#if (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ && INTPTR_MAX == INT64_MAX) || \
    (defined(_MSC_VER) && (defined(_M_X64) || defined(_M_AMD64)))
#define JSON_CPU_LITTLE_ENDIAN_64BITS 1
#else
#define JSON_CPU_LITTLE_ENDIAN_64BITS 0
#endif
Expand Down
16 changes: 13 additions & 3 deletions ext/json/ext/parser/parser.c
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,16 @@ static void rvalue_cache_insert_at(rvalue_cache *cache, int index, VALUE rstring
#define rstring_cache_memcmp memcmp

#if JSON_CPU_LITTLE_ENDIAN_64BITS
#if __has_builtin(__builtin_bswap64)

#if defined(JSON_SIMD_WINDOWS)
#define HAVE_BYTE_SWAP64 1
#define byte_swap64(x) _byteswap_uint64(x)
#elif __has_builtin(__builtin_bswap64)
#define HAVE_BYTE_SWAP64 1
#define byte_swap64(x) __builtin_bswap64(x)
#endif

#if defined(HAVE_BYTE_SWAP64) && (HAVE_BYTE_SWAP64 == 1)
#undef rstring_cache_memcmp
ALWAYS_INLINE(static) int rstring_cache_memcmp(const char *str, const char *rptr, const long length)
{
Expand All @@ -100,8 +109,8 @@ ALWAYS_INLINE(static) int rstring_cache_memcmp(const char *str, const char *rptr
memcpy(&a, str + i, 8);
memcpy(&b, rptr + i, 8);
if (a != b) {
a = __builtin_bswap64(a);
b = __builtin_bswap64(b);
a = byte_swap64(a);
b = byte_swap64(b);
return (a < b) ? -1 : 1;
}
}
Expand All @@ -114,6 +123,7 @@ ALWAYS_INLINE(static) int rstring_cache_memcmp(const char *str, const char *rptr

return 0;
}
#undef byte_swap64
#endif
#endif

Expand Down
12 changes: 11 additions & 1 deletion ext/json/ext/simd/conf.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
when /^(x86_64|x64)/
header, type, init, extra = 'x86intrin.h', '__m128i', '_mm_set1_epi8(32)', 'if (__builtin_cpu_supports("sse2")) { printf("OK"); }'
end
# intrin.h
if header
if have_header(header) && try_compile(<<~SRC, '-Werror=implicit-function-declaration')
#{cpp_include(header)}
Expand All @@ -17,7 +18,16 @@
SRC
$defs.push("-DJSON_ENABLE_SIMD")
else
puts "Disable SIMD"
if RbConfig::CONFIG['host_os'] =~ /mswin/i
if have_header('intrin.h') && try_compile(<<~SRC, '-arch:SSE2')
#include <intrin.h>
int main() { __m128i x = _mm_setzero_si128(); return 0; }
SRC
$defs.push("-DJSON_ENABLE_SIMD")
end
else
puts "Disable SIMD"
end
end
end

Expand Down
36 changes: 35 additions & 1 deletion ext/json/ext/simd/simd.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,24 @@ typedef enum {
# define HAVE_BUILTIN_CTZLL 0
#endif

/*
 * When building with MSVC and the build configuration found <intrin.h>
 * (HAVE_INTRIN_H), pull in the compiler intrinsics header and set
 * JSON_SIMD_WINDOWS so later code can select the MSVC intrinsic paths.
 */
#ifdef _MSC_VER
#ifdef HAVE_INTRIN_H
#include <intrin.h>
#define JSON_SIMD_WINDOWS 1
#endif /* HAVE_INTRIN_H */
#endif /* _MSC_VER */

static inline uint32_t trailing_zeros64(uint64_t input)
{
JSON_ASSERT(input > 0); // __builtin_ctz(0) is undefined behavior

#if HAVE_BUILTIN_CTZLL
return __builtin_ctzll(input);
#elif JSON_SIMD_WINDOWS
uint32_t index;
if (_BitScanForward64(&index, input)) {
return index;
} else {
return 64; // _BitScanForward64 returns 0 if input is 0, so there are 64 zeros.
}
#else
uint32_t trailing_zeros = 0;
uint64_t temp = input;
Expand All @@ -45,6 +57,13 @@ static inline int trailing_zeros(int input)

#if HAVE_BUILTIN_CTZLL
return __builtin_ctz(input);
#elif JSON_SIMD_WINDOWS
uint32_t index;
if (_BitScanForward(&index, input)) {
return index;
} else {
return 32; // _BitScanForward returns 0 if input is 0, so there are 32 zeros.
}
#else
int trailing_zeros = 0;
int temp = input;
Expand Down Expand Up @@ -147,13 +166,16 @@ static inline uint8x16x4_t load_uint8x16_4(const unsigned char *table)

#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64)

#if defined(HAVE_X86INTRIN_H) || defined(JSON_SIMD_WINDOWS)

// Check HAVE_X86INTRIN_H again before including the header: when only 'intrin.h' is available, it has already been included.
#ifdef HAVE_X86INTRIN_H
#include <x86intrin.h>
#endif

#define HAVE_SIMD 1
#define HAVE_SIMD_SSE2 1

#ifdef HAVE_CPUID_H
#define FIND_SIMD_IMPLEMENTATION_DEFINED 1

#if defined(__clang__) || defined(__GNUC__)
Expand Down Expand Up @@ -192,15 +214,27 @@ ALWAYS_INLINE(static) TARGET_SSE2 int string_scan_simd_sse2(const char **ptr, co
return 0;
}

#ifdef HAVE_CPUID_H
#include <cpuid.h>
#endif /* HAVE_CPUID_H */

/*
 * Select the SIMD implementation to use on the running CPU.
 *
 * Returns SIMD_SSE2 when SSE2 is reported either by the compiler's
 * __builtin_cpu_supports("sse2") probe (when that builtin exists) or, on
 * JSON_SIMD_WINDOWS builds, by the __cpuid intrinsic. Returns SIMD_NONE
 * when neither probe is available or neither reports SSE2.
 */
static inline SIMD_Implementation find_simd_implementation(void)
{
    SIMD_Implementation chosen = SIMD_NONE;

    // TODO Revisit. I think the SSE version now only uses SSE2 instructions.
#if __has_builtin(__builtin_cpu_supports)
    if (__builtin_cpu_supports("sse2")) {
        chosen = SIMD_SSE2;
    }
#endif

#if defined(JSON_SIMD_WINDOWS)
    // Only query CPUID when the builtin probe above did not already succeed.
    if (chosen == SIMD_NONE) {
        // https://learn.microsoft.com/en-us/cpp/intrinsics/cpuid-cpuidex?view=msvc-170
        // Function id 1; the SSE2 flag is read from bit 26 of the fourth output word.
        int regs[4] = {0};
        __cpuid(regs, 1);
        if ((regs[3] & (1 << 26)) != 0) {
            chosen = SIMD_SSE2;
        }
    }
#endif

    return chosen;
}
Expand Down