From c147026abfda196ba7783455ce413fbc56da9ba8 Mon Sep 17 00:00:00 2001 From: Daniel Lemire Date: Tue, 1 Apr 2025 16:56:18 -0400 Subject: [PATCH] slightly faster ARM kernel --- src/Base64ARM.cs | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/src/Base64ARM.cs b/src/Base64ARM.cs index dad3362..3a34ad8 100644 --- a/src/Base64ARM.cs +++ b/src/Base64ARM.cs @@ -344,23 +344,15 @@ private static unsafe void Base64DecodeBlock(byte* outPtr, byte* srcPtr) // Load 4 vectors from src var (str0, str1, str2, str3) = AdvSimd.Arm64.Load4xVector128AndUnzip(srcPtr); + // Perform bitwise operations to simulate NEON intrinsics + Vector128<byte> outvec0 = AdvSimd.ShiftLeftAndInsert( + AdvSimd.ShiftRightLogical(str1, 4), str0, 2); + Vector128<byte> outvec1 = AdvSimd.ShiftLeftAndInsert( + AdvSimd.ShiftRightLogical(str2, 2), str1, 4); - // Perform bitwise operations to simulate NEON intrinsics - Vector128<byte> outvec0 = AdvSimd.Or( - AdvSimd.ShiftLeftLogical(str0, 2), - AdvSimd.ShiftRightLogical(str1, 4) - ); - - Vector128<byte> outvec1 = AdvSimd.Or( - AdvSimd.ShiftLeftLogical(str1, 4), - AdvSimd.ShiftRightLogical(str2, 2) - ); - - Vector128<byte> outvec2 = AdvSimd.Or( - AdvSimd.ShiftLeftLogical(str2, 6), - str3 - ); + Vector128<byte> outvec2 = AdvSimd.ShiftLeftAndInsert( + str3, str2, 6); // Store the result in outData AdvSimd.Arm64.StoreVectorAndZip(outPtr, (outvec0, outvec1, outvec2));