using System;
using System.Diagnostics;

namespace Unity.Burst.Intrinsics
{
    public unsafe static partial class X86
    {
        /// <summary>
        /// SSSE3 intrinsics
        /// </summary>
        /// <remarks>
        /// The methods in this class are plain C# implementations that operate on the
        /// individual lanes of <c>v128</c> through raw pointers.
        /// </remarks>
        public static class Ssse3
        {
            /// <summary>
            /// Evaluates to true at compile time if SSSE3 intrinsics are supported.
            /// </summary>
            /// <remarks>
            /// This managed implementation always returns false.
            /// NOTE(review): presumably the Burst compiler substitutes the real value - confirm.
            /// </remarks>
            public static bool IsSsse3Supported { get { return false; } }

            // _mm_abs_epi8
            /// <summary>
            /// Compute the absolute value of packed 8-bit integers in "a", and store the unsigned results in "dst".
            /// </summary>
            /// <param name="a">Vector a</param>
            /// <returns>Vector</returns>
            [DebuggerStepThrough]
            public static v128 abs_epi8(v128 a)
            {
                v128 dst = default(v128);
                byte* dptr = &dst.Byte0;
                sbyte* aptr = &a.SByte0;
                for (int j = 0; j <= 15; j++)
                {
                    // Widen to int first so that -128 maps to 128, which fits the unsigned lane.
                    dptr[j] = (byte)Math.Abs((int)aptr[j]);
                }
                return dst;
            }

            // _mm_abs_epi16
            /// <summary>
            /// Compute the absolute value of packed 16-bit integers in "a", and store the unsigned results in "dst".
            /// </summary>
            /// <param name="a">Vector a</param>
            /// <returns>Vector</returns>
            [DebuggerStepThrough]
            public static v128 abs_epi16(v128 a)
            {
                v128 dst = default(v128);
                ushort* dptr = &dst.UShort0;
                short* aptr = &a.SShort0;
                for (int j = 0; j <= 7; j++)
                {
                    // Widen to int first so that -32768 maps to 32768, which fits the unsigned lane.
                    dptr[j] = (ushort)Math.Abs((int)aptr[j]);
                }
                return dst;
            }

            // _mm_abs_epi32
            /// <summary>
            /// Compute the absolute value of packed 32-bit integers in "a", and store the unsigned results in "dst".
            /// </summary>
            /// <param name="a">Vector a</param>
            /// <returns>Vector</returns>
            [DebuggerStepThrough]
            public static v128 abs_epi32(v128 a)
            {
                v128 dst = default(v128);
                uint* dptr = &dst.UInt0;
                int* aptr = &a.SInt0;
                for (int j = 0; j <= 3; j++)
                {
                    // Widen to long: Math.Abs(int.MinValue) would throw, the long overload does not.
                    dptr[j] = (uint)Math.Abs((long)aptr[j]);
                }
                return dst;
            }

            // _mm_shuffle_epi8
            /// <summary>
            /// Shuffle packed 8-bit integers in "a" according to shuffle control mask in the corresponding 8-bit element of "b", and store the results in "dst".
            /// </summary>
            /// <param name="a">Vector a</param>
            /// <param name="b">Vector b</param>
            /// <returns>Vector</returns>
            [DebuggerStepThrough]
            public static v128 shuffle_epi8(v128 a, v128 b)
            {
                v128 dst = default(v128);
                byte* dptr = &dst.Byte0;
                byte* aptr = &a.Byte0;
                byte* bptr = &b.Byte0;
                for (int j = 0; j <= 15; j++)
                {
                    if ((bptr[j] & 0x80) != 0)
                    {
                        // High bit of the control byte set: the destination byte is cleared.
                        dptr[j] = 0x00;
                    }
                    else
                    {
                        // Otherwise the low four bits select the source byte of "a".
                        dptr[j] = aptr[bptr[j] & 15];
                    }
                }
                return dst;
            }

            // _mm_alignr_epi8
            /// <summary>
            /// Concatenate 16-byte blocks in "a" and "b" into a 32-byte temporary result, shift the result right by "count" bytes, and store the low 16 bytes in "dst".
            /// </summary>
            /// <remarks>
            /// "b" forms the low 16 bytes of the temporary and "a" the high 16 bytes, so a count of 0 yields "b"
            /// and a count of 16 yields "a". Bytes shifted in from beyond the temporary are zero.
            /// Counts outside 0..31 produce an all-zero vector; for negative counts this is a choice made by this
            /// implementation so that no memory outside the operands is ever read.
            /// </remarks>
            /// <param name="a">Vector a</param>
            /// <param name="b">Vector b</param>
            /// <param name="count">Byte count</param>
            /// <returns>Vector</returns>
            [DebuggerStepThrough]
            public static v128 alignr_epi8(v128 a, v128 b, int count)
            {
                var dst = default(v128);
                if (count < 0 || count > 31)
                {
                    // Everything is shifted out of the 32-byte temporary.
                    return dst;
                }

                byte* dptr = &dst.Byte0;
                byte* aptr = &a.Byte0;
                byte* bptr = &b.Byte0;

                for (int j = 0; j <= 15; j++)
                {
                    // Index into the 32-byte concatenation: 0..15 address "b", 16..31 address "a".
                    int src = count + j;
                    if (src < 16)
                    {
                        dptr[j] = bptr[src];
                    }
                    else if (src < 32)
                    {
                        dptr[j] = aptr[src - 16];
                    }
                    // else: past the end of the temporary, the byte stays zero.
                }

                return dst;
            }

            // _mm_hadd_epi16
            /// <summary>
            /// Horizontally add adjacent pairs of 16-bit integers in "a" and "b", and pack the signed 16-bit results in "dst".
            /// </summary>
            /// <param name="a">Vector a</param>
            /// <param name="b">Vector b</param>
            /// <returns>Vector</returns>
            [DebuggerStepThrough]
            public static v128 hadd_epi16(v128 a, v128 b)
            {
                v128 dst = default(v128);
                short* dptr = &dst.SShort0;
                short* aptr = &a.SShort0;
                short* bptr = &b.SShort0;
                for (int j = 0; j <= 3; ++j)
                {
                    // Results wrap to 16 bits; unchecked keeps that true in /checked builds.
                    dptr[j] = unchecked((short)(aptr[2 * j + 1] + aptr[2 * j]));
                    dptr[j + 4] = unchecked((short)(bptr[2 * j + 1] + bptr[2 * j]));
                }
                return dst;
            }

            // _mm_hadds_epi16
            /// <summary>
            /// Horizontally add adjacent pairs of 16-bit integers in "a" and "b" using saturation, and pack the signed 16-bit results in "dst".
            /// </summary>
            /// <param name="a">Vector a</param>
            /// <param name="b">Vector b</param>
            /// <returns>Vector</returns>
            [DebuggerStepThrough]
            public static v128 hadds_epi16(v128 a, v128 b)
            {
                v128 dst = default(v128);
                short* dptr = &dst.SShort0;
                short* aptr = &a.SShort0;
                short* bptr = &b.SShort0;
                for (int j = 0; j <= 3; ++j)
                {
                    // The sums are computed as int and handed to Saturate_To_Int16.
                    dptr[j] = Saturate_To_Int16(aptr[2 * j + 1] + aptr[2 * j]);
                    dptr[j + 4] = Saturate_To_Int16(bptr[2 * j + 1] + bptr[2 * j]);
                }
                return dst;
            }

            // _mm_hadd_epi32
            /// <summary>
            /// Horizontally add adjacent pairs of 32-bit integers in "a" and "b", and pack the signed 32-bit results in "dst".
            /// </summary>
            /// <param name="a">Vector a</param>
            /// <param name="b">Vector b</param>
            /// <returns>Vector</returns>
            [DebuggerStepThrough]
            public static v128 hadd_epi32(v128 a, v128 b)
            {
                v128 dst = default(v128);
                // 32-bit sums wrap around; unchecked keeps that true in /checked builds.
                unchecked
                {
                    dst.SInt0 = a.SInt1 + a.SInt0;
                    dst.SInt1 = a.SInt3 + a.SInt2;
                    dst.SInt2 = b.SInt1 + b.SInt0;
                    dst.SInt3 = b.SInt3 + b.SInt2;
                }
                return dst;
            }

            // _mm_hsub_epi16
            /// <summary>
            /// Horizontally subtract adjacent pairs of 16-bit integers in "a" and "b", and pack the signed 16-bit results in "dst".
            /// </summary>
            /// <param name="a">Vector a</param>
            /// <param name="b">Vector b</param>
            /// <returns>Vector</returns>
            [DebuggerStepThrough]
            public static v128 hsub_epi16(v128 a, v128 b)
            {
                v128 dst = default(v128);
                short* dptr = &dst.SShort0;
                short* aptr = &a.SShort0;
                short* bptr = &b.SShort0;
                for (int j = 0; j <= 3; ++j)
                {
                    // Even element minus odd element; results wrap to 16 bits.
                    dptr[j] = unchecked((short)(aptr[2 * j] - aptr[2 * j + 1]));
                    dptr[j + 4] = unchecked((short)(bptr[2 * j] - bptr[2 * j + 1]));
                }
                return dst;
            }

            // _mm_hsubs_epi16
            /// <summary>
            /// Horizontally subtract adjacent pairs of 16-bit integers in "a" and "b" using saturation, and pack the signed 16-bit results in "dst".
            /// </summary>
            /// <param name="a">Vector a</param>
            /// <param name="b">Vector b</param>
            /// <returns>Vector</returns>
            [DebuggerStepThrough]
            public static v128 hsubs_epi16(v128 a, v128 b)
            {
                v128 dst = default(v128);
                short* dptr = &dst.SShort0;
                short* aptr = &a.SShort0;
                short* bptr = &b.SShort0;
                for (int j = 0; j <= 3; ++j)
                {
                    // Even element minus odd element, computed as int and handed to Saturate_To_Int16.
                    dptr[j] = Saturate_To_Int16(aptr[2 * j] - aptr[2 * j + 1]);
                    dptr[j + 4] = Saturate_To_Int16(bptr[2 * j] - bptr[2 * j + 1]);
                }
                return dst;
            }

            // _mm_hsub_epi32
            /// <summary>
            /// Horizontally subtract adjacent pairs of 32-bit integers in "a" and "b", and pack the signed 32-bit results in "dst".
            /// </summary>
            /// <param name="a">Vector a</param>
            /// <param name="b">Vector b</param>
            /// <returns>Vector</returns>
            [DebuggerStepThrough]
            public static v128 hsub_epi32(v128 a, v128 b)
            {
                v128 dst = default(v128);
                // 32-bit differences wrap around; unchecked keeps that true in /checked builds.
                unchecked
                {
                    dst.SInt0 = a.SInt0 - a.SInt1;
                    dst.SInt1 = a.SInt2 - a.SInt3;
                    dst.SInt2 = b.SInt0 - b.SInt1;
                    dst.SInt3 = b.SInt2 - b.SInt3;
                }
                return dst;
            }

            // _mm_maddubs_epi16
            /// <summary>
            /// Vertically multiply each unsigned 8-bit integer from "a" with the corresponding signed 8-bit integer from "b", producing intermediate signed 16-bit integers. Horizontally add adjacent pairs of intermediate signed 16-bit integers, and pack the saturated results in "dst".
            /// </summary>
            /// <param name="a">Vector a</param>
            /// <param name="b">Vector b</param>
            /// <returns>Vector</returns>
            [DebuggerStepThrough]
            public static v128 maddubs_epi16(v128 a, v128 b)
            {
                v128 dst = default(v128);
                short* dptr = &dst.SShort0;
                byte* aptr = &a.Byte0;
                sbyte* bptr = &b.SByte0;
                for (int j = 0; j <= 7; j++)
                {
                    // "a" is read as unsigned bytes and "b" as signed bytes; the sum is formed in int.
                    int tmp = aptr[2 * j + 1] * bptr[2 * j + 1] + aptr[2 * j] * bptr[2 * j];
                    dptr[j] = Saturate_To_Int16(tmp);
                }
                return dst;
            }

            // _mm_mulhrs_epi16
            /// <summary>
            /// Multiply packed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits [16:1] to "dst".
            /// </summary>
            /// <param name="a">Vector a</param>
            /// <param name="b">Vector b</param>
            /// <returns>Vector</returns>
            [DebuggerStepThrough]
            public static v128 mulhrs_epi16(v128 a, v128 b)
            {
                v128 dst = default(v128);
                short* dptr = &dst.SShort0;
                short* aptr = &a.SShort0;
                short* bptr = &b.SShort0;
                for (int j = 0; j <= 7; j++)
                {
                    int tmp = aptr[j] * bptr[j];
                    tmp >>= 14; // keep the 18 most significant bits (arithmetic shift)
                    tmp += 1;   // rounding increment
                    tmp >>= 1;  // drop bit 0, leaving bits [16:1]
                    // -32768 * -32768 yields 32768 here, which wraps to -32768 in the 16-bit lane.
                    dptr[j] = unchecked((short)tmp);
                }
                return dst;
            }

            // _mm_sign_epi8
            /// <summary>
            /// Negate packed 8-bit integers in "a" when the corresponding signed 8-bit integer in "b" is negative, and store the results in "dst". Element in "dst" are zeroed out when the corresponding element in "b" is zero.
            /// </summary>
            /// <param name="a">Vector a</param>
            /// <param name="b">Vector b</param>
            /// <returns>Vector</returns>
            [DebuggerStepThrough]
            public static v128 sign_epi8(v128 a, v128 b)
            {
                v128 dst = default(v128);
                sbyte* dptr = &dst.SByte0;
                sbyte* aptr = &a.SByte0;
                sbyte* bptr = &b.SByte0;
                for (int j = 0; j <= 15; j++)
                {
                    if (bptr[j] < 0)
                    {
                        // Negating -128 wraps back to -128 in an 8-bit lane.
                        dptr[j] = unchecked((sbyte)-aptr[j]);
                    }
                    else if (bptr[j] == 0)
                    {
                        dptr[j] = 0;
                    }
                    else
                    {
                        dptr[j] = aptr[j];
                    }
                }
                return dst;
            }

            // _mm_sign_epi16
            /// <summary>
            /// Negate packed 16-bit integers in "a" when the corresponding signed 16-bit integer in "b" is negative, and store the results in "dst". Element in "dst" are zeroed out when the corresponding element in "b" is zero.
            /// </summary>
            /// <param name="a">Vector a</param>
            /// <param name="b">Vector b</param>
            /// <returns>Vector</returns>
            [DebuggerStepThrough]
            public static v128 sign_epi16(v128 a, v128 b)
            {
                v128 dst = default(v128);
                short* dptr = &dst.SShort0;
                short* aptr = &a.SShort0;
                short* bptr = &b.SShort0;
                for (int j = 0; j <= 7; j++)
                {
                    if (bptr[j] < 0)
                    {
                        // Negating -32768 wraps back to -32768 in a 16-bit lane.
                        dptr[j] = unchecked((short)-aptr[j]);
                    }
                    else if (bptr[j] == 0)
                    {
                        dptr[j] = 0;
                    }
                    else
                    {
                        dptr[j] = aptr[j];
                    }
                }
                return dst;
            }

            // _mm_sign_epi32
            /// <summary>
            /// Negate packed 32-bit integers in "a" when the corresponding signed 32-bit integer in "b" is negative, and store the results in "dst". Element in "dst" are zeroed out when the corresponding element in "b" is zero.
            /// </summary>
            /// <param name="a">Vector a</param>
            /// <param name="b">Vector b</param>
            /// <returns>Vector</returns>
            [DebuggerStepThrough]
            public static v128 sign_epi32(v128 a, v128 b)
            {
                v128 dst = default(v128);
                int* dptr = &dst.SInt0;
                int* aptr = &a.SInt0;
                int* bptr = &b.SInt0;
                for (int j = 0; j <= 3; j++)
                {
                    if (bptr[j] < 0)
                    {
                        // Negating int.MinValue wraps back to int.MinValue.
                        dptr[j] = unchecked(-aptr[j]);
                    }
                    else if (bptr[j] == 0)
                    {
                        dptr[j] = 0;
                    }
                    else
                    {
                        dptr[j] = aptr[j];
                    }
                }
                return dst;
            }
        }
    }
}