Rasagar/Library/PackageCache/com.unity.burst/Runtime/Intrinsics/x86/Ssse3.cs

372 lines
14 KiB
C#
Raw Permalink Normal View History

2024-08-26 13:07:20 -07:00
using System;
using System.Diagnostics;
namespace Unity.Burst.Intrinsics
{
public unsafe static partial class X86
{
/// <summary>
/// SSSE3 intrinsics
/// </summary>
public static class Ssse3
{
/// <summary>
/// Evaluates to true at compile time if SSSE3 intrinsics are supported.
/// </summary>
/// <remarks>
/// The managed body shown here unconditionally reports <c>false</c>.
/// </remarks>
public static bool IsSsse3Supported
{
    get
    {
        return false;
    }
}
// _mm_abs_epi8
/// <summary> Take the absolute value of every packed signed 8-bit integer in "a" and return the results as unsigned 8-bit integers. </summary>
/// <param name="a">Vector a, read as sixteen signed 8-bit lanes</param>
/// <returns>Vector whose sixteen byte lanes hold the absolute values</returns>
[DebuggerStepThrough]
public static v128 abs_epi8(v128 a)
{
    v128 result = default(v128);
    sbyte* src = &a.SByte0;
    byte* dest = &result.Byte0;

    int lane = 0;
    while (lane < 16)
    {
        // Widen to int before taking the magnitude so that -128 becomes 128
        // (which fits an unsigned byte) instead of overflowing sbyte.
        int widened = src[lane];
        dest[lane] = (byte)Math.Abs(widened);
        lane++;
    }

    return result;
}
// _mm_abs_epi16
/// <summary> Take the absolute value of every packed signed 16-bit integer in "a" and return the results as unsigned 16-bit integers. </summary>
/// <param name="a">Vector a, read as eight signed 16-bit lanes</param>
/// <returns>Vector whose eight 16-bit lanes hold the absolute values</returns>
[DebuggerStepThrough]
public static v128 abs_epi16(v128 a)
{
    v128 result = default(v128);
    short* src = &a.SShort0;
    ushort* dest = &result.UShort0;

    for (int lane = 0; lane < 8; ++lane)
    {
        // The magnitude is computed on an int so that short.MinValue maps to 32768.
        int widened = src[lane];
        dest[lane] = (ushort)Math.Abs(widened);
    }

    return result;
}
// _mm_abs_epi32
/// <summary> Take the absolute value of every packed signed 32-bit integer in "a" and return the results as unsigned 32-bit integers. </summary>
/// <param name="a">Vector a, read as four signed 32-bit lanes</param>
/// <returns>Vector whose four 32-bit lanes hold the absolute values</returns>
[DebuggerStepThrough]
public static v128 abs_epi32(v128 a)
{
    v128 result = default(v128);
    int* src = &a.SInt0;
    uint* dest = &result.UInt0;

    for (int lane = 0; lane < 4; ++lane)
    {
        // Widen to long first: Math.Abs on a 32-bit int.MinValue would throw,
        // whereas the 64-bit magnitude (2147483648) fits in a uint.
        long widened = src[lane];
        dest[lane] = (uint)Math.Abs(widened);
    }

    return result;
}
// _mm_shuffle_epi8
/// <summary> Shuffle the packed 8-bit integers in "a" using the per-byte control values in "b". A control byte with its top bit set produces zero; otherwise its low four bits select the source byte of "a". </summary>
/// <param name="a">Vector a (bytes to pick from)</param>
/// <param name="b">Vector b (one control byte per destination byte)</param>
/// <returns>Vector containing the shuffled bytes</returns>
[DebuggerStepThrough]
public static v128 shuffle_epi8(v128 a, v128 b)
{
    v128 result = default(v128);
    byte* dest = &result.Byte0;
    byte* source = &a.Byte0;
    byte* control = &b.Byte0;

    for (int lane = 0; lane < 16; ++lane)
    {
        byte selector = control[lane];
        bool zeroLane = (selector & 0x80) != 0;

        // Bit 7 forces the lane to zero; bits 0..3 index into "a".
        dest[lane] = zeroLane ? (byte)0x00 : source[selector & 15];
    }

    return result;
}
// _mm_alignr_epi8
/// <summary> Concatenate 16-byte blocks in "a" and "b" into a 32-byte temporary result (with "a" in the upper 16 bytes and "b" in the lower 16 bytes), shift the result right by "count" bytes, and store the low 16 bytes in "dst". </summary>
/// <remarks>
/// Result byte i is taken from byte (i + count) of the 32-byte temporary: positions 0..15 come from "b",
/// positions 16..31 come from "a", and any position of 32 or more yields zero.
/// Only the low 8 bits of "count" are used, on the assumption that it is encoded as an 8-bit immediate (TODO confirm against the Burst compiler).
/// </remarks>
/// <param name="a">Vector a (upper half of the concatenation)</param>
/// <param name="b">Vector b (lower half of the concatenation)</param>
/// <param name="count">Byte count</param>
/// <returns>Vector</returns>
[DebuggerStepThrough]
public static v128 alignr_epi8(v128 a, v128 b, int count)
{
    var dst = default(v128);
    byte* dptr = &dst.Byte0;
    byte* aptr = &a.Byte0;
    byte* bptr = &b.Byte0;

    // Reduce to the 8-bit immediate range so that negative or oversized values
    // can never index outside the two source vectors.
    int shift = count & 0xFF;

    for (int i = 0; i < 16; ++i)
    {
        int src = i + shift;
        if (src < 16)
        {
            // Still inside the low half of the concatenation.
            dptr[i] = bptr[src];
        }
        else if (src < 32)
        {
            // Crossed into the high half of the concatenation.
            dptr[i] = aptr[src - 16];
        }
        else
        {
            // Shifted past both inputs: zeros are shifted in.
            dptr[i] = 0;
        }
    }

    return dst;
}
// _mm_hadd_epi16
/// <summary> Horizontally add each adjacent pair of 16-bit integers in "a" and in "b". The four sums from "a" fill the low four 16-bit lanes of the result and the four sums from "b" fill the high four. </summary>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <returns>Vector of eight signed 16-bit pair sums (truncated to 16 bits, no saturation)</returns>
[DebuggerStepThrough]
public static v128 hadd_epi16(v128 a, v128 b)
{
    v128 result = default(v128);
    short* dest = &result.SShort0;
    short* left = &a.SShort0;
    short* right = &b.SShort0;

    for (int pair = 0; pair < 4; pair++)
    {
        int even = 2 * pair;
        int odd = even + 1;

        // The int sum is truncated back to 16 bits by the cast.
        dest[pair] = (short)(left[odd] + left[even]);
        dest[pair + 4] = (short)(right[odd] + right[even]);
    }

    return result;
}
// _mm_hadds_epi16
/// <summary> Horizontally add each adjacent pair of 16-bit integers in "a" and in "b" using saturation. The four sums from "a" fill the low four 16-bit lanes of the result and the four sums from "b" fill the high four. </summary>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <returns>Vector of eight signed 16-bit pair sums, each passed through Saturate_To_Int16</returns>
[DebuggerStepThrough]
public static v128 hadds_epi16(v128 a, v128 b)
{
    v128 result = default(v128);
    short* dest = &result.SShort0;
    short* left = &a.SShort0;
    short* right = &b.SShort0;

    for (int pair = 0; pair < 4; pair++)
    {
        int even = 2 * pair;
        int odd = even + 1;

        // Sums are formed as 32-bit ints, then narrowed by the saturation helper.
        int leftSum = left[odd] + left[even];
        int rightSum = right[odd] + right[even];

        dest[pair] = Saturate_To_Int16(leftSum);
        dest[pair + 4] = Saturate_To_Int16(rightSum);
    }

    return result;
}
// _mm_hadd_epi32
/// <summary> Horizontally add each adjacent pair of 32-bit integers in "a" and in "b". The two sums from "a" fill the low two 32-bit lanes of the result and the two sums from "b" fill the high two. </summary>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <returns>Vector of four signed 32-bit pair sums</returns>
[DebuggerStepThrough]
public static v128 hadd_epi32(v128 a, v128 b)
{
    v128 result = default(v128);
    int* dest = &result.SInt0;
    int* left = &a.SInt0;
    int* right = &b.SInt0;

    for (int pair = 0; pair < 2; pair++)
    {
        int even = 2 * pair;
        int odd = even + 1;

        dest[pair] = left[odd] + left[even];
        dest[pair + 2] = right[odd] + right[even];
    }

    return result;
}
// _mm_hsub_epi16
/// <summary> Horizontally subtract each adjacent pair of 16-bit integers in "a" and in "b" (even-indexed element minus the following odd-indexed element). The four differences from "a" fill the low four 16-bit lanes of the result and the four from "b" fill the high four. </summary>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <returns>Vector of eight signed 16-bit pair differences (truncated to 16 bits, no saturation)</returns>
[DebuggerStepThrough]
public static v128 hsub_epi16(v128 a, v128 b)
{
    v128 result = default(v128);
    short* dest = &result.SShort0;
    short* left = &a.SShort0;
    short* right = &b.SShort0;

    for (int pair = 0; pair < 4; pair++)
    {
        int even = 2 * pair;
        int odd = even + 1;

        // The int difference is truncated back to 16 bits by the cast.
        dest[pair] = (short)(left[even] - left[odd]);
        dest[pair + 4] = (short)(right[even] - right[odd]);
    }

    return result;
}
// _mm_hsubs_epi16
/// <summary> Horizontally subtract each adjacent pair of 16-bit integers in "a" and in "b" (even-indexed element minus the following odd-indexed element) using saturation. The four differences from "a" fill the low four 16-bit lanes of the result and the four from "b" fill the high four. </summary>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <returns>Vector of eight signed 16-bit pair differences, each passed through Saturate_To_Int16</returns>
[DebuggerStepThrough]
public static v128 hsubs_epi16(v128 a, v128 b)
{
    v128 result = default(v128);
    short* dest = &result.SShort0;
    short* left = &a.SShort0;
    short* right = &b.SShort0;

    for (int pair = 0; pair < 4; pair++)
    {
        int even = 2 * pair;
        int odd = even + 1;

        // Differences are formed as 32-bit ints, then narrowed by the saturation helper.
        int leftDiff = left[even] - left[odd];
        int rightDiff = right[even] - right[odd];

        dest[pair] = Saturate_To_Int16(leftDiff);
        dest[pair + 4] = Saturate_To_Int16(rightDiff);
    }

    return result;
}
// _mm_hsub_epi32
/// <summary> Horizontally subtract each adjacent pair of 32-bit integers in "a" and in "b" (even-indexed element minus the following odd-indexed element). The two differences from "a" fill the low two 32-bit lanes of the result and the two from "b" fill the high two. </summary>
/// <param name="a">Vector a</param>
/// <param name="b">Vector b</param>
/// <returns>Vector of four signed 32-bit pair differences</returns>
[DebuggerStepThrough]
public static v128 hsub_epi32(v128 a, v128 b)
{
    v128 result = default(v128);
    int* dest = &result.SInt0;
    int* left = &a.SInt0;
    int* right = &b.SInt0;

    for (int pair = 0; pair < 2; pair++)
    {
        int even = 2 * pair;
        int odd = even + 1;

        dest[pair] = left[even] - left[odd];
        dest[pair + 2] = right[even] - right[odd];
    }

    return result;
}
// _mm_maddubs_epi16
/// <summary> Multiply each unsigned 8-bit integer of "a" by the signed 8-bit integer at the same position in "b", add the two products of every adjacent pair, and store each sum as a saturated signed 16-bit integer. </summary>
/// <param name="a">Vector a, read as sixteen unsigned 8-bit lanes</param>
/// <param name="b">Vector b, read as sixteen signed 8-bit lanes</param>
/// <returns>Vector of eight signed 16-bit results, each passed through Saturate_To_Int16</returns>
[DebuggerStepThrough]
public static v128 maddubs_epi16(v128 a, v128 b)
{
    v128 result = default(v128);
    short* dest = &result.SShort0;
    byte* unsignedLanes = &a.Byte0;
    sbyte* signedLanes = &b.SByte0;

    for (int lane = 0; lane < 8; ++lane)
    {
        int even = 2 * lane;
        int odd = even + 1;

        // Both products are computed in 32-bit arithmetic before the pair is summed.
        int oddProduct = unsignedLanes[odd] * signedLanes[odd];
        int evenProduct = unsignedLanes[even] * signedLanes[even];

        dest[lane] = Saturate_To_Int16(oddProduct + evenProduct);
    }

    return result;
}
// _mm_mulhrs_epi16
/// <summary> Multiply packed 16-bit integers in "a" and "b", producing intermediate signed 32-bit integers. Truncate each intermediate integer to the 18 most significant bits, round by adding 1, and store bits [16:1] to "dst". </summary>
/// <param name="a">Vector a, read as eight signed 16-bit lanes</param>
/// <param name="b">Vector b, read as eight signed 16-bit lanes</param>
/// <returns>Vector of eight signed 16-bit rounded high products</returns>
[DebuggerStepThrough]
public static v128 mulhrs_epi16(v128 a, v128 b)
{
    v128 result = default(v128);
    short* dest = &result.SShort0;
    short* left = &a.SShort0;
    short* right = &b.SShort0;

    for (int lane = 0; lane < 8; ++lane)
    {
        int product = left[lane] * right[lane];

        // Drop the low 14 bits, add the rounding bit, then drop one more bit.
        int rounded = ((product >> 14) + 1) >> 1;

        dest[lane] = (short)rounded;
    }

    return result;
}
// _mm_sign_epi8
/// <summary> For each packed 8-bit integer: negate the element of "a" when the corresponding signed element of "b" is negative, produce zero when the element of "b" is zero, and pass the element of "a" through unchanged otherwise. </summary>
/// <param name="a">Vector a (values)</param>
/// <param name="b">Vector b (signs)</param>
/// <returns>Vector of sixteen signed 8-bit results</returns>
[DebuggerStepThrough]
public static v128 sign_epi8(v128 a, v128 b)
{
    v128 result = default(v128);
    sbyte* dest = &result.SByte0;
    sbyte* values = &a.SByte0;
    sbyte* signs = &b.SByte0;

    for (int lane = 0; lane < 16; ++lane)
    {
        sbyte control = signs[lane];
        sbyte value = values[lane];

        if (control == 0)
        {
            dest[lane] = 0;
        }
        else if (control < 0)
        {
            // The negation happens in int and is truncated back to 8 bits.
            dest[lane] = (sbyte)-value;
        }
        else
        {
            dest[lane] = value;
        }
    }

    return result;
}
// _mm_sign_epi16
/// <summary> For each packed 16-bit integer: negate the element of "a" when the corresponding signed element of "b" is negative, produce zero when the element of "b" is zero, and pass the element of "a" through unchanged otherwise. </summary>
/// <param name="a">Vector a (values)</param>
/// <param name="b">Vector b (signs)</param>
/// <returns>Vector of eight signed 16-bit results</returns>
[DebuggerStepThrough]
public static v128 sign_epi16(v128 a, v128 b)
{
    v128 result = default(v128);
    short* dest = &result.SShort0;
    short* values = &a.SShort0;
    short* signs = &b.SShort0;

    for (int lane = 0; lane < 8; ++lane)
    {
        short control = signs[lane];
        short value = values[lane];

        if (control == 0)
        {
            dest[lane] = 0;
        }
        else if (control < 0)
        {
            // The negation happens in int and is truncated back to 16 bits.
            dest[lane] = (short)-value;
        }
        else
        {
            dest[lane] = value;
        }
    }

    return result;
}
// _mm_sign_epi32
/// <summary> For each packed 32-bit integer: negate the element of "a" when the corresponding signed element of "b" is negative, produce zero when the element of "b" is zero, and pass the element of "a" through unchanged otherwise. </summary>
/// <param name="a">Vector a (values)</param>
/// <param name="b">Vector b (signs)</param>
/// <returns>Vector of four signed 32-bit results</returns>
[DebuggerStepThrough]
public static v128 sign_epi32(v128 a, v128 b)
{
    v128 result = default(v128);
    int* dest = &result.SInt0;
    int* values = &a.SInt0;
    int* signs = &b.SInt0;

    for (int lane = 0; lane < 4; ++lane)
    {
        int control = signs[lane];
        int value = values[lane];

        if (control == 0)
        {
            dest[lane] = 0;
        }
        else if (control < 0)
        {
            dest[lane] = -value;
        }
        else
        {
            dest[lane] = value;
        }
    }

    return result;
}
}
}
}