Rasagar/Library/PackageCache/com.unity.burst/Runtime/Intrinsics/x86/F16C.cs

using System.Diagnostics;

namespace Unity.Burst.Intrinsics
{
    public unsafe static partial class X86
    {
        /// <summary>
        /// F16C intrinsics
        /// </summary>
        public static class F16C
        {
            /// <summary>
            /// Evaluates to true at compile time if F16C intrinsics are supported.
            ///
            /// Burst does not track F16C as a separate feature: to keep the number of supported
            /// feature sets small, this simply reports whatever <c>Avx2.IsAvx2Supported</c> reports.
            /// </summary>
            public static bool IsF16CSupported
            {
                get
                {
                    return Avx2.IsAvx2Supported;
                }
            }

            /// <summary>
            /// Converts a half (hiding in a ushort) to a float (hiding in a uint).
            /// </summary>
            /// <remarks>
            /// Works purely on bit patterns: the sign bit is moved to bit 31, the 5-bit exponent is re-biased
            /// from 15 to 127 and the 10-bit mantissa is moved to the top of the 23-bit float mantissa.
            /// Zeros keep their sign, half denormals are renormalized (every half denormal is representable
            /// as a normal float), and Inf/NaN map to the all-ones float exponent with the payload preserved.
            /// </remarks>
            /// <param name="h">The half to convert</param>
            /// <returns>The float result</returns>
            [DebuggerStepThrough]
            private static uint HalfToFloat(ushort h)
            {
                var signed = (h & 0x8000u) != 0;
                var exponent = (h >> 10) & 0x1fu;
                var mantissa = h & 0x3ffu;

                var result = signed ? 0x80000000u : 0u;

                // Signed zero needs nothing besides the sign bit.
                if (!(exponent == 0 && mantissa == 0))
                {
                    // Denormal (converts to normalized)
                    if (exponent == 0)
                    {
                        // Adjust mantissa so it's normalized (and keep track of exponent adjustment)
                        exponent = -1;
                        do
                        {
                            exponent++;
                            mantissa <<= 1;
                        } while ((mantissa & 0x400) == 0);

                        result |= (uint)((127 - 15 - exponent) << 23);

                        // Have to re-mask the mantissa here because we've been shifting bits up.
                        result |= (mantissa & 0x3ff) << 13;
                    }
                    else
                    {
                        var isInfOrNan = exponent == 0x1f;

                        // The conditional must be evaluated BEFORE the shift: '<<' binds tighter than '?:',
                        // so without the parentheses Inf/NaN would OR in a bare 255 instead of 255 << 23.
                        var floatExponent = isInfOrNan ? 255 : (127 - 15 + exponent);

                        result |= (uint)(floatExponent << 23);
                        result |= mantissa << 13;
                    }
                }

                return result;
            }

            /// <summary>
            /// Convert the packed half-precision (16-bit) floating-point elements UShort0..UShort3 of a to packed single-precision (32-bit) floating-point elements, and return them as a 128-bit vector.
            /// </summary>
            /// <remarks>
            /// **** vcvtph2ps xmm, xmm
            /// </remarks>
            /// <param name="a">Vector whose first four 16-bit elements hold the halves to convert</param>
            /// <returns>Vector holding the four converted floats</returns>
            [DebuggerStepThrough]
            public static v128 cvtph_ps(v128 a)
            {
                // Each element is converted independently, in element order.
                var float0 = HalfToFloat(a.UShort0);
                var float1 = HalfToFloat(a.UShort1);
                var float2 = HalfToFloat(a.UShort2);
                var float3 = HalfToFloat(a.UShort3);

                return new v128(float0, float1, float2, float3);
            }

            /// <summary>
            /// Convert all eight packed half-precision (16-bit) floating-point elements of a to packed single-precision (32-bit) floating-point elements, and return them as a 256-bit vector.
            /// </summary>
            /// <remarks>
            /// **** vcvtph2ps ymm, xmm
            /// </remarks>
            /// <param name="a">Vector holding the eight halves to convert</param>
            /// <returns>Vector holding the eight converted floats</returns>
            [DebuggerStepThrough]
            public static v256 mm256_cvtph_ps(v128 a)
            {
                // Each element is converted independently, in element order.
                var float0 = HalfToFloat(a.UShort0);
                var float1 = HalfToFloat(a.UShort1);
                var float2 = HalfToFloat(a.UShort2);
                var float3 = HalfToFloat(a.UShort3);
                var float4 = HalfToFloat(a.UShort4);
                var float5 = HalfToFloat(a.UShort5);
                var float6 = HalfToFloat(a.UShort6);
                var float7 = HalfToFloat(a.UShort7);

                return new v256(float0, float1, float2, float3, float4, float5, float6, float7);
            }

            // Lookup tables for the table-driven float -> half conversion, using
            // ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf
            //
            // BaseTable has 512 entries and is indexed by the top 9 bits of the float bit pattern
            // (sign bit followed by the 8 exponent bits, i.e. "f >> 23" in FloatToHalf). Each entry is
            // the half bit pattern that the right-shifted float mantissa is added to:
            //   - entries   0..255 serve inputs with the sign bit clear,
            //   - entries 256..511 repeat them with the half sign bit (0x8000) set.
            // Within each half of the table the entries are 0 for exponents too small to represent,
            // then a single mantissa bit (0x0001..0x0200), then the half exponent field
            // (0x0400..0x7800), and finally 0x7C00 (all exponent bits set) for everything larger.
            private static readonly ushort[] BaseTable =
            {
                0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
                0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
                0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
                0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
                0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
                0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
                0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, 0x0100,
                0x0200, 0x0400, 0x0800, 0x0C00, 0x1000, 0x1400, 0x1800, 0x1C00, 0x2000, 0x2400, 0x2800, 0x2C00, 0x3000, 0x3400, 0x3800, 0x3C00,
                0x4000, 0x4400, 0x4800, 0x4C00, 0x5000, 0x5400, 0x5800, 0x5C00, 0x6000, 0x6400, 0x6800, 0x6C00, 0x7000, 0x7400, 0x7800, 0x7C00,
                0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,
                0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,
                0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,
                0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,
                0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,
                0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,
                0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,
                0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
                0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
                0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
                0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
                0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
                0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
                0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8001, 0x8002, 0x8004, 0x8008, 0x8010, 0x8020, 0x8040, 0x8080, 0x8100,
                0x8200, 0x8400, 0x8800, 0x8C00, 0x9000, 0x9400, 0x9800, 0x9C00, 0xA000, 0xA400, 0xA800, 0xAC00, 0xB000, 0xB400, 0xB800, 0xBC00,
                0xC000, 0xC400, 0xC800, 0xCC00, 0xD000, 0xD400, 0xD800, 0xDC00, 0xE000, 0xE400, 0xE800, 0xEC00, 0xF000, 0xF400, 0xF800, 0xFC00,
                0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,
                0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,
                0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,
                0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,
                0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,
                0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,
                0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,
            };

            // Companion table to BaseTable: 512 entries indexed the same way ("f >> 23" in FloatToHalf).
            // Each entry is the number of bits the 23-bit float mantissa is shifted right before it is
            // added to the matching BaseTable entry:
            //   - 13     keeps the top 10 mantissa bits (used where BaseTable holds a half exponent field,
            //            and for the all-ones float exponent at indices 255 and 511),
            //   - 14..23 keep progressively fewer bits (used where BaseTable holds a single mantissa bit),
            //   - 24     discards the whole mantissa (used where BaseTable is zero or 0x7C00/0xFC00
            //            for a float exponent other than all-ones).
            private static readonly sbyte[] ShiftTable =
            {
                24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
                24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
                24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
                24, 24, 24, 24, 24, 24, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
                13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
                24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
                24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
                24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 13,
                24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
                24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
                24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
                24, 24, 24, 24, 24, 24, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
                13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
                24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
                24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
                24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 13,
            };

            /// <summary>
            /// Converts a float (hiding in a uint) to a half (hiding in a ushort).
            /// </summary>
            /// <remarks>
            /// The truncated result is built from BaseTable/ShiftTable (both indexed by the sign and exponent
            /// bits of <paramref name="f"/>) and then adjusted for the requested rounding mode:
            /// round-to-nearest (ties to even), truncate, round up or round down. Any other value of
            /// <paramref name="rounding"/> leaves the table result unadjusted.
            /// NaN inputs always produce a NaN: the quiet bit is forced so that a payload living only in
            /// the discarded low mantissa bits cannot collapse into an infinity.
            /// </remarks>
            /// <param name="f">The float to convert</param>
            /// <param name="rounding">Rounding mode (one of the <c>RoundingMode.FROUND_*_NOEXC</c> values cast to int)</param>
            /// <returns>The half result</returns>
            [DebuggerStepThrough]
            private static ushort FloatToHalf(uint f, int rounding)
            {
                var exponentAndSign = f >> 23;
                var mantissa = f & 0x7FFFFFu;
                var shift = ShiftTable[exponentAndSign];

                var result = (uint)(BaseTable[exponentAndSign] + (ushort)(mantissa >> shift));

                // NaN in, NaN out. Without forcing the quiet bit, a NaN whose payload sits entirely in the
                // low 13 mantissa bits would be truncated to 0x7C00/0xFC00, i.e. an infinity.
                if (((exponentAndSign & 0xFF) == 0xFF) && (mantissa != 0))
                {
                    return (ushort)(result | 0x0200u);
                }

                // Check if the result is not Inf (NaN has already been handled above).
                var isFinite = (result & 0x7C00) != 0x7C00;
                var isNegative = (result & 0x8000) != 0;

                // The mantissa bits that the table-driven truncation threw away.
                var discarded = mantissa & ((1u << shift) - 1u);

                if (rounding == (int)RoundingMode.FROUND_NINT_NOEXC)
                {
                    if ((exponentAndSign & 0xFF) == 102)
                    {
                        // Magnitude is in [2^-25, 2^-24): exactly 2^-25 is a tie between zero and the
                        // smallest half denormal and goes to the even candidate (zero); anything larger
                        // is closer to the smallest denormal.
                        if (mantissa != 0)
                        {
                            result++;
                        }
                    }
                    else if (isFinite)
                    {
                        // Round to nearest, ties to even. For shift == 24 the discarded bits are always
                        // below the halfway point, so nothing happens (magnitude below 2^-25).
                        var halfway = 1u << (shift - 1);
                        var isAboveHalfway = discarded > halfway;
                        var isTieWithOddResult = (discarded == halfway) && ((result & 0x1u) != 0);

                        if (isAboveHalfway || isTieWithOddResult)
                        {
                            // A carry out of the mantissa correctly bumps the exponent (up to Inf).
                            result++;
                        }
                    }
                }
                else if (rounding == (int)RoundingMode.FROUND_TRUNC_NOEXC)
                {
                    if (!isFinite)
                    {
                        // shift is 24 (even) when a finite input overflowed to Inf: step back to the largest
                        // finite half. shift is 13 (odd) for a genuine Inf input: leave it alone.
                        result -= (uint)(~shift & 0x1);
                    }
                }
                else if (rounding == (int)RoundingMode.FROUND_CEIL_NOEXC)
                {
                    if (isFinite && !isNegative)
                    {
                        if ((exponentAndSign <= 102) && (exponentAndSign != 0))
                        {
                            // Positive and too small for any half denormal: round up to the smallest one.
                            result++;
                        }
                        else if (discarded != 0)
                        {
                            // Inexact positive value: rounding up moves away from zero.
                            result++;
                        }
                    }

                    var resultIsNegativeInf = (result == 0xFC00);
                    var inputIsNotNegativeInfOrNan = (exponentAndSign != 0x1FF);

                    // A finite negative input that overflowed rounds up to the most negative finite half.
                    if (resultIsNegativeInf && inputIsNotNegativeInfOrNan)
                    {
                        result--;
                    }
                }
                else if (rounding == (int)RoundingMode.FROUND_FLOOR_NOEXC)
                {
                    if (isFinite && isNegative)
                    {
                        if ((exponentAndSign <= 358) && (exponentAndSign != 256))
                        {
                            // Negative and too small for any half denormal: round down to the smallest one.
                            result++;
                        }
                        else if (discarded != 0)
                        {
                            // Inexact negative value: rounding down moves away from zero.
                            result++;
                        }
                    }

                    var resultIsPositiveInf = (result == 0x7C00);
                    var inputIsNotPositiveInfOrNan = (exponentAndSign != 0xFF);

                    // A finite positive input that overflowed rounds down to the largest finite half.
                    if (resultIsPositiveInf && inputIsNotPositiveInfOrNan)
                    {
                        result--;
                    }
                }

                return (ushort)result;
            }

            /// <summary>
            /// Resolves the rounding mode actually used for a float to half conversion.
            /// </summary>
            /// <remarks>
            /// <c>RoundingMode.FROUND_RINT_NOEXC</c> means "use the current rounding mode", so it is replaced by the
            /// explicit mode selected by the rounding-control bits of <c>MXCSR</c>. Every other value is returned unchanged.
            /// </remarks>
            /// <param name="rounding">Rounding mode requested by the caller</param>
            /// <returns>The rounding mode to hand to FloatToHalf</returns>
            [DebuggerStepThrough]
            private static int ResolveRoundingMode(int rounding)
            {
                if (rounding != (int)RoundingMode.FROUND_RINT_NOEXC)
                {
                    return rounding;
                }

                switch (MXCSR & MXCSRBits.RoundingControlMask)
                {
                    case MXCSRBits.RoundToNearest:
                        return (int)RoundingMode.FROUND_NINT_NOEXC;
                    case MXCSRBits.RoundDown:
                        return (int)RoundingMode.FROUND_FLOOR_NOEXC;
                    case MXCSRBits.RoundUp:
                        return (int)RoundingMode.FROUND_CEIL_NOEXC;
                    case MXCSRBits.RoundTowardZero:
                        return (int)RoundingMode.FROUND_TRUNC_NOEXC;
                    default:
                        return rounding;
                }
            }

            /// <summary>
            /// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst.
            /// The four results occupy the first four 16-bit elements of the returned vector; the remaining four are set to zero.
            ///
            /// Rounding is done according to the rounding parameter, which can be one of (cast to int):
            /// <c>RoundingMode.FROUND_NINT_NOEXC</c> (round to nearest),
            /// <c>RoundingMode.FROUND_FLOOR_NOEXC</c> (round down),
            /// <c>RoundingMode.FROUND_CEIL_NOEXC</c> (round up),
            /// <c>RoundingMode.FROUND_TRUNC_NOEXC</c> (truncate), or
            /// <c>RoundingMode.FROUND_RINT_NOEXC</c> (use the rounding mode currently selected in MXCSR).
            /// </summary>
            /// <remarks>
            /// **** cvtps2ph xmm, xmm, imm
            /// </remarks>
            /// <param name="a">Vector a</param>
            /// <param name="rounding">Rounding mode</param>
            /// <returns>Vector</returns>
            [DebuggerStepThrough]
            public static v128 cvtps_ph(v128 a, int rounding)
            {
                rounding = ResolveRoundingMode(rounding);

                return new v128(FloatToHalf(a.UInt0, rounding), FloatToHalf(a.UInt1, rounding), FloatToHalf(a.UInt2, rounding), FloatToHalf(a.UInt3, rounding), 0, 0, 0, 0);
            }

            /// <summary>
            /// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst.
            ///
            /// Rounding is done according to the rounding parameter, which can be one of (cast to int):
            /// <c>RoundingMode.FROUND_NINT_NOEXC</c> (round to nearest),
            /// <c>RoundingMode.FROUND_FLOOR_NOEXC</c> (round down),
            /// <c>RoundingMode.FROUND_CEIL_NOEXC</c> (round up),
            /// <c>RoundingMode.FROUND_TRUNC_NOEXC</c> (truncate), or
            /// <c>RoundingMode.FROUND_RINT_NOEXC</c> (use the rounding mode currently selected in MXCSR).
            /// </summary>
            /// <remarks>
            /// **** cvtps2ph xmm, ymm, imm
            /// </remarks>
            /// <param name="a">Vector a</param>
            /// <param name="rounding">Rounding mode</param>
            /// <returns>Vector</returns>
            [DebuggerStepThrough]
            public static v128 mm256_cvtps_ph(v256 a, int rounding)
            {
                rounding = ResolveRoundingMode(rounding);

                return new v128(FloatToHalf(a.UInt0, rounding), FloatToHalf(a.UInt1, rounding), FloatToHalf(a.UInt2, rounding), FloatToHalf(a.UInt3, rounding), FloatToHalf(a.UInt4, rounding), FloatToHalf(a.UInt5, rounding), FloatToHalf(a.UInt6, rounding), FloatToHalf(a.UInt7, rounding));
            }
        }
    }
}
deneme 2024-08-26 13:07:20 -07:00			`using System.Diagnostics;`

			`namespace Unity.Burst.Intrinsics`
			`{`
			`public unsafe static partial class X86`
			`{`
			`/// <summary>`
			`/// F16C intrinsics`
			`/// </summary>`
			`public static class F16C`
			`{`
			`/// <summary>`
			`/// Evaluates to true at compile time if F16C intrinsics are supported.`
			`///`
			`/// Burst ties F16C support to AVX2 support to simplify feature sets to support.`
			`/// </summary>`
			`public static bool IsF16CSupported { get { return Avx2.IsAvx2Supported; } }`

			`/// <summary>`
			`/// Converts a half (hiding in a ushort) to a float (hiding in a uint).`
			`/// </summary>`
			`/// <param name="h">The half to convert</param>`
			`/// <returns>The float result</returns>`
			`[DebuggerStepThrough]`
			`private static uint HalfToFloat(ushort h)`
			`{`
			`var signed = (h & 0x8000u) != 0;`
			`var exponent = (h >> 10) & 0x1fu;`
			`var mantissa = h & 0x3ffu;`

			`var result = signed ? 0x80000000u : 0u;`

			`if (!(exponent == 0 && mantissa == 0))`
			`{`
			`// Denormal (converts to normalized)`
			`if (exponent == 0)`
			`{`
			`// Adjust mantissa so it's normalized (and keep track of exponent adjustment)`
			`exponent = -1;`
			`do`
			`{`
			`exponent++;`
			`mantissa <<= 1;`
			`} while ((mantissa & 0x400) == 0);`

			`result \|= (uint)((127 - 15 - exponent) << 23);`

			`// Have to re-mask the mantissa here because we've been shifting bits up.`
			`result \|= (mantissa & 0x3ff) << 13;`
			`}`
			`else`
			`{`
			`var isInfOrNan = exponent == 0x1f;`
			`result \|= (uint)(isInfOrNan ? 255 : (127 - 15 + exponent) << 23);`
			`result \|= mantissa << 13;`
			`}`
			`}`

			`return result;`
			`}`

			`/// <summary>`
			`/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.`
			`/// </summary>`
			`/// <remarks>`
			`/// **** vcvtph2ps xmm, xmm`
			`/// </remarks>`
			`/// <param name="a">Vector a</param>`
			`/// <returns>Vector</returns>`
			`[DebuggerStepThrough]`
			`public static v128 cvtph_ps(v128 a)`
			`{`
			`return new v128(HalfToFloat(a.UShort0), HalfToFloat(a.UShort1), HalfToFloat(a.UShort2), HalfToFloat(a.UShort3));`
			`}`

			`/// <summary>`
			`/// Convert packed half-precision (16-bit) floating-point elements in a to packed single-precision (32-bit) floating-point elements, and store the results in dst.`
			`/// </summary>`
			`/// <remarks>`
			`/// **** vcvtph2ps ymm, xmm`
			`/// </remarks>`
			`/// <param name="a">Vector a</param>`
			`/// <returns>Vector</returns>`
			`[DebuggerStepThrough]`
			`public static v256 mm256_cvtph_ps(v128 a)`
			`{`
			`return new v256(HalfToFloat(a.UShort0), HalfToFloat(a.UShort1), HalfToFloat(a.UShort2), HalfToFloat(a.UShort3), HalfToFloat(a.UShort4), HalfToFloat(a.UShort5), HalfToFloat(a.UShort6), HalfToFloat(a.UShort7));`
			`}`

			`// Using ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf`
			`private static readonly ushort[] BaseTable =`
			`{`
			`0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,`
			`0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,`
			`0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,`
			`0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,`
			`0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,`
			`0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,`
			`0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, 0x0100,`
			`0x0200, 0x0400, 0x0800, 0x0C00, 0x1000, 0x1400, 0x1800, 0x1C00, 0x2000, 0x2400, 0x2800, 0x2C00, 0x3000, 0x3400, 0x3800, 0x3C00,`
			`0x4000, 0x4400, 0x4800, 0x4C00, 0x5000, 0x5400, 0x5800, 0x5C00, 0x6000, 0x6400, 0x6800, 0x6C00, 0x7000, 0x7400, 0x7800, 0x7C00,`
			`0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,`
			`0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,`
			`0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,`
			`0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,`
			`0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,`
			`0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,`
			`0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00,`
			`0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,`
			`0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,`
			`0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,`
			`0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,`
			`0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,`
			`0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,`
			`0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8001, 0x8002, 0x8004, 0x8008, 0x8010, 0x8020, 0x8040, 0x8080, 0x8100,`
			`0x8200, 0x8400, 0x8800, 0x8C00, 0x9000, 0x9400, 0x9800, 0x9C00, 0xA000, 0xA400, 0xA800, 0xAC00, 0xB000, 0xB400, 0xB800, 0xBC00,`
			`0xC000, 0xC400, 0xC800, 0xCC00, 0xD000, 0xD400, 0xD800, 0xDC00, 0xE000, 0xE400, 0xE800, 0xEC00, 0xF000, 0xF400, 0xF800, 0xFC00,`
			`0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,`
			`0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,`
			`0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,`
			`0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,`
			`0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,`
			`0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,`
			`0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00,`
			`};`

			`private static readonly sbyte[] ShiftTable =`
			`{`
			`24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,`
			`24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,`
			`24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,`
			`24, 24, 24, 24, 24, 24, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,`
			`13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,`
			`24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,`
			`24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,`
			`24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 13,`
			`24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,`
			`24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,`
			`24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,`
			`24, 24, 24, 24, 24, 24, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,`
			`13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,`
			`24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,`
			`24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,`
			`24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 13,`
			`};`

			`/// <summary>`
			`/// Converts a float (hiding in a uint) to a half (hiding in a ushort).`
			`/// </summary>`
			`/// <param name="f">The float to convert</param>`
			`/// <param name="rounding">Rounding mode</param>`
			`/// <returns>The half result</returns>`
			`[DebuggerStepThrough]`
			`private static ushort FloatToHalf(uint f, int rounding)`
			`{`
			`var exponentAndSign = f >> 23;`
			`var shift = ShiftTable[exponentAndSign];`

			`var result = (uint)(BaseTable[exponentAndSign] + (ushort)((f & 0x7FFFFFu) >> shift));`

			`// Check if the result is not Inf or NaN.`
			`var isFinite = (result & 0x7C00) != 0x7C00;`
			`var isNegative = (result & 0x8000) != 0;`

			`if (rounding == (int)RoundingMode.FROUND_NINT_NOEXC)`
			`{`
			`var fWithRoundingBitPreserved = (f & 0x7FFFFFu) >> (shift - 1);`

			`if ((exponentAndSign & 0xFF) == 102)`
			`{`
			`result++;`
			`}`
			`if (isFinite && ((fWithRoundingBitPreserved & 0x1u) != 0))`
			`{`
			`result++;`
			`}`
			`}`
			`else if (rounding == (int)RoundingMode.FROUND_TRUNC_NOEXC)`
			`{`
			`if (!isFinite)`
			`{`
			`result -= (uint)(~shift & 0x1);`
			`}`
			`}`
			`else if (rounding == (int)RoundingMode.FROUND_CEIL_NOEXC)`
			`{`
			`if (isFinite && !isNegative)`
			`{`
			`if ((exponentAndSign <= 102) && (exponentAndSign != 0))`
			`{`
			`result++;`
			`}`
			`else if ((f & 0x7FFFFFu & ((1u << shift) - 1u)) != 0)`
			`{`
			`result++;`
			`}`
			`}`

			`var resultIsNegativeInf = (result == 0xFC00);`
			`var inputIsNotNegativeInfOrNan = (exponentAndSign != 0x1FF);`

			`if (resultIsNegativeInf && inputIsNotNegativeInfOrNan)`
			`{`
			`result--;`
			`}`
			`}`
			`else if (rounding == (int)RoundingMode.FROUND_FLOOR_NOEXC)`
			`{`
			`if (isFinite && isNegative)`
			`{`
			`if ((exponentAndSign <= 358) && (exponentAndSign != 256))`
			`{`
			`result++;`
			`}`
			`else if ((f & 0x7FFFFFu & ((1u << shift) - 1u)) != 0)`
			`{`
			`result++;`
			`}`
			`}`

			`var resultIsPositiveInf = (result == 0x7C00);`
			`var inputIsNotPositiveInfOrNan = (exponentAndSign != 0xFF);`

			`if (resultIsPositiveInf && inputIsNotPositiveInfOrNan)`
			`{`
			`result--;`
			`}`
			`}`

			`return (ushort)result;`
			`}`

			`/// <summary>`
			`/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst.`
			`///`
			`/// Rounding is done according to the rounding parameter, which can be one of:`
			`/// </summary>`
			`/// <remarks>`
			`/// **** cvtps2ph xmm, xmm, imm`
			`/// </remarks>`
			`/// <param name="a">Vector a</param>`
			`/// <param name="rounding">Rounding mode</param>`
			`/// <returns>Vector</returns>`
			`[DebuggerStepThrough]`
			`public static v128 cvtps_ph(v128 a, int rounding)`
			`{`
			`if (rounding == (int)RoundingMode.FROUND_RINT_NOEXC)`
			`{`
			`switch (MXCSR & MXCSRBits.RoundingControlMask)`
			`{`
			`case MXCSRBits.RoundToNearest:`
			`rounding = (int)RoundingMode.FROUND_NINT_NOEXC;`
			`break;`
			`case MXCSRBits.RoundDown:`
			`rounding = (int)RoundingMode.FROUND_FLOOR_NOEXC;`
			`break;`
			`case MXCSRBits.RoundUp:`
			`rounding = (int)RoundingMode.FROUND_CEIL_NOEXC;`
			`break;`
			`case MXCSRBits.RoundTowardZero:`
			`rounding = (int)RoundingMode.FROUND_TRUNC_NOEXC;`
			`break;`
			`}`
			`}`

			`return new v128(FloatToHalf(a.UInt0, rounding), FloatToHalf(a.UInt1, rounding), FloatToHalf(a.UInt2, rounding), FloatToHalf(a.UInt3, rounding), 0, 0, 0, 0);`
			`}`

			`/// <summary>`
			`/// Convert packed single-precision (32-bit) floating-point elements in a to packed half-precision (16-bit) floating-point elements, and store the results in dst.`
			`///`
			`/// Rounding is done according to the rounding parameter, which can be one of:`
			`/// </summary>`
			`/// <remarks>`
			`/// **** cvtps2ph xmm, ymm, imm`
			`/// </remarks>`
			`/// <param name="a">Vector a</param>`
			`/// <param name="rounding">Rounding mode</param>`
			`/// <returns>Vector</returns>`
			`[DebuggerStepThrough]`
			`public static v128 mm256_cvtps_ph(v256 a, int rounding)`
			`{`
			`if (rounding == (int)RoundingMode.FROUND_RINT_NOEXC)`
			`{`
			`switch (MXCSR & MXCSRBits.RoundingControlMask)`
			`{`
			`case MXCSRBits.RoundToNearest:`
			`rounding = (int)RoundingMode.FROUND_NINT_NOEXC;`
			`break;`
			`case MXCSRBits.RoundDown:`
			`rounding = (int)RoundingMode.FROUND_FLOOR_NOEXC;`
			`break;`
			`case MXCSRBits.RoundUp:`
			`rounding = (int)RoundingMode.FROUND_CEIL_NOEXC;`
			`break;`
			`case MXCSRBits.RoundTowardZero:`
			`rounding = (int)RoundingMode.FROUND_TRUNC_NOEXC;`
			`break;`
			`}`
			`}`

			`return new v128(FloatToHalf(a.UInt0, rounding), FloatToHalf(a.UInt1, rounding), FloatToHalf(a.UInt2, rounding), FloatToHalf(a.UInt3, rounding), FloatToHalf(a.UInt4, rounding), FloatToHalf(a.UInt5, rounding), FloatToHalf(a.UInt6, rounding), FloatToHalf(a.UInt7, rounding));`
			`}`
			`}`
			`}`
			`}`