425 lines
16 KiB
HLSL
425 lines
16 KiB
HLSL
/*
|
|
MIT License
|
|
|
|
Copyright (c) 2022 Kleber Garcia
|
|
|
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
of this software and associated documentation files (the "Software"), to deal
|
|
in the Software without restriction, including without limitation the rights
|
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
copies of the Software, and to permit persons to whom the Software is
|
|
furnished to do so, subject to the following conditions:
|
|
|
|
The above copyright notice and this permission notice shall be included in all
|
|
copies or substantial portions of the Software.
|
|
|
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
SOFTWARE.
|
|
*/
|
|
|
|
#ifndef __COVERAGE__
|
|
#define __COVERAGE__
|
|
|
|
//Utilities for coverage bit mask on an 8x8 grid.
|
|
namespace Coverage
|
|
{
|
|
|
|
//**************************************************************************************************************/
|
|
// How to use
|
|
//**************************************************************************************************************/
|
|
/*
|
|
To utilize this library, first call the GenLUT function at the beginning of your compute shader.
|
|
This function must be followed by a group sync. Example follows:
|
|
|
|
...
|
|
Coverage::GenLUT(groupThreadIndex);
|
|
GroupMemoryBarrierWithGroupSync();
|
|
...
|
|
|
|
Alternatively, you can dump the contents into buffer. The contents of the LUT are inside gs_quadMask, which is 64 entries.
|
|
|
|
After this use the coverage functions
|
|
|
|
*/
|
|
|
|
//**************************************************************************************************************/
|
|
// Coordinate System
|
|
//**************************************************************************************************************/
|
|
/*
|
|
The functions in this library follow the same convention: the input is a shape described by certain vertices,
|
|
output is a 64 bit mask with such shape's coverage.
|
|
|
|
The coordinate system is (0,0) for the top left of an 8x8 grid, and (1,1) for the bottom right.
|
|
The LSB represents coordinate (0,0), and sample points are centered on the pixel.
|
|
|
|
(0.0,0.0)                       (1.0,0.0)
|                               |
|_______________________________|
|   |   |   |   |   |   |   |   |
| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
|___|___|___|___|___|___|___|___|
|   |   |   |   |   |   |   |   |
| 8 | 9 | 10| 11| 12| 13| 14| 15|
|___|___|___|___|___|___|___|___|___(1.0, 2.0/8.0)
|
|
|
|
the center of bit 0 is at pixel (0.5, 0.5), i.e. (0.5/8.0, 0.5/8.0) in normalized coordinates, and so on
|
|
|
|
any points outside of the range (0,1) means they are outside the grid.
|
|
*/
|
|
|
|
//**************************************************************************************************************/
|
|
// Masks
|
|
//**************************************************************************************************************/
|
|
/*
|
|
Masks are stored in a packed 64 bit represented by uint2.
|
|
x component represents the first 32 bits, y component the next 32 bits.
|
|
*/
|
|
|
|
//**************************************************************************************************************/
|
|
// coverage API
|
|
//**************************************************************************************************************/
|
|
|
|
/*
|
|
LUT for the 4x4 quad mask. See the BuildQuadMask function.
|
|
4 states for horizontal flipping and vertical flipping
|
|
You can dump this lut to a buffer, and preload it manually,
|
|
or just regenerate it in your thread group
|
|
*/
|
|
// Layout written by GenLUT, 16 entries per bank, indexed inside a bank by the 4 bit increment mask:
//   [ 0..15] neutral quads from BuildQuadMask
//   [16..31] neutral quads flipped in X
//   [32..47] neutral quads transposed
//   [48..63] neutral quads flipped in X, transposed, then inverted and masked to the 4x4 bits
groupshared uint gs_quadMask[16 * 4];
|
|
|
|
/*
|
|
Call this function to generate the coverage 4x4 luts
|
|
groupThreadIndex - the thread index.
|
|
NOTE: must sync group threads after calling this.
|
|
*/
|
|
// Only threads with groupThreadIndex < 16 write LUT entries, so the thread group needs at least 16 threads.
// Call it from all threads of the group in uniform control flow.
void GenLUT(uint groupThreadIndex);
|
|
|
|
/*
|
|
Call this function to get a 64 bit coverage mask for a triangle.
|
|
v0, v1, v2 - the triangle coordinates in right hand ruling order
|
|
return - the coverage mask for this triangle
|
|
*/
|
|
// showFrontFace - when true, the result includes the intersection of the three edge masks.
// showBackface  - when true, the result includes the intersection of the three inverted edge masks
//                 (skipped when all three edge masks are empty).
uint2 TriangleCoverageMask(float2 v0, float2 v1, float2 v2, bool showFrontFace, bool showBackface);
|
|
|
|
|
|
/*
|
|
Call this function to get a 64 bit coverage mask for a line.
|
|
v0, v1 - the line coordinates.
|
|
thickness - thickness of line in normalized space. 1.0 means the entire 8 pixels in a tile
|
|
caps - extra pixels in the caps of the line in normalized space. 1.0 means 8 pixels in a tile
|
|
return - the coverage mask of this line
|
|
*/
|
|
// NOTE: the long edges are placed at +thickness and -thickness from the v0-v1 segment (thickness is applied
// on each side), and caps moves both end points outwards along the line direction.
uint2 LineCoverageMask(float2 v0, float2 v1, float thickness, float caps);
|
|
|
|
|
|
//**************************************************************************************************************/
|
|
// coverage implementation
|
|
//**************************************************************************************************************/
|
|
|
|
/*
    Builds the compact 4x4 coverage quad used as a LUT entry for line coverage.
    The line is assumed to have a positive slope of at most one row per column.

    "incrementMask" tells in which columns the line climbs one row. Only its low 4 bits
    are meaningful (GenLUT feeds the values 0..15).

    Each quad row occupies the low nibble of one byte: row r lives in bits r*8 .. r*8+3.
    Row 0 is always fully covered. Every set bit of incrementMask, taken from the lowest
    one up, starts the next row, which is covered only to the right of that bit.

    For example the increment mask
        1 0 1 0
    produces this 4x4 coverage quad (row 0 at the bottom):

        0 0 0 0
        0 0 0 1   <- 3rd bit tells the line to raise here
        0 1 1 1   <- first bit raises the line
        1 1 1 1   <- low axis is always covered
*/
uint BuildQuadMask(uint incrementMask)
{
    uint rowBits = 0xF;
    uint quad = rowBits;            // row 0
    uint pending = incrementMask;   // raises that have not produced a row yet

    for (int row = 1; row < 4 && pending != 0; ++row)
    {
        // the next raise happens at the lowest pending bit; the new row covers the columns after it
        int column = firstbitlow(pending);
        rowBits = (0xFu << (column + 1)) & 0xFu;
        pending &= pending - 1;     // drop the lowest set bit
        quad |= rowBits << (row * 8);
    }

    return quad;
}
|
|
|
|
// Reverses the bit order of the 4 bit nibble that starts at bit "offset" of "mask".
// The reversed nibble is returned at the same position; all other bits of the result are zero.
uint FlipNibble(uint mask, int offset)
{
    uint nibble = (mask >> offset) & 0xF;
    // swap neighbouring bits, then swap the two bit pairs
    nibble = ((nibble & 0x5) << 1) | ((nibble & 0xA) >> 1);
    nibble = ((nibble & 0x3) << 2) | ((nibble & 0xC) >> 2);
    return nibble << offset;
}
|
|
|
|
// Mirrors a 4x4 bit quad horizontally: the low nibble of each of the 4 bytes (one byte per row)
// is bit-reversed. The high nibble of every byte is zero in the result.
uint FlipQuadInX(uint mask)
{
    uint flipped = 0;
    [unroll]
    for (int row = 0; row < 4; ++row)
        flipped |= FlipNibble(mask, row * 8);
    return flipped;
}
|
|
|
|
// Transposes a 4x4 bit quad stored one row per byte (low nibble):
// the bit at row r / column c (bit r*8+c) ends up at row c / column r (bit c*8+r).
// Bits outside the 4x4 positions are dropped.
uint TransposeQuad(uint mask)
{
    uint transposed = 0;
    [unroll]
    for (int dstRow = 0; dstRow < 4; ++dstRow)
    {
        for (int dstCol = 0; dstCol < 4; ++dstCol)
        {
            const uint srcBit = (mask >> (dstCol * 8 + dstRow)) & 1u;
            transposed |= srcBit << (dstRow * 8 + dstCol);
        }
    }
    return transposed;
}
|
|
|
|
// Builds all the luts necessary for fast bit based coverage.
//
// Threads 0..15 each produce the four variants of one quad (entry index = increment mask):
//   gs_quadMask[i]      neutral quad
//   gs_quadMask[i + 16] flipped in X
//   gs_quadMask[i + 32] transposed
//   gs_quadMask[i + 48] flipped in X, transposed, inverted and masked to the 4x4 bits
// Threads >= 16 do nothing.
//
// Every variant is derived from a value the same thread just computed, so no group sync is
// needed inside this function. The caller still has to issue GroupMemoryBarrierWithGroupSync()
// before any thread reads gs_quadMask.
void GenLUT(uint groupThreadIndex)
{
    if (groupThreadIndex < 16)
    {
        const uint neutral = BuildQuadMask(groupThreadIndex);
        const uint flipped = FlipQuadInX(neutral);

        gs_quadMask[groupThreadIndex]      = neutral;
        gs_quadMask[groupThreadIndex + 16] = flipped;
        gs_quadMask[groupThreadIndex + 32] = TransposeQuad(neutral);
        gs_quadMask[groupThreadIndex + 48] = (~TransposeQuad(flipped)) & 0x0F0F0F0F;
    }
}
|
|
|
|
// A 2D line in slope / intercept form: f(x) = a * x + b.
struct AnalyticalLine
{
    float a;    // slope
    float b;    // value of the line at x = 0

    // Builds the line that goes through v0 and v1.
    // a = (v1.y - v0.y) / (v1.x - v0.x); nothing guards against v0.x == v1.x.
    void Build(float2 v0, float2 v1)
    {
        const float2 delta = v1 - v0;
        a = delta.y / delta.x;
        b = v1.y - a * v1.x;
    }

    // Builds a "flipped" version of the line through v0 and v1, used for lookup compression.
    // The stored line always has a slope in [0, 1]; the outputs describe how it was obtained:
    //   outFlipAxis    - x and y were swapped (the input is steeper than 45 degrees)
    //   outFlipX       - x was mirrored (x -> 1 - x) because the signs of the x and y deltas differ
    //   outIsRightHand - v0.y >= v1.y when the x delta is >= 0, v0.y > v1.y otherwise
    //                    (evaluated on the input points, before any flip)
    //   outValid       - false when both points still coincide after the flips
    void BuildFlipped(float2 v0, float2 v1, out bool outFlipX, out bool outFlipAxis, out bool outIsRightHand, out bool outValid)
    {
        float2 delta = v1 - v0;

        // all flags are taken from the untouched input
        outFlipAxis = abs(delta.y) > abs(delta.x);
        outFlipX = sign(delta.y) != sign(delta.x);
        if (delta.x >= 0)
            outIsRightHand = v0.y >= v1.y;
        else
            outIsRightHand = v0.y > v1.y;

        // make x the major axis
        if (outFlipAxis)
        {
            delta = delta.yx;
            v0 = v0.yx;
            v1 = v1.yx;
        }

        a = delta.y / delta.x;

        // mirror in x so that the slope becomes positive
        if (outFlipX)
        {
            v0.x = 1.0 - v0.x;
            v1.x = 1.0 - v1.x;
            a = -a;
        }

        b = v1.y - a * v1.x;
        outValid = any(v1 != v0);
    }

    // Evaluates f(x) = a * x + b.
    float Eval(float xval)
    {
        return a * xval + b;
    }

    // Evaluates f(x) = a * x + b for 4 inputs at once.
    float4 Eval4(float4 xvals)
    {
        return a * xvals + b;
    }

    // Returns the 2D point of the line at the given x.
    float2 PointAt(float xv)
    {
        float2 p;
        p.x = xv;
        p.y = Eval(xv);
        return p;
    }
};
|
|
|
|
/*
    Represents a set of bits in an 8x8 grid divided by a line.
    The grid is handled as 2 halves of 4 sample columns each, measured along the major axis
    of the flipped line (see AnalyticalLine::BuildFlipped).

    offsets[h] - row of the flipped line at the first sample column of half h, in pixels.
                 Can be negative or greater than 7 when the line passes outside the grid.
    masks[h]   - 4 bit increment mask of half h: bit k is set when the line climbs one row between
                 sample column k and k + 1. This is the index used to look up the quad coverage.
    isValid, flipX, flipAxis, isRightHand - the flags reported by AnalyticalLine::BuildFlipped.
    debugLine  - the unflipped analytical line through the two input points.
*/
struct LineArea
{
    int offsets[2];
    uint masks[2];
    bool isValid;
    bool flipX;
    bool flipAxis;
    bool isRightHand;
    AnalyticalLine debugLine;

    // Recovers a single point in the boundary of the line (where the line intersects a pixel),
    // expressed in the original, unflipped coordinate space.
    // There is a total of 8 possible points, so i is expected to be in 0..7.
    float2 GetBoundaryPoint(uint i)
    {
        int j = i & 0x3;    // column inside the half
        int m = i >> 2;     // which half
        // row = start row of the half + number of raises before column j
        int yval = offsets[m] + (int)countbits(((1u << j) - 1) & masks[m]);
        float2 v = float2(i + 0.5, yval + 0.5) * 1.0/8.0;
        if (flipX)
            v.x = 1.0 - v.x;
        if (flipAxis)
        {
            float2 tmp = v;
            v.xy = tmp.yx;
        }
        return v;
    }

    // Creates a line area object, based on 2 points on an 8x8 quad.
    // The quad coordinate domain is 0.0 -> 1.0 for both axes.
    // Anything negative or greater than 1.0 is by definition outside of the 8x8 quad.
    static LineArea Create(float2 v0, float2 v1)
    {
        LineArea data;

        // line debug data
        data.debugLine.Build(v0, v1);

        AnalyticalLine l;
        l.BuildFlipped(v0, v1, data.flipX, data.flipAxis, data.isRightHand, data.isValid);

        // x values of the 8 sample columns (pixel centres)
        const float4 xs0 = float4(0.5,1.5,2.5,3.5)/8.0;
        const float4 xs1 = float4(4.5,5.5,6.5,7.5)/8.0;

        // y values of the line at the 8 sample columns
        float4 ys0 = l.Eval4(xs0);
        float4 ys1 = l.Eval4(xs1);

        // row of the line at each sample column
        int4 ysi0 = (int4)floor(ys0 * 8.0 - 0.5);
        int4 ysi1 = (int4)floor(ys1 * 8.0 - 0.5);

        // Incremental masks: 1 where the row goes up from one column to the next.
        // The step is saturated to 0/1 on purpose. The flipped slope is at most 1, but float
        // rounding can produce a step of 2 or more, which used to spill past the 4 mask bits
        // and push the LUT index outside of its 16 entry bank.
        uint4 dysmask0 = (uint4)(int4(ysi0.yzw, ysi1.x) > ysi0.xyzw);
        uint4 dysmask1 = (uint4)(int4(ysi1.yzw, 0) > int4(ysi1.xyz, 0));

        // Final output, offset and mask
        data.offsets[0] = ysi0.x;
        data.masks[0] = dysmask0.x | (dysmask0.y << 1) | (dysmask0.z << 2) | (dysmask0.w << 3);
        data.offsets[1] = countbits(data.masks[0]) + data.offsets[0];
        data.masks[1] = dysmask1.x | (dysmask1.y << 1) | (dysmask1.z << 2) | (dysmask1.w << 3);
        return data;
    }
};
|
|
|
|
// Transposed case helper. Positions one LUT quad according to "offset":
// for a positive offset the quad is shifted left by up to 4 bits into the low nibbles and by up to
// 8 bits into the high nibbles; otherwise it is staged in the high nibbles, shifted right by up to
// 4 bits and moved back.
uint OffsetTransposedQuad(uint quad, int offset, uint leftSideMask)
{
    if (offset > 0)
        return ((quad << min(4, offset)) & leftSideMask) | ((quad << min(8, offset)) & ~leftSideMask);
    return ((quad << 4) >> min(4, -offset) & ~leftSideMask) >> 4;
}

// Transposed case helper. Returns the per row fill pattern for the area behind the line:
// no positive shift gives all ones (flipX) or zero, otherwise the bits at or above "shift" within a
// byte (flipX) or the bits below "shift" within 16 bits.
uint BackFillPattern(int shift, bool flipX)
{
    const uint lowBits = (shift > 0 ? 1u << shift : 1u >> -shift) - 1u;
    if (shift <= 0)
        return flipX ? ~0x0 : 0x0;
    return flipX ? (0xFF & ~lowBits) : (0xFFFF & lowBits);
}

// Horizontal case helper. Moves one side quad by "shift" bits (a multiple of whole rows, clamped
// to +-31) and keeps only the bits that belong to its 4 column lane.
uint OffsetSideQuad(uint quad, uint laneMask, int shift)
{
    const uint moved = shift > 0 ? (~quad & laneMask) << shift : ~(quad >> -shift);
    return moved & laneMask;
}

// Converts a LineArea into the 64 bit coverage mask of the half plane on one side of the line.
// Reads gs_quadMask, so the LUT must have been generated (GenLUT + group sync) beforehand.
// Returns 0 when the line area is not valid.
uint2 CreateCoverageMask(in LineArea lineArea)
{
    const uint leftSideMask = 0x0F0F0F0F;
    const uint2 horizontalMask = uint2(leftSideMask, ~leftSideMask);

    // prepare samples, the two halves trade places if there is mirroring in x
    const int firstHalf = lineArea.flipX ? 1 : 0;
    const int secondHalf = 1 - firstHalf;

    // LUT bank: +16 for flip in x, +32 for flipped axis
    const int lutBank = ((uint)lineArea.flipX << 4) | ((uint)lineArea.flipAxis << 5);

    const int2 offsets = int2(lineArea.offsets[firstHalf], lineArea.offsets[secondHalf]);
    const uint2 halfSamples = uint2(gs_quadMask[lineArea.masks[firstHalf] + lutBank],
                                    gs_quadMask[lineArea.masks[secondHalf] + lutBank]);

    uint2 coverage = 0;
    if (lineArea.flipAxis)
    {
        // Case where we have flipped axis / transpose. We generate the top and bottom part
        const int2 clampedOffsets = clamp(offsets, -31, 31);
        const uint2 workMask = leftSideMask << clamp(offsets, 0, 4);
        const uint2 topDownMasks = uint2(OffsetTransposedQuad(halfSamples.x, clampedOffsets.x, leftSideMask),
                                         OffsetTransposedQuad(halfSamples.y, clampedOffsets.y, leftSideMask));

        int2 backMaskShift = clampedOffsets;
        if (lineArea.flipX)
            backMaskShift = clamp(clampedOffsets + 4, -31, 31);

        const uint2 backBite = uint2(BackFillPattern(backMaskShift.x, lineArea.flipX),
                                     BackFillPattern(backMaskShift.y, lineArea.flipX));

        // replicate the fill pattern into the 4 rows of each half and add the quads
        coverage = backBite | (backBite << 8) | (backBite << 16) | (backBite << 24) | (topDownMasks & workMask);
    }
    else
    {
        // Case where the masks are positioned horizontally. We generate 4 quads
        const uint2 sideMasks = uint2(halfSamples.x, (halfSamples.y << 4));

        // row offsets as bit shifts (8 bits per row); zw are relative to the second group of 4 rows
        const int4 shifts = clamp((offsets.xyxy - int4(0,0,4,4)) << 3, -31, 31);

        const uint firstRows = OffsetSideQuad(sideMasks.x, horizontalMask.x, shifts.x)
                             | OffsetSideQuad(sideMasks.y, horizontalMask.y, shifts.y);
        const uint lastRows  = OffsetSideQuad(sideMasks.x, horizontalMask.x, shifts.z)
                             | OffsetSideQuad(sideMasks.y, horizontalMask.y, shifts.w);
        coverage = uint2(firstRows, lastRows);
    }

    if (lineArea.flipX)
        coverage = ~coverage;
    if (!lineArea.isRightHand)
        coverage = ~coverage;
    if (!lineArea.isValid)
        coverage = uint2(0, 0);
    return coverage;
}
|
|
|
|
// Returns the 64 bit coverage mask of the triangle v0, v1, v2.
//   showFrontFace - include the pixels covered by all three edge masks
//   showBackface  - include the pixels covered by none of the three edge masks, which is the
//                   triangle with the opposite winding. Ignored when all three edge masks are
//                   empty, otherwise the inverted masks would cover the whole tile.
// Reads gs_quadMask through CreateCoverageMask, so the LUT must be ready.
uint2 TriangleCoverageMask(float2 v0, float2 v1, float2 v2, bool showFrontFace, bool showBackface)
{
    const uint2 mask0 = Coverage::CreateCoverageMask(Coverage::LineArea::Create(v0, v1));
    const uint2 mask1 = Coverage::CreateCoverageMask(Coverage::LineArea::Create(v1, v2));
    const uint2 mask2 = Coverage::CreateCoverageMask(Coverage::LineArea::Create(v2, v0));

    const uint2 frontMask = mask0 & mask1 & mask2;
    const uint2 backMask = ~mask0 & ~mask1 & ~mask2;
    const bool anyEdgeCovers = any(mask0 != 0) || any(mask1 != 0) || any(mask2 != 0);

    uint2 result = uint2(0, 0);
    if (showFrontFace)
        result |= frontMask;
    if (showBackface && anyEdgeCovers)
        result |= backMask;
    return result;
}
|
|
|
|
// Returns the 64 bit coverage mask of a thick line from v0 to v1.
//   thickness - distance from the v0-v1 segment to each long edge, in normalized space
//               (the edges sit at +thickness and -thickness)
//   caps      - how far both end points are pushed outwards along the line, in normalized space
// The line is rasterized as the intersection of the 4 half planes of its bounding rectangle.
// A degenerate line (v0 == v1, or a delta too small to normalize) has no direction and
// returns an empty mask.
// Reads gs_quadMask through CreateCoverageMask, so the LUT must be ready.
uint2 LineCoverageMask(float2 v0, float2 v1, float thickness, float caps)
{
    const float2 delta = v1 - v0;

    // written as !(x > 0) so that NaN is rejected as well
    if (!(dot(delta, delta) > 0.0))
        return uint2(0, 0);

    float2 lineVector = normalize(delta);

    // perpendicular of the line direction, scaled by the thickness
    float2 D = cross(float3(lineVector, 0.0),float3(0,0,1)).xy * thickness;
    v0 -= caps * lineVector;
    v1 += caps * lineVector;

    // two long edges, then the two cap edges
    uint2 mask0 = Coverage::CreateCoverageMask(Coverage::LineArea::Create(v0 - D, v1 - D));
    uint2 mask1 = Coverage::CreateCoverageMask(Coverage::LineArea::Create(v1 + D, v0 + D));
    uint2 mask2 = Coverage::CreateCoverageMask(Coverage::LineArea::Create(v0 + D, v0 - D));
    uint2 mask3 = Coverage::CreateCoverageMask(Coverage::LineArea::Create(v1 - D, v1 + D));
    return mask0 & mask1 & mask3 & mask2;
}
|
|
|
|
}
|
|
|
|
#endif
|