Skip to content

Commit

Permalink
Implement pre-vertex-shader backface culling on the GPU.
Browse files Browse the repository at this point in the history
Decreases frametime by ~16% on my hardware.
Not as much as I hoped, but still worth a lot.
  • Loading branch information
IntegratedQuantum committed Jun 6, 2024
1 parent 50076d1 commit c225efa
Show file tree
Hide file tree
Showing 4 changed files with 85 additions and 27 deletions.
2 changes: 1 addition & 1 deletion assets/cubyz/shaders/chunks/chunk_vertex.vs
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ struct ChunkData {
int visibilityMask;
int voxelSize;
uint vertexStartOpaque;
uint vertexCountOpaque;
uint faceCountsByNormalOpaque[7];
uint vertexStartTransparent;
uint vertexCountTransparent;
};
Expand Down
68 changes: 52 additions & 16 deletions assets/cubyz/shaders/chunks/fillIndirectBuffer.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ struct ChunkData {
int visibilityMask;
int voxelSize;
uint vertexStartOpaque;
uint vertexCountOpaque;
uint faceCountsByNormalOpaque[7];
uint vertexStartTransparent;
uint vertexCountTransparent;
};
Expand Down Expand Up @@ -40,26 +40,62 @@ uniform uint chunkIDIndex;
uniform uint commandIndexStart;
uniform uint size;
uniform bool isTransparent;
uniform ivec3 playerPositionInteger;

// Coarse per-chunk culling test for one face-normal group.
//   dir:               group index; 0/1 test the z axis (up/down), 2/3 the x axis (posX/negX),
//                      4/5 the y axis (posY/negY). Any other value is never culled.
//   relativePlayerPos: player position relative to the chunk, as supplied by the caller.
//   voxelSize:         voxel size of the chunk; 32*voxelSize is the upper bound used for the odd directions.
// Returns false when the relevant coordinate is below 0 (even directions) or
// at/above 32*voxelSize (odd directions); returns true otherwise.
bool isVisible(int dir, ivec3 relativePlayerPos, int voxelSize) {
	// Anything outside the six axis-aligned directions is always drawn.
	if(dir < 0 || dir > 5) {
		return true;
	}
	// Pick the coordinate that belongs to this direction's axis.
	int coordinate;
	if(dir < 2) {
		coordinate = relativePlayerPos.z;
	} else if(dir < 4) {
		coordinate = relativePlayerPos.x;
	} else {
		coordinate = relativePlayerPos.y;
	}
	// Even indices are the positive-facing directions, odd indices the negative-facing ones.
	bool facesPositive = (dir & 1) == 0;
	if(facesPositive) {
		return coordinate >= 0;
	}
	return coordinate < 32*voxelSize;
}

// Builds a single indirect draw command.
//   indices:      first constructor argument (the index count to draw).
//   vertexOffset: converted to int and passed as the fourth constructor argument.
//   chunkID:      passed through as the fifth constructor argument.
// The second and third arguments are fixed to 1 and 0.
// NOTE(review): presumably the struct follows OpenGL's DrawElementsIndirectCommand layout
// (count, instanceCount, firstIndex, baseVertex, baseInstance) — confirm against its declaration.
DrawElementsIndirectCommand addCommand(uint indices, uint vertexOffset, uint chunkID) {
	int signedVertexOffset = int(vertexOffset);
	DrawElementsIndirectCommand command = DrawElementsIndirectCommand(indices, 1, 0, signedVertexOffset, chunkID);
	return command;
}

// Compute entry point: each invocation writes the indirect draw command(s) for one chunk.
// Transparent pass: one command per chunk.
// Opaque pass: a block of 4 command slots per chunk, filled with one command per run of
// consecutive non-culled normal groups and padded with zeroed commands.
// NOTE(review): this text is a rendered diff without +/- markers, so pre-change and post-change
// lines are interleaved; the spans tagged "pre-change" below appear to be the removed lines —
// confirm against the commit before treating this as compilable source.
void main() {
// NOTE(review): chunkIDs is indexed before the bounds check against `size` below — confirm an out-of-range read is harmless here.
uint chunkID = chunkIDs[chunkIDIndex + gl_GlobalInvocationID.x];
// pre-change: a single command slot per invocation, shared by both passes.
uint commandIndex = commandIndexStart + gl_GlobalInvocationID.x;
// Invocations beyond the number of chunks do nothing.
if(gl_GlobalInvocationID.x >= size) return;
if(isTransparent) {
// pre-change: command constructed inline.
commands[commandIndex] = DrawElementsIndirectCommand(
chunks[chunkID].vertexCountTransparent,
1,
0,
int(chunks[chunkID].vertexStartTransparent),
chunkID
);
// post-change: same values, built through addCommand.
uint commandIndex = commandIndexStart + gl_GlobalInvocationID.x;
commands[commandIndex] = addCommand(chunks[chunkID].vertexCountTransparent, chunks[chunkID].vertexStartTransparent, chunkID);
} else {
// pre-change: one command covering all opaque vertices of the chunk.
commands[commandIndex] = DrawElementsIndirectCommand(
chunks[chunkID].vertexCountOpaque,
1,
0,
int(chunks[chunkID].vertexStartOpaque),
chunkID
);
// post-change: each invocation owns 4 consecutive command slots.
uint commandIndex = commandIndexStart + gl_GlobalInvocationID.x*4;
uint commandIndexEnd = commandIndex + 4;
// groupFaceOffset: number of faces that precede the current run (culled groups plus runs already emitted).
// groupFaceCount: number of faces accumulated in the current run.
uint groupFaceOffset = 0;
uint groupFaceCount = 0;
// Walk the 7 per-normal face counts in storage order.
for(int i = 0; i < 7; i++) {
uint faceCount = chunks[chunkID].faceCountsByNormalOpaque[i];
// Empty groups are treated like visible ones so they do not split a run into two commands.
if(isVisible(i, playerPositionInteger - chunks[chunkID].position.xyz, chunks[chunkID].voxelSize) || faceCount == 0) {
groupFaceCount += faceCount;
} else {
// A culled group ends the current run: emit it (6 indices per face, vertex offset of 4 per face).
if(groupFaceCount != 0) {
commands[commandIndex] = addCommand(6*groupFaceCount, chunks[chunkID].vertexStartOpaque + 4*groupFaceOffset, chunkID);
commandIndex += 1;
groupFaceOffset += groupFaceCount;
groupFaceCount = 0;
}
// Skip past the culled group's faces.
groupFaceOffset += faceCount;
}
}
// Emit the trailing run, if any.
if(groupFaceCount != 0) {
commands[commandIndex] = addCommand(6*groupFaceCount, chunks[chunkID].vertexStartOpaque + 4*groupFaceOffset, chunkID);
commandIndex += 1;
}

// Zero-fill the slots that were not used, so every chunk's block of 4 commands is fully written.
// NOTE(review): presumably at most one direction per axis is culled (assuming voxelSize > 0),
// which would bound the number of runs at 4 — confirm.
for(; commandIndex < commandIndexEnd; commandIndex++) {
commands[commandIndex] = DrawElementsIndirectCommand(0, 0, 0, 0, 0);
}
}
}
2 changes: 1 addition & 1 deletion src/graphics.zig
Original file line number Diff line number Diff line change
Expand Up @@ -2015,7 +2015,7 @@ pub fn generateBlockTexture(blockType: u16) Texture {
.visibilityMask = 255,
.voxelSize = 1,
.vertexStartOpaque = undefined,
.vertexCountOpaque = undefined,
.faceCountsByNormalOpaque = undefined,
.vertexStartTransparent = undefined,
.vertexCountTransparent = undefined,
}}, &chunkAllocation);
Expand Down
40 changes: 31 additions & 9 deletions src/renderer/chunk_meshing.zig
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ pub var commandUniforms: struct {
commandIndexStart: c_int,
size: c_int,
isTransparent: c_int,
playerPositionInteger: c_int,
} = undefined;
var vao: c_uint = undefined;
var vbo: c_uint = undefined;
Expand Down Expand Up @@ -158,16 +159,18 @@ pub fn bindTransparentShaderAndUniforms(projMatrix: Mat4f, ambient: Vec3f, playe
}

/// Uploads the given chunk IDs, runs the command-filling compute shader over them and then
/// issues one multi-draw-indirect call over the generated commands.
/// `transparent` selects the transparent pass (one command per chunk) or the opaque pass
/// (four command slots per chunk).
/// NOTE(review): this text is a rendered diff without +/- markers; lines tagged "pre-change"
/// below appear to be the removed versions, and part of the function body is elided at the
/// hunk separator — confirm against the commit.
pub fn drawChunksIndirect(chunkIDs: []const u32, projMatrix: Mat4f, ambient: Vec3f, playerPos: Vec3d, transparent: bool) void {
// Number of command slots reserved and drawn: 1 per chunk when transparent, 4 per chunk otherwise.
const drawCallsEstimate: u31 = @intCast(if(transparent) chunkIDs.len else chunkIDs.len*4);
var chunkIDAllocation: main.graphics.SubAllocation = .{.start = 0, .len = 0};
chunkIDBuffer.uploadData(chunkIDs, &chunkIDAllocation);
defer chunkIDBuffer.free(chunkIDAllocation);
// pre-change: one command slot per chunk.
const allocation = commandBuffer.rawAlloc(@intCast(chunkIDs.len));
// post-change: reserve drawCallsEstimate command slots.
const allocation = commandBuffer.rawAlloc(drawCallsEstimate);
defer commandBuffer.free(allocation);
commandShader.bind();
c.glUniform1ui(commandUniforms.chunkIDIndex, chunkIDAllocation.start);
c.glUniform1ui(commandUniforms.commandIndexStart, allocation.start);
c.glUniform1ui(commandUniforms.size, @intCast(chunkIDs.len));
c.glUniform1i(commandUniforms.isTransparent, @intFromBool(transparent));
// NOTE(review): @intFromFloat keeps the integer part (rounds toward zero), which differs from
// floor for negative coordinates — confirm this is acceptable for the shader's culling test.
c.glUniform3i(commandUniforms.playerPositionInteger, @intFromFloat(playerPos[0]), @intFromFloat(playerPos[1]), @intFromFloat(playerPos[2]));
// Ceiling division by 64; presumably 64 is the compute shader's local work group size — confirm.
c.glDispatchCompute(@intCast(@divFloor(chunkIDs.len + 63, 64)), 1, 1); // TODO: Replace with @divCeil once available
// NOTE(review): the commands written by the shader are consumed by an indirect draw below; the
// OpenGL reference lists GL_COMMAND_BARRIER_BIT for that use — confirm whether it is needed here.
c.glMemoryBarrier(c.GL_SHADER_STORAGE_BARRIER_BIT);

Expand All @@ -177,7 +180,7 @@ pub fn drawChunksIndirect(chunkIDs: []const u32, projMatrix: Mat4f, ambient: Vec
bindShaderAndUniforms(projMatrix, ambient, playerPos);
}
c.glBindBuffer(c.GL_DRAW_INDIRECT_BUFFER, commandBuffer.ssbo.bufferID);
// pre-change: one draw command per chunk.
c.glMultiDrawElementsIndirect(c.GL_TRIANGLES, c.GL_UNSIGNED_INT, @ptrFromInt(allocation.start*@sizeOf(IndirectData)), @intCast(chunkIDs.len), 0);
// post-change: draw every reserved slot, starting at the allocation's byte offset in the command buffer.
c.glMultiDrawElementsIndirect(c.GL_TRIANGLES, c.GL_UNSIGNED_INT, @ptrFromInt(allocation.start*@sizeOf(IndirectData)), drawCallsEstimate, 0);
}

pub const FaceData = extern struct {
Expand Down Expand Up @@ -208,7 +211,7 @@ pub const ChunkData = extern struct {
visibilityMask: i32,
voxelSize: i32,
vertexStartOpaque: u32,
vertexCountOpaque: u32,
faceCountsByNormalOpaque: [7]u32,
vertexStartTransparent: u32,
vertexCountTransparent: u32,
};
Expand All @@ -232,6 +235,7 @@ const PrimitiveMesh = struct {
mutex: std.Thread.Mutex = .{},
bufferAllocation: graphics.SubAllocation = .{.start = 0, .len = 0},
vertexCount: u31 = 0,
byNormalCount: [7]u32 = .{0} ** 7,
wasChanged: bool = false,

fn deinit(self: *PrimitiveMesh) void {
Expand Down Expand Up @@ -497,12 +501,30 @@ const PrimitiveMesh = struct {
}
const fullBuffer = faceBuffer.allocateAndMapRange(len, &self.bufferAllocation);
defer faceBuffer.unmapRange(fullBuffer);
@memcpy(fullBuffer[0..self.coreLen], self.completeList[0..self.coreLen]);
var i: usize = self.coreLen;
for(0..6) |n| {
@memcpy(fullBuffer[i..][0..list[n].len], list[n]);
i += list[n].len;
// Sort the faces by normal to allow for backface culling on the GPU:
var i: u32 = 0;
var iStart = i;
const coreList = self.completeList[0..self.coreLen];
for(0..7) |normal| {
for(coreList) |face| {
if(main.models.extraQuadInfos.items[face.blockAndQuad.quadIndex].alignedNormalDirection) |normalDir| {
if(normalDir == normal) {
fullBuffer[i] = face;
i += 1;
}
} else if(normal == 6) {
fullBuffer[i] = face;
i += 1;
}
}
if(normal < 6) {
@memcpy(fullBuffer[i..][0..list[normal ^ 1].len], list[normal ^ 1]);
i += @intCast(list[normal ^ 1].len);
}
self.byNormalCount[normal] = i - iStart;
iStart = i;
}
std.debug.assert(i == fullBuffer.len);
self.vertexCount = @intCast(6*fullBuffer.len);
self.wasChanged = true;
}
Expand Down Expand Up @@ -1102,7 +1124,7 @@ pub const ChunkMesh = struct {
.voxelSize = self.pos.voxelSize,
.visibilityMask = self.visibilityMask,
.vertexStartOpaque = self.opaqueMesh.bufferAllocation.start*4,
.vertexCountOpaque = self.opaqueMesh.vertexCount,
.faceCountsByNormalOpaque = self.opaqueMesh.byNormalCount,
.vertexStartTransparent = self.transparentMesh.bufferAllocation.start*4,
.vertexCountTransparent = self.transparentMesh.bufferAllocation.len*6,
}}, &self.chunkAllocation);
Expand Down

0 comments on commit c225efa

Please sign in to comment.