feat: 拆分三角形数据结构,添加三角形边预计算功能

- 拆分原TriangleGpu结构体为TriangleCompactGpu与TriangleAttrGpu两个结构体:Compact仅用于相交检测,确认击中后才从属性缓冲区读取三角形的详细数据(法线、UV、切线),减少遍历过程中的显存读取
- 在upload_to_gpu()中预计算三角形的e1&e2边
- 同步shader端逻辑
master
ternaryop8479 2026-04-06 22:58:13 +08:00
parent 09667267fe
commit 08910e48d7
10 changed files with 190 additions and 66 deletions

Binary file not shown.

Binary file not shown.

View File

@ -299,7 +299,7 @@ void setup_cornell_box() {
g_scene->add_mesh(tall_box);
// Metal sphere (replacing the glass box, positioned on the right side)
auto metal_sphere = create_sphere(0.5f, 16, 8, /*metal_id*/white_id);
auto metal_sphere = create_sphere(0.5f, 64, 32, /*metal_id*/white_id);
metal_sphere->set_position(Vec3(0.55f, -1.5f, 0.35f));
metal_sphere->upload_to_gpu();
g_scene->add_mesh(metal_sphere);

View File

@ -68,18 +68,23 @@ struct BVHNodeGpu {
Vec4 aabb_max_count_; ///< xyz = aabb max, w = count (uint, 0 for internal)
};
// GPU-friendly triangle layout (std430 aligned)
struct TriangleGpu {
Vec4 v0_material_; ///< xyz = v0, w = material_id (uint)
Vec4 v1_; ///< xyz = v1, w = reserved
Vec4 v2_; ///< xyz = v2, w = reserved
Vec4 n0_; ///< xyz = n0, w = reserved
Vec4 n1_; ///< xyz = n1, w = reserved
Vec4 n2_; ///< xyz = n2, w = reserved
// Compact per-triangle record used only for ray/triangle intersection
// (48 bytes = 3 x vec4).
// Stores v0 together with the two edge vectors so the Moller-Trumbore test
// does not have to recompute v1 - v0 and v2 - v0 for every ray.
struct TriangleCompactGpu {
Vec4 v0_material_; ///< xyz = v0 position, w = material_id (uint bit pattern stored in a float)
Vec4 e1_; ///< xyz = v1 - v0 (precomputed edge 1), w = unused padding
Vec4 e2_; ///< xyz = v2 - v0 (precomputed edge 2), w = unused padding
};
// Full triangle attributes fetched only after confirmed hit (112 bytes = 7 x vec4)
struct TriangleAttrGpu {
Vec4 n0_; ///< xyz = normal at v0
Vec4 n1_; ///< xyz = normal at v1
Vec4 n2_; ///< xyz = normal at v2
Vec4 uv0_uv1_; ///< xy = uv0, zw = uv1
Vec4 uv2_; ///< xy = uv2, zw = reserved
Vec4 t0_; ///< xyz = t0 (tangent at v0), w = reserved
Vec4 t1_; ///< xyz = t1 (tangent at v1), w = reserved
Vec4 uv2_; ///< xy = uv2
Vec4 t0_; ///< xyz = tangent at v0
Vec4 t1_; ///< xyz = tangent at v1
};
/*
@ -116,10 +121,11 @@ public:
/*
* @brief Upload BVH to GPU
* @param node_buffer Buffer for BVH nodes
* @param triangle_buffer Buffer for triangles
* @param triangle_buffer Buffer for compact triangles (intersection only)
* @param attr_buffer Buffer for triangle attributes (fetched on hit)
* @return True if upload succeeded
*/
bool upload_to_gpu(Buffer &node_buffer, Buffer &triangle_buffer);
bool upload_to_gpu(Buffer &node_buffer, Buffer &triangle_buffer, Buffer &attr_buffer);
/*
* @brief Get total node count

View File

@ -110,7 +110,8 @@ private:
// BVH related
std::unique_ptr<BVH> bvh_;
Buffer bvh_node_buffer_;
Buffer bvh_triangle_buffer_;
Buffer bvh_triangle_buffer_; ///< Compact triangle data (intersection only)
Buffer bvh_attr_buffer_; ///< Triangle attributes (fetched on hit)
bool bvh_built_;
uint frame_count_;

View File

@ -38,14 +38,13 @@ bool intersect_aabb(Ray ray, vec3 aabb_min, vec3 aabb_max, float t_max) {
return intersect_aabb_t(ray, aabb_min, aabb_max, t_max) >= 0.0;
}
// Moller-Trumbore triangle intersection
bool intersect_triangle(Ray ray, TriangleGpu tri, inout HitInfo hit) {
// Moller-Trumbore triangle intersection using compact triangle (precomputed edges)
// Uses TriangleCompactGpu: v0_material, e1=v1-v0, e2=v2-v0
bool intersect_triangle_compact(Ray ray, TriangleCompactGpu tri, inout HitInfo hit) {
vec3 v0 = tri.v0_material.xyz;
vec3 v1 = tri.v1.xyz;
vec3 v2 = tri.v2.xyz;
vec3 e1 = tri.e1.xyz;
vec3 e2 = tri.e2.xyz;
vec3 e1 = v1 - v0;
vec3 e2 = v2 - v0;
vec3 pvec = cross(ray.direction, e2);
float det = dot(e1, pvec);
@ -64,26 +63,37 @@ bool intersect_triangle(Ray ray, TriangleGpu tri, inout HitInfo hit) {
if (t < EPSILON || t >= hit.t) return false;
float w = 1.0 - u - v;
vec3 n0 = tri.n0.xyz;
vec3 n1 = tri.n1.xyz;
vec3 n2 = tri.n2.xyz;
vec2 uv0 = tri.uv0_uv1.xy;
vec2 uv1 = tri.uv0_uv1.zw;
vec2 uv2 = tri.uv2.xy;
vec3 t0 = tri.t0.xyz;
vec3 t1 = tri.t1.xyz;
vec3 t2 = normalize(cross(n0, t0));
// Fetch attributes only after confirmed hit
TriangleAttrGpu attr = bvh_attrs[gl_GlobalInvocationID.x];
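// FIXME: gl_GlobalInvocationID.x is the invocation index, not the triangle index, and `attr` is never used below; remove this fetch - attributes are resolved once for the closest hit in trace_ray_bvh().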
hit.hit = true;
hit.t = t;
hit.position = ray.origin + t * ray.direction;
hit.material_id = as_uint(tri.v0_material.w);
return true;
}
// Finalize hit with attributes (called after intersection confirmed)
void finalize_hit(uint tri_idx, float u, float v, float w, inout HitInfo hit) {
TriangleAttrGpu attr = bvh_attrs[tri_idx];
vec3 n0 = attr.n0.xyz;
vec3 n1 = attr.n1.xyz;
vec3 n2 = attr.n2.xyz;
vec2 uv0 = attr.uv0_uv1.xy;
vec2 uv1 = attr.uv0_uv1.zw;
vec2 uv2 = attr.uv2.xy;
vec3 t0 = attr.t0.xyz;
vec3 t1 = attr.t1.xyz;
vec3 t2 = normalize(cross(n0, t0));
hit.normal = normalize(n0 * w + n1 * u + n2 * v);
hit.texcoord = uv0 * w + uv1 * u + uv2 * v;
hit.tangent = normalize(t0 * w + t1 * u + t2 * v);
hit.material_id = as_uint(tri.v0_material.w);
return true;
}
// BVH traversal (closest hit) with distance-sorted children
@ -92,6 +102,11 @@ HitInfo trace_ray_bvh(Ray ray) {
hit.hit = false;
hit.t = MAX_FLOAT;
// Track barycentric coords and triangle index for hit finalization
uint hit_tri_idx = 0u;
float hit_u = 0.0;
float hit_v = 0.0;
if (!u_use_bvh || u_bvh_node_count == 0u) {
return hit;
}
@ -114,12 +129,39 @@ HitInfo trace_ray_bvh(Ray ray) {
if (count > 0u) {
for (uint i = 0u; i < count; ++i) {
TriangleGpu tri = bvh_tris[left_first + i];
intersect_triangle(ray, tri, hit);
uint tri_idx = left_first + i;
TriangleCompactGpu tri = bvh_tris[tri_idx];
vec3 v0 = tri.v0_material.xyz;
vec3 e1 = tri.e1.xyz;
vec3 e2 = tri.e2.xyz;
vec3 pvec = cross(ray.direction, e2);
float det = dot(e1, pvec);
if (abs(det) < EPSILON) continue;
float inv_det = 1.0 / det;
vec3 tvec = ray.origin - v0;
float u = dot(tvec, pvec) * inv_det;
if (u < 0.0 || u > 1.0) continue;
vec3 qvec = cross(tvec, e1);
float v = dot(ray.direction, qvec) * inv_det;
if (v < 0.0 || u + v > 1.0) continue;
float t = dot(e2, qvec) * inv_det;
if (t < EPSILON || t >= hit.t) continue;
// Record hit but defer attribute fetch
hit.hit = true;
hit.t = t;
hit.position = ray.origin + t * ray.direction;
hit.material_id = as_uint(tri.v0_material.w);
hit_tri_idx = tri_idx;
hit_u = u;
hit_v = v;
}
} else {
// Distance-sorted child traversal: push farther child first
// so closer child is processed first, improving early termination
uint left = left_first;
uint right = left_first + 1u;
@ -134,7 +176,6 @@ HitInfo trace_ray_bvh(Ray ray) {
bool right_valid = t_right >= 0.0;
if (left_valid && right_valid) {
// Both valid: push farther first
if (t_left < t_right) {
if (sp < 63) stack[sp++] = right;
if (sp < 63) stack[sp++] = left;
@ -150,10 +191,32 @@ HitInfo trace_ray_bvh(Ray ray) {
}
}
// Fetch attributes only once for the final closest hit
if (hit.hit) {
float w = 1.0 - hit_u - hit_v;
TriangleAttrGpu attr = bvh_attrs[hit_tri_idx];
vec3 n0 = attr.n0.xyz;
vec3 n1 = attr.n1.xyz;
vec3 n2 = attr.n2.xyz;
vec2 uv0 = attr.uv0_uv1.xy;
vec2 uv1 = attr.uv0_uv1.zw;
vec2 uv2 = attr.uv2.xy;
vec3 t0 = attr.t0.xyz;
vec3 t1 = attr.t1.xyz;
vec3 t2 = normalize(cross(n0, t0));
hit.normal = normalize(n0 * w + n1 * hit_u + n2 * hit_v);
hit.texcoord = uv0 * w + uv1 * hit_u + uv2 * hit_v;
hit.tangent = normalize(t0 * w + t1 * hit_u + t2 * hit_v);
}
return hit;
}
// Any-hit BVH for shadow ray (no sorting needed - early exit on first hit)
// Any-hit BVH for shadow ray (no attribute fetch needed - early exit on first hit)
bool trace_any_bvh(Ray ray, float t_max) {
if (!u_use_bvh || u_bvh_node_count == 0u) return false;
@ -161,10 +224,6 @@ bool trace_any_bvh(Ray ray, float t_max) {
int sp = 0;
stack[sp++] = 0u;
HitInfo hit;
hit.hit = false;
hit.t = t_max;
while (sp > 0) {
uint node_idx = stack[--sp];
if (node_idx >= u_bvh_node_count) continue;
@ -179,8 +238,29 @@ bool trace_any_bvh(Ray ray, float t_max) {
if (count > 0u) {
for (uint i = 0u; i < count; ++i) {
TriangleGpu tri = bvh_tris[left_first + i];
if (intersect_triangle(ray, tri, hit)) return true;
TriangleCompactGpu tri = bvh_tris[left_first + i];
vec3 v0 = tri.v0_material.xyz;
vec3 e1 = tri.e1.xyz;
vec3 e2 = tri.e2.xyz;
vec3 pvec = cross(ray.direction, e2);
float det = dot(e1, pvec);
if (abs(det) < EPSILON) continue;
float inv_det = 1.0 / det;
vec3 tvec = ray.origin - v0;
float u = dot(tvec, pvec) * inv_det;
if (u < 0.0 || u > 1.0) continue;
vec3 qvec = cross(tvec, e1);
float v = dot(ray.direction, qvec) * inv_det;
if (v < 0.0 || u + v > 1.0) continue;
float t = dot(e2, qvec) * inv_det;
if (t < EPSILON || t >= t_max) continue;
return true;
}
} else {
uint left = left_first;

View File

@ -53,6 +53,26 @@ struct BVHNodeGpu {
vec4 aabb_max_count;
};
// Compact triangle record read during BVH traversal (48 bytes = 3 x vec4).
// Holds v0 and the precomputed edges e1 = v1 - v0, e2 = v2 - v0 that the
// Moller-Trumbore test consumes directly.
struct TriangleCompactGpu {
vec4 v0_material; ///< xyz = v0 position, w = material_id (uint bits, decoded with as_uint)
vec4 e1; ///< xyz = v1 - v0 (precomputed), w = unused
vec4 e2; ///< xyz = v2 - v0 (precomputed), w = unused
};
// Shading attributes for one triangle (112 bytes = 7 x vec4).
// Addressed with the same triangle index as the compact record and read only
// after the closest hit is known.
// No tangent is stored for v2; the traversal code derives it as
// normalize(cross(n0, t0)).
struct TriangleAttrGpu {
vec4 n0; ///< xyz = normal at v0
vec4 n1; ///< xyz = normal at v1
vec4 n2; ///< xyz = normal at v2
vec4 uv0_uv1; ///< xy = uv0, zw = uv1
vec4 uv2; ///< xy = uv2, zw = unused
vec4 t0; ///< xyz = tangent at v0
vec4 t1; ///< xyz = tangent at v1
};
// Legacy full triangle layout (deprecated, kept for reference)
struct TriangleGpu {
vec4 v0_material;
vec4 v1;

View File

@ -27,7 +27,8 @@ layout(binding = 4, rgba32f) uniform image2D accumulation_image;
layout(std430, binding = 0) readonly buffer MaterialBuffer { Material materials[]; };
layout(std430, binding = 1) readonly buffer LightBuffer { Light lights[]; };
layout(std430, binding = 2) readonly buffer BVHNodeBuffer { BVHNodeGpu bvh_nodes[]; };
layout(std430, binding = 3) readonly buffer TriangleBuffer { TriangleGpu bvh_tris[]; };
layout(std430, binding = 3) readonly buffer TriangleBuffer { TriangleCompactGpu bvh_tris[]; };
layout(std430, binding = 4) readonly buffer AttrBuffer { TriangleAttrGpu bvh_attrs[]; };
// Uniforms
uniform uint u_frame_count;

View File

@ -85,8 +85,7 @@ bool BVH::build(const std::vector<std::shared_ptr<Mesh>> &meshes) {
// Build recursively
build_recursive_(0, 0, n);
ARE_LOG_INFO("BVH built: " + std::to_string(nodes_.size()) + " nodes, " +
std::to_string(triangles_.size()) + " triangles");
ARE_LOG_INFO("BVH built: " + std::to_string(nodes_.size()) + " nodes, " + std::to_string(triangles_.size()) + " triangles");
return true;
}
@ -308,8 +307,7 @@ float BVH::find_best_split_(uint first_prim, uint prim_count, int &axis, float &
// SAH cost: C_split = C_trav + (N_left * SA_left + N_right * SA_right) / SA_parent
float cost = 1.0f;
if (parent_sa > 0.0f) {
cost += (left_count * left_bounds.surface_area() +
right_count * right_bounds.surface_area()) / parent_sa;
cost += (left_count * left_bounds.surface_area() + right_count * right_bounds.surface_area()) / parent_sa;
}
if (cost < best_cost) {
@ -347,7 +345,7 @@ AABB BVH::calculate_centroid_bounds_(uint first_prim, uint prim_count) {
return bounds;
}
bool BVH::upload_to_gpu(Buffer &node_buffer, Buffer &triangle_buffer) {
bool BVH::upload_to_gpu(Buffer &node_buffer, Buffer &triangle_buffer, Buffer &attr_buffer) {
if (nodes_.empty() || triangles_.empty()) {
ARE_LOG_ERROR("Cannot upload empty BVH to GPU");
return false;
@ -371,28 +369,36 @@ bool BVH::upload_to_gpu(Buffer &node_buffer, Buffer &triangle_buffer) {
node_gpu[i] = g;
}
// Pack triangles to GPU layout
std::vector<TriangleGpu> tri_gpu;
tri_gpu.resize(ordered_triangles.size());
// Pack compact triangles (intersection only, 48 bytes each)
std::vector<TriangleCompactGpu> tri_compact;
tri_compact.resize(ordered_triangles.size());
for (size_t i = 0; i < ordered_triangles.size(); ++i) {
const Triangle &t = ordered_triangles[i];
TriangleGpu g {};
TriangleCompactGpu g {};
g.v0_material_ = Vec4(t.v0_, glm::uintBitsToFloat(t.material_id_));
g.v1_ = Vec4(t.v1_, 0.0f);
g.v2_ = Vec4(t.v2_, 0.0f);
g.e1_ = Vec4(t.v1_ - t.v0_, 0.0f);
g.e2_ = Vec4(t.v2_ - t.v0_, 0.0f);
tri_compact[i] = g;
}
// Pack triangle attributes (fetched only on hit, 112 bytes each)
std::vector<TriangleAttrGpu> tri_attr;
tri_attr.resize(ordered_triangles.size());
for (size_t i = 0; i < ordered_triangles.size(); ++i) {
const Triangle &t = ordered_triangles[i];
TriangleAttrGpu g {};
g.n0_ = Vec4(t.n0_, 0.0f);
g.n1_ = Vec4(t.n1_, 0.0f);
g.n2_ = Vec4(t.n2_, 0.0f);
g.uv0_uv1_ = Vec4(t.uv0_.x, t.uv0_.y, t.uv1_.x, t.uv1_.y);
g.uv2_ = Vec4(t.uv2_.x, t.uv2_.y, 0.0f, 0.0f);
g.t0_ = Vec4(t.t0_, 0.0f);
g.t1_ = Vec4(t.t1_, 0.0f);
tri_gpu[i] = g;
tri_attr[i] = g;
}
if (!node_buffer.create(BufferType::SHADER_STORAGE_BUFFER,
@ -404,14 +410,22 @@ bool BVH::upload_to_gpu(Buffer &node_buffer, Buffer &triangle_buffer) {
}
if (!triangle_buffer.create(BufferType::SHADER_STORAGE_BUFFER,
tri_gpu.size() * sizeof(TriangleGpu),
tri_gpu.data(),
tri_compact.size() * sizeof(TriangleCompactGpu),
tri_compact.data(),
BufferUsage::STATIC_DRAW)) {
ARE_LOG_ERROR("Failed to upload BVH triangles to GPU");
ARE_LOG_ERROR("Failed to upload BVH compact triangles to GPU");
return false;
}
ARE_LOG_INFO("BVH uploaded to GPU successfully");
if (!attr_buffer.create(BufferType::SHADER_STORAGE_BUFFER,
tri_attr.size() * sizeof(TriangleAttrGpu),
tri_attr.data(),
BufferUsage::STATIC_DRAW)) {
ARE_LOG_ERROR("Failed to upload BVH triangle attributes to GPU");
return false;
}
ARE_LOG_INFO("BVH uploaded to GPU: " + std::to_string(nodes_.size()) + " nodes, " + std::to_string(ordered_triangles.size()) + " triangles (" + std::to_string(tri_compact.size() * sizeof(TriangleCompactGpu) / 1024) + "KB compact + " + std::to_string(tri_attr.size() * sizeof(TriangleAttrGpu) / 1024) + "KB attr)");
return true;
}

View File

@ -124,6 +124,7 @@ void RayTracer::release() {
bvh_node_buffer_.release();
bvh_triangle_buffer_.release();
bvh_attr_buffer_.release();
bvh_.reset();
bvh_built_ = false;
@ -149,7 +150,7 @@ bool RayTracer::rebuild_bvh(const Scene &scene) {
return false;
}
if (!bvh_->upload_to_gpu(bvh_node_buffer_, bvh_triangle_buffer_)) {
if (!bvh_->upload_to_gpu(bvh_node_buffer_, bvh_triangle_buffer_, bvh_attr_buffer_)) {
ARE_LOG_ERROR("Failed to upload BVH to GPU");
return false;
}
@ -224,6 +225,7 @@ void RayTracer::trace(const Scene &scene, const GBuffer &gbuffer, TextureHandle
if (config_.use_bvh_ && bvh_built_) {
bvh_node_buffer_.bind_base(2);
bvh_triangle_buffer_.bind_base(3);
bvh_attr_buffer_.bind_base(4);
compute_shader_->set_bool("u_use_bvh", true);
compute_shader_->set_uint("u_bvh_node_count", bvh_->get_node_count());
} else {