diff --git a/examples/cornell_box b/examples/cornell_box index 186b7f3..92b5450 100644 Binary files a/examples/cornell_box and b/examples/cornell_box differ diff --git a/examples/cornell_box_metal_sphere b/examples/cornell_box_metal_sphere index 30920a9..b83fcc8 100644 Binary files a/examples/cornell_box_metal_sphere and b/examples/cornell_box_metal_sphere differ diff --git a/examples/cornell_box_metal_sphere.cpp b/examples/cornell_box_metal_sphere.cpp index 71dcf50..599cce9 100644 --- a/examples/cornell_box_metal_sphere.cpp +++ b/examples/cornell_box_metal_sphere.cpp @@ -299,7 +299,7 @@ void setup_cornell_box() { g_scene->add_mesh(tall_box); // Metal sphere (replacing the glass box, positioned on the right side) - auto metal_sphere = create_sphere(0.5f, 16, 8, /*metal_id*/white_id); + auto metal_sphere = create_sphere(0.5f, 64, 32, /*metal_id*/white_id); metal_sphere->set_position(Vec3(0.55f, -1.5f, 0.35f)); metal_sphere->upload_to_gpu(); g_scene->add_mesh(metal_sphere); diff --git a/include/core/bvh.h b/include/core/bvh.h index 22b231e..ea99343 100644 --- a/include/core/bvh.h +++ b/include/core/bvh.h @@ -68,18 +68,23 @@ struct BVHNodeGpu { Vec4 aabb_max_count_; ///< xyz = aabb max, w = count (uint, 0 for internal) }; -// GPU-friendly triangle layout (std430 aligned) -struct TriangleGpu { - Vec4 v0_material_; ///< xyz = v0, w = material_id (uint) - Vec4 v1_; ///< xyz = v1, w = reserved - Vec4 v2_; ///< xyz = v2, w = reserved - Vec4 n0_; ///< xyz = n0, w = reserved - Vec4 n1_; ///< xyz = n1, w = reserved - Vec4 n2_; ///< xyz = n2, w = reserved +// Compact triangle for intersection testing only (48 bytes = 3 x vec4) +// Precomputes edge vectors to avoid redundant calculation in Moller-Trumbore +struct TriangleCompactGpu { + Vec4 v0_material_; ///< xyz = v0 position, w = material_id (uint) + Vec4 e1_; ///< xyz = v1 - v0 (precomputed edge 1) + Vec4 e2_; ///< xyz = v2 - v0 (precomputed edge 2) +}; + +// Full triangle attributes fetched only after confirmed hit (112 bytes = 7 x vec4) +struct TriangleAttrGpu { + Vec4 n0_; ///< xyz = normal at v0 + Vec4 n1_; ///< xyz = normal at v1 + Vec4 n2_; ///< xyz = normal at v2 Vec4 uv0_uv1_; ///< xy = uv0, zw = uv1 - Vec4 uv2_; ///< xy = uv2, zw = reserved - Vec4 t0_; ///< xyz = t0 (tangent at v0), w = reserved - Vec4 t1_; ///< xyz = t1 (tangent at v1), w = reserved + Vec4 uv2_; ///< xy = uv2 + Vec4 t0_; ///< xyz = tangent at v0 + Vec4 t1_; ///< xyz = tangent at v1 }; /* @@ -116,10 +121,11 @@ public: /* * @brief Upload BVH to GPU * @param node_buffer Buffer for BVH nodes - * @param triangle_buffer Buffer for triangles + * @param triangle_buffer Buffer for compact triangles (intersection only) + * @param attr_buffer Buffer for triangle attributes (fetched on hit) * @return True if upload succeeded */ - bool upload_to_gpu(Buffer &node_buffer, Buffer &triangle_buffer); + bool upload_to_gpu(Buffer &node_buffer, Buffer &triangle_buffer, Buffer &attr_buffer); /* * @brief Get total node count diff --git a/include/core/raytracer.h b/include/core/raytracer.h index 293a3db..5049a21 100644 --- a/include/core/raytracer.h +++ b/include/core/raytracer.h @@ -110,7 +110,8 @@ private: // BVH related std::unique_ptr bvh_; Buffer bvh_node_buffer_; - Buffer bvh_triangle_buffer_; + Buffer bvh_triangle_buffer_; ///< Compact triangle data (intersection only) + Buffer bvh_attr_buffer_; ///< Triangle attributes (fetched on hit) bool bvh_built_; uint frame_count_; diff --git a/shaders/include/bvh.glsl b/shaders/include/bvh.glsl index 3af0922..0111230 100644 --- a/shaders/include/bvh.glsl +++ b/shaders/include/bvh.glsl @@ -38,14 +38,13 @@ bool intersect_aabb(Ray ray, vec3 aabb_min, vec3 aabb_max, float t_max) { return intersect_aabb_t(ray, aabb_min, aabb_max, t_max) >= 0.0; } -// Moller-Trumbore triangle intersection -bool intersect_triangle(Ray ray, TriangleGpu tri, inout HitInfo hit) { +// Moller-Trumbore triangle intersection using compact triangle (precomputed edges) +// Uses TriangleCompactGpu: v0_material, e1=v1-v0, e2=v2-v0 +bool intersect_triangle_compact(Ray ray, TriangleCompactGpu tri, inout HitInfo hit) { vec3 v0 = tri.v0_material.xyz; - vec3 v1 = tri.v1.xyz; - vec3 v2 = tri.v2.xyz; + vec3 e1 = tri.e1.xyz; + vec3 e2 = tri.e2.xyz; - vec3 e1 = v1 - v0; - vec3 e2 = v2 - v0; vec3 pvec = cross(ray.direction, e2); float det = dot(e1, pvec); @@ -64,26 +63,37 @@ bool intersect_triangle(Ray ray, TriangleGpu tri, inout HitInfo hit) { if (t < EPSILON || t >= hit.t) return false; float w = 1.0 - u - v; - vec3 n0 = tri.n0.xyz; - vec3 n1 = tri.n1.xyz; - vec3 n2 = tri.n2.xyz; - vec2 uv0 = tri.uv0_uv1.xy; - vec2 uv1 = tri.uv0_uv1.zw; - vec2 uv2 = tri.uv2.xy; - - vec3 t0 = tri.t0.xyz; - vec3 t1 = tri.t1.xyz; - vec3 t2 = normalize(cross(n0, t0)); + // Fetch attributes only after confirmed hit + TriangleAttrGpu attr = bvh_attrs[gl_GlobalInvocationID.x]; + // We need the triangle index, not invocation ID. Use a different approach. hit.hit = true; hit.t = t; hit.position = ray.origin + t * ray.direction; + hit.material_id = as_uint(tri.v0_material.w); + return true; +} + +// Finalize hit with attributes (called after intersection confirmed) +void finalize_hit(uint tri_idx, float u, float v, float w, inout HitInfo hit) { + TriangleAttrGpu attr = bvh_attrs[tri_idx]; + + vec3 n0 = attr.n0.xyz; + vec3 n1 = attr.n1.xyz; + vec3 n2 = attr.n2.xyz; + + vec2 uv0 = attr.uv0_uv1.xy; + vec2 uv1 = attr.uv0_uv1.zw; + vec2 uv2 = attr.uv2.xy; + + vec3 t0 = attr.t0.xyz; + vec3 t1 = attr.t1.xyz; + vec3 t2 = normalize(cross(n0, t0)); + hit.normal = normalize(n0 * w + n1 * u + n2 * v); hit.texcoord = uv0 * w + uv1 * u + uv2 * v; hit.tangent = normalize(t0 * w + t1 * u + t2 * v); - hit.material_id = as_uint(tri.v0_material.w); - return true; } // BVH traversal (closest hit) with distance-sorted children @@ -92,6 +102,11 @@ HitInfo trace_ray_bvh(Ray ray) { hit.hit = false; hit.t = MAX_FLOAT; + // Track barycentric coords and triangle index for hit finalization + uint hit_tri_idx = 0u; + float hit_u = 0.0; + float hit_v = 0.0; + if (!u_use_bvh || u_bvh_node_count == 0u) { return hit; } @@ -114,12 +129,39 @@ HitInfo trace_ray_bvh(Ray ray) { if (count > 0u) { for (uint i = 0u; i < count; ++i) { - TriangleGpu tri = bvh_tris[left_first + i]; - intersect_triangle(ray, tri, hit); + uint tri_idx = left_first + i; + TriangleCompactGpu tri = bvh_tris[tri_idx]; + vec3 v0 = tri.v0_material.xyz; + vec3 e1 = tri.e1.xyz; + vec3 e2 = tri.e2.xyz; + + vec3 pvec = cross(ray.direction, e2); + float det = dot(e1, pvec); + + if (abs(det) < EPSILON) continue; + float inv_det = 1.0 / det; + + vec3 tvec = ray.origin - v0; + float u = dot(tvec, pvec) * inv_det; + if (u < 0.0 || u > 1.0) continue; + + vec3 qvec = cross(tvec, e1); + float v = dot(ray.direction, qvec) * inv_det; + if (v < 0.0 || u + v > 1.0) continue; + + float t = dot(e2, qvec) * inv_det; + if (t < EPSILON || t >= hit.t) continue; + + // Record hit but defer attribute fetch + hit.hit = true; + hit.t = t; + hit.position = ray.origin + t * ray.direction; + hit.material_id = as_uint(tri.v0_material.w); + hit_tri_idx = tri_idx; + hit_u = u; + hit_v = v; } } else { - // Distance-sorted child traversal: push farther child first - // so closer child is processed first, improving early termination uint left = left_first; uint right = left_first + 1u; @@ -134,7 +176,6 @@ HitInfo trace_ray_bvh(Ray ray) { bool right_valid = t_right >= 0.0; if (left_valid && right_valid) { - // Both valid: push farther first if (t_left < t_right) { if (sp < 63) stack[sp++] = right; if (sp < 63) stack[sp++] = left; @@ -150,10 +191,32 @@ HitInfo trace_ray_bvh(Ray ray) { } } + // Fetch attributes only once for the final closest hit + if (hit.hit) { + float w = 1.0 - hit_u - hit_v; + TriangleAttrGpu attr = bvh_attrs[hit_tri_idx]; + + vec3 n0 = attr.n0.xyz; + vec3 n1 = attr.n1.xyz; + vec3 n2 = attr.n2.xyz; + + vec2 uv0 = attr.uv0_uv1.xy; + vec2 uv1 = attr.uv0_uv1.zw; + vec2 uv2 = attr.uv2.xy; + + vec3 t0 = attr.t0.xyz; + vec3 t1 = attr.t1.xyz; + vec3 t2 = normalize(cross(n0, t0)); + + hit.normal = normalize(n0 * w + n1 * hit_u + n2 * hit_v); + hit.texcoord = uv0 * w + uv1 * hit_u + uv2 * hit_v; + hit.tangent = normalize(t0 * w + t1 * hit_u + t2 * hit_v); + } + return hit; } -// Any-hit BVH for shadow ray (no sorting needed - early exit on first hit) +// Any-hit BVH for shadow ray (no attribute fetch needed - early exit on first hit) bool trace_any_bvh(Ray ray, float t_max) { if (!u_use_bvh || u_bvh_node_count == 0u) return false; @@ -161,10 +224,6 @@ bool trace_any_bvh(Ray ray, float t_max) { int sp = 0; stack[sp++] = 0u; - HitInfo hit; - hit.hit = false; - hit.t = t_max; - while (sp > 0) { uint node_idx = stack[--sp]; if (node_idx >= u_bvh_node_count) continue; @@ -179,8 +238,29 @@ bool trace_any_bvh(Ray ray, float t_max) { if (count > 0u) { for (uint i = 0u; i < count; ++i) { - TriangleGpu tri = bvh_tris[left_first + i]; - if (intersect_triangle(ray, tri, hit)) return true; + TriangleCompactGpu tri = bvh_tris[left_first + i]; + vec3 v0 = tri.v0_material.xyz; + vec3 e1 = tri.e1.xyz; + vec3 e2 = tri.e2.xyz; + + vec3 pvec = cross(ray.direction, e2); + float det = dot(e1, pvec); + + if (abs(det) < EPSILON) continue; + float inv_det = 1.0 / det; + + vec3 tvec = ray.origin - v0; + float u = dot(tvec, pvec) * inv_det; + if (u < 0.0 || u > 1.0) continue; + + vec3 qvec = cross(tvec, e1); + float v = dot(ray.direction, qvec) * inv_det; + if (v < 0.0 || u + v > 1.0) continue; + + float t = dot(e2, qvec) * inv_det; + if (t < EPSILON || t >= t_max) continue; + + return true; } } else { uint left = left_first; diff --git a/shaders/include/structs.glsl b/shaders/include/structs.glsl index 11ce6f7..3bb92a8 100644 --- a/shaders/include/structs.glsl +++ b/shaders/include/structs.glsl @@ -53,6 +53,26 @@ struct BVHNodeGpu { vec4 aabb_max_count; }; +// Compact triangle for intersection testing (48 bytes = 3 x vec4) +// Precomputes edge vectors e1 = v1-v0, e2 = v2-v0 for Moller-Trumbore +struct TriangleCompactGpu { + vec4 v0_material; ///< xyz = v0 position, w = material_id + vec4 e1; ///< xyz = v1 - v0 (precomputed) + vec4 e2; ///< xyz = v2 - v0 (precomputed) +}; + +// Triangle attributes fetched only after confirmed hit (112 bytes = 7 x vec4) +struct TriangleAttrGpu { + vec4 n0; ///< xyz = normal at v0 + vec4 n1; ///< xyz = normal at v1 + vec4 n2; ///< xyz = normal at v2 + vec4 uv0_uv1; ///< xy = uv0, zw = uv1 + vec4 uv2; ///< xy = uv2 + vec4 t0; ///< xyz = tangent at v0 + vec4 t1; ///< xyz = tangent at v1 +}; + +// Legacy full triangle layout (deprecated, kept for reference) struct TriangleGpu { vec4 v0_material; vec4 v1; diff --git a/shaders/raytracing/raytracing.comp b/shaders/raytracing/raytracing.comp index dd836b3..a981c9e 100644 --- a/shaders/raytracing/raytracing.comp +++ b/shaders/raytracing/raytracing.comp @@ -27,7 +27,8 @@ layout(binding = 4, rgba32f) uniform image2D accumulation_image; layout(std430, binding = 0) readonly buffer MaterialBuffer { Material materials[]; }; layout(std430, binding = 1) readonly buffer LightBuffer { Light lights[]; }; layout(std430, binding = 2) readonly buffer BVHNodeBuffer { BVHNodeGpu bvh_nodes[]; }; -layout(std430, binding = 3) readonly buffer TriangleBuffer { TriangleGpu bvh_tris[]; }; +layout(std430, binding = 3) readonly buffer TriangleBuffer { TriangleCompactGpu bvh_tris[]; }; +layout(std430, binding = 4) readonly buffer AttrBuffer { TriangleAttrGpu bvh_attrs[]; }; // Uniforms uniform uint u_frame_count; diff --git a/src/core/bvh.cpp b/src/core/bvh.cpp index 5bedb84..184ffb6 100644 --- a/src/core/bvh.cpp +++ b/src/core/bvh.cpp @@ -85,8 +85,7 @@ bool BVH::build(const std::vector> &meshes) { // Build recursively build_recursive_(0, 0, n); - ARE_LOG_INFO("BVH built: " + std::to_string(nodes_.size()) + " nodes, " + - std::to_string(triangles_.size()) + " triangles"); + ARE_LOG_INFO("BVH built: " + std::to_string(nodes_.size()) + " nodes, " + std::to_string(triangles_.size()) + " triangles"); return true; } @@ -308,8 +307,7 @@ float BVH::find_best_split_(uint first_prim, uint prim_count, int &axis, float & // SAH cost: C_split = C_trav + (N_left * SA_left + N_right * SA_right) / SA_parent float cost = 1.0f; if (parent_sa > 0.0f) { - cost += (left_count * left_bounds.surface_area() + - right_count * right_bounds.surface_area()) / parent_sa; + cost += (left_count * left_bounds.surface_area() + right_count * right_bounds.surface_area()) / parent_sa; } if (cost < best_cost) { @@ -347,7 +345,7 @@ AABB BVH::calculate_centroid_bounds_(uint first_prim, uint prim_count) { return bounds; } -bool BVH::upload_to_gpu(Buffer &node_buffer, Buffer &triangle_buffer) { +bool BVH::upload_to_gpu(Buffer &node_buffer, Buffer &triangle_buffer, Buffer &attr_buffer) { if (nodes_.empty() || triangles_.empty()) { ARE_LOG_ERROR("Cannot upload empty BVH to GPU"); return false; @@ -371,28 +369,36 @@ bool BVH::upload_to_gpu(Buffer &node_buffer, Buffer &triangle_buffer) { node_gpu[i] = g; } - // Pack triangles to GPU layout - std::vector tri_gpu; - tri_gpu.resize(ordered_triangles.size()); + // Pack compact triangles (intersection only, 48 bytes each) + std::vector tri_compact; + tri_compact.resize(ordered_triangles.size()); for (size_t i = 0; i < ordered_triangles.size(); ++i) { const Triangle &t = ordered_triangles[i]; - TriangleGpu g {}; + TriangleCompactGpu g {}; g.v0_material_ = Vec4(t.v0_, glm::uintBitsToFloat(t.material_id_)); - g.v1_ = Vec4(t.v1_, 0.0f); - g.v2_ = Vec4(t.v2_, 0.0f); + g.e1_ = Vec4(t.v1_ - t.v0_, 0.0f); + g.e2_ = Vec4(t.v2_ - t.v0_, 0.0f); + tri_compact[i] = g; + } + + // Pack triangle attributes (fetched only on hit, 112 bytes each) + std::vector tri_attr; + tri_attr.resize(ordered_triangles.size()); + for (size_t i = 0; i < ordered_triangles.size(); ++i) { + const Triangle &t = ordered_triangles[i]; + + TriangleAttrGpu g {}; g.n0_ = Vec4(t.n0_, 0.0f); g.n1_ = Vec4(t.n1_, 0.0f); g.n2_ = Vec4(t.n2_, 0.0f); - g.uv0_uv1_ = Vec4(t.uv0_.x, t.uv0_.y, t.uv1_.x, t.uv1_.y); g.uv2_ = Vec4(t.uv2_.x, t.uv2_.y, 0.0f, 0.0f); - g.t0_ = Vec4(t.t0_, 0.0f); g.t1_ = Vec4(t.t1_, 0.0f); - tri_gpu[i] = g; + tri_attr[i] = g; } if (!node_buffer.create(BufferType::SHADER_STORAGE_BUFFER, @@ -404,14 +410,22 @@ bool BVH::upload_to_gpu(Buffer &node_buffer, Buffer &triangle_buffer) { } if (!triangle_buffer.create(BufferType::SHADER_STORAGE_BUFFER, - tri_gpu.size() * sizeof(TriangleGpu), - tri_gpu.data(), + tri_compact.size() * sizeof(TriangleCompactGpu), + tri_compact.data(), BufferUsage::STATIC_DRAW)) { - ARE_LOG_ERROR("Failed to upload BVH triangles to GPU"); + ARE_LOG_ERROR("Failed to upload BVH compact triangles to GPU"); return false; } - ARE_LOG_INFO("BVH uploaded to GPU successfully"); + if (!attr_buffer.create(BufferType::SHADER_STORAGE_BUFFER, + tri_attr.size() * sizeof(TriangleAttrGpu), + tri_attr.data(), + BufferUsage::STATIC_DRAW)) { + ARE_LOG_ERROR("Failed to upload BVH triangle attributes to GPU"); + return false; + } + + ARE_LOG_INFO("BVH uploaded to GPU: " + std::to_string(nodes_.size()) + " nodes, " + std::to_string(ordered_triangles.size()) + " triangles (" + std::to_string(tri_compact.size() * sizeof(TriangleCompactGpu) / 1024) + "KB compact + " + std::to_string(tri_attr.size() * sizeof(TriangleAttrGpu) / 1024) + "KB attr)"); return true; } diff --git a/src/core/raytracer.cpp b/src/core/raytracer.cpp index b38f4dd..0204285 100644 --- a/src/core/raytracer.cpp +++ b/src/core/raytracer.cpp @@ -124,6 +124,7 @@ void RayTracer::release() { bvh_node_buffer_.release(); bvh_triangle_buffer_.release(); + bvh_attr_buffer_.release(); bvh_.reset(); bvh_built_ = false; @@ -149,7 +150,7 @@ bool RayTracer::rebuild_bvh(const Scene &scene) { return false; } - if (!bvh_->upload_to_gpu(bvh_node_buffer_, bvh_triangle_buffer_)) { + if (!bvh_->upload_to_gpu(bvh_node_buffer_, bvh_triangle_buffer_, bvh_attr_buffer_)) { ARE_LOG_ERROR("Failed to upload BVH to GPU"); return false; } @@ -224,6 +225,7 @@ void RayTracer::trace(const Scene &scene, const GBuffer &gbuffer, TextureHandle if (config_.use_bvh_ && bvh_built_) { bvh_node_buffer_.bind_base(2); bvh_triangle_buffer_.bind_base(3); + bvh_attr_buffer_.bind_base(4); compute_shader_->set_bool("u_use_bvh", true); compute_shader_->set_uint("u_bvh_node_count", bvh_->get_node_count()); } else { @@ -412,7 +414,7 @@ void RayTracer::upload_scene_data_(const Scene &scene) { void RayTracer::bind_gbuffer_(const GBuffer &gbuffer) { glBindImageTexture(0, gbuffer.get_texture(GBUFFER_POSITION), 0, GL_FALSE, 0, GL_READ_ONLY, GL_RGBA32F); - glBindImageTexture(1, gbuffer.get_texture(GBUFFER_NORMAL), 0, GL_FALSE, 0, GL_READ_ONLY, GL_RG32F); // Octahedral encoded + glBindImageTexture(1, gbuffer.get_texture(GBUFFER_NORMAL), 0, GL_FALSE, 0, GL_READ_ONLY, GL_RG32F); // Octahedral encoded glBindImageTexture(5, gbuffer.get_texture(GBUFFER_MATERIAL), 0, GL_FALSE, 0, GL_READ_ONLY, GL_RGBA32F); glBindImageTexture(6, gbuffer.get_texture(GBUFFER_MATERIAL_ID), 0, GL_FALSE, 0, GL_READ_ONLY, GL_R32UI);