feat: 实现LBVH算法

- feat: 使用基于Morton码排序&SAH的LBVH算法实现BVH构建
- feat: 实现BVH子节点按照距离排序功能
- chore: 删除冗余接口
2026-04-06 00:19:16 +08:00 · 2026-04-06 00:19:16 +08:00 · 09667267fe
parent 93125b2e0b
commit 09667267fe
4 changed files with 197 additions and 99 deletions
--- a/examples/cornell_box_metal_sphere
+++ b/examples/cornell_box_metal_sphere
--- a/include/core/bvh.h
+++ b/include/core/bvh.h
@ -53,17 +53,19 @@ struct Triangle {
 };
 // BVH node for GPU
 // Internal node: left_first_ = left child index, count_ = 0 (right child = left_first_ + 1)
 // Leaf node: left_first_ = triangle offset in sorted array, count_ = triangle count
 struct BVHNode {
 	Vec3 aabb_min_;
-	uint left_first_; // Left child index or first primitive index
+	uint left_first_; // Left child index (internal) or first primitive index (leaf)
 	Vec3 aabb_max_;
-	uint count_; // 0 for interior node, >0 for leaf node
+	uint count_; // 0 for internal node, >0 for leaf (triangle count)
 };
 // GPU-friendly BVH node layout (std430 aligned)
 struct BVHNodeGpu {
 	Vec4 aabb_min_left_first_; ///< xyz = aabb min, w = left_first (uint)
-	Vec4 aabb_max_count_; ///< xyz = aabb max, w = count (uint, 0 for interior)
+	Vec4 aabb_max_count_; ///< xyz = aabb max, w = count (uint, 0 for internal)
 };
 // GPU-friendly triangle layout (std430 aligned)
@ -80,7 +82,22 @@ struct TriangleGpu {
 	Vec4 t1_; ///< xyz = t1 (tangent at v1), w = reserved
 };
-// Bounding Volume Hierarchy for ray tracing acceleration
+/*
 * @brief Bounding Volume Hierarchy using top-down SAH construction
 *
 * Algorithm:
 * 1. Extract triangles from meshes and transform to world space
 * 2. Sort triangles by Morton code for spatial coherence
 * 3. Build BVH top-down using SAH (Surface Area Heuristic) with 16-bin evaluation
 * 4. Node layout ensures children are at consecutive indices for GPU efficiency
 *
 * Node layout (GPU-friendly):
 * - Internal nodes: left_first_ = left child index, right = left_first_ + 1
 * - Leaf nodes: left_first_ = triangle offset, count_ = triangle count
 *
 * Time complexity: O(n log n) average with SAH binning
 * Space complexity: O(n)
 */
 class BVH {
 public:
 	// Constructor
@ -126,39 +143,43 @@ public:
 private:
 	std::vector<BVHNode> nodes_;
 	std::vector<Triangle> triangles_;
-	std::vector<uint> triangle_indices_;
+	std::vector<uint> triangle_indices_; // Indirection array for partitioning
 	/*
-	 * @brief Recursively build BVH
+	 * @brief Extract triangles from meshes and transform to world space
-	 * @param node_idx Current node index
+	 */
-	 * @param first_prim First primitive index
+	void extract_triangles_(const std::vector<std::shared_ptr<Mesh>> &meshes);
-	 * @param prim_count Primitive count
+
 	/*
 	 * @brief Sort triangles by Morton code for spatial coherence
 	 */
 	void sort_triangles_by_morton_();
 	/*
 	 * @brief Recursively build BVH using SAH
 	 * @param node_idx Current node index to fill
 	 * @param first_prim First primitive index in triangle_indices_
 	 * @param prim_count Number of primitives
 	 */
 	void build_recursive_(uint node_idx, uint first_prim, uint prim_count);
 	/*
-	 * @brief Find best split using SAH
+	 * @brief Find best split using SAH with binning
 	 * @param first_prim First primitive index
 	 * @param prim_count Primitive count
-	 * @param axis Split axis (output)
+	 * @param axis Best split axis (output)
-	 * @param split_pos Split position (output)
+	 * @param split_pos Best split position (output)
-	 * @return Split cost
+	 * @return SAH cost of best split
 	 */
 	float find_best_split_(uint first_prim, uint prim_count, int &axis, float &split_pos);
 	/*
 	 * @brief Calculate node bounds
 	 * @param first_prim First primitive index
 	 * @param prim_count Primitive count
 	 * @return Bounding box
 	 */
 	AABB calculate_bounds_(uint first_prim, uint prim_count);
 	/*
 	 * @brief Calculate centroid bounds
 	 * @param first_prim First primitive index
 	 * @param prim_count Primitive count
 	 * @return Centroid bounding box
 	 */
 	AABB calculate_centroid_bounds_(uint first_prim, uint prim_count);
 };
--- a/shaders/include/bvh.glsl
+++ b/shaders/include/bvh.glsl
@ -15,8 +15,8 @@ vec3 oct_decode(vec2 f) {
    return normalize(n);
 }
-// Ray-AABB intersection
+// Ray-AABB intersection: returns t_enter if hit, -1.0 if miss
-bool intersect_aabb(Ray ray, vec3 aabb_min, vec3 aabb_max, float t_max) {
+float intersect_aabb_t(Ray ray, vec3 aabb_min, vec3 aabb_max, float t_max) {
    vec3 inv_d = 1.0 / ray.direction;
    vec3 t0 = (aabb_min - ray.origin) * inv_d;
    vec3 t1 = (aabb_max - ray.origin) * inv_d;
@ -27,7 +27,15 @@ bool intersect_aabb(Ray ray, vec3 aabb_min, vec3 aabb_max, float t_max) {
    float tmin = max(max(tmin3.x, tmin3.y), tmin3.z);
    float tmax2 = min(min(tmax3.x, tmax3.y), tmax3.z);
-    return (tmax2 >= max(tmin, 0.0)) && (tmin <= t_max);
+    if ((tmax2 >= max(tmin, 0.0)) && (tmin <= t_max)) {
        return max(tmin, 0.0);
    }
    return -1.0;
 }
 // Ray-AABB intersection, boolean form (used where only a yes/no answer is
 // needed, e.g. shadow rays). Delegates to intersect_aabb_t, which returns the
 // entry distance clamped to >= 0.0 on a hit and -1.0 on a miss, so a
 // non-negative result means the box is hit and is entered no later than t_max.
 bool intersect_aabb(Ray ray, vec3 aabb_min, vec3 aabb_max, float t_max) {
    return intersect_aabb_t(ray, aabb_min, aabb_max, t_max) >= 0.0;
 }
 // Moller-Trumbore triangle intersection
@ -78,7 +86,7 @@ bool intersect_triangle(Ray ray, TriangleGpu tri, inout HitInfo hit) {
    return true;
 }
-// BVH traversal (closest hit)
+// BVH traversal (closest hit) with distance-sorted children
 HitInfo trace_ray_bvh(Ray ray) {
    HitInfo hit;
    hit.hit = false;
@ -110,17 +118,42 @@ HitInfo trace_ray_bvh(Ray ray) {
                intersect_triangle(ray, tri, hit);
            }
        } else {
            // Distance-sorted child traversal: push farther child first
            // so closer child is processed first, improving early termination
            uint left = left_first;
            uint right = left_first + 1u;
            float t_left = intersect_aabb_t(ray,
                bvh_nodes[left].aabb_min_left_first.xyz,
                bvh_nodes[left].aabb_max_count.xyz, hit.t);
            float t_right = intersect_aabb_t(ray,
                bvh_nodes[right].aabb_min_left_first.xyz,
                bvh_nodes[right].aabb_max_count.xyz, hit.t);
            bool left_valid = t_left >= 0.0;
            bool right_valid = t_right >= 0.0;
            if (left_valid && right_valid) {
                // Both valid: push farther first
                if (t_left < t_right) {
                    if (sp < 63) stack[sp++] = right;
                    if (sp < 63) stack[sp++] = left;
                } else {
                    if (sp < 63) stack[sp++] = left;
                    if (sp < 63) stack[sp++] = right;
                }
            } else if (left_valid) {
                if (sp < 63) stack[sp++] = left;
            } else if (right_valid) {
                if (sp < 63) stack[sp++] = right;
            }
        }
    }
    return hit;
 }
-// Any-hit BVH for shadow ray
+// Any-hit BVH for shadow ray (no sorting needed - early exit on first hit)
 bool trace_any_bvh(Ray ray, float t_max) {
    if (!u_use_bvh || u_bvh_node_count == 0u) return false;
@ -142,7 +175,7 @@ bool trace_any_bvh(Ray ray, float t_max) {
        uint left_first = as_uint(node.aabb_min_left_first.w);
        uint count = as_uint(node.aabb_max_count.w);
-        if (!intersect_aabb(ray, bmin, bmax, hit.t)) continue;
+        if (!intersect_aabb(ray, bmin, bmax, t_max)) continue;
        if (count > 0u) {
            for (uint i = 0u; i < count; ++i) {
--- a/src/core/bvh.cpp
+++ b/src/core/bvh.cpp
@ -46,12 +46,52 @@ BVH::~BVH() {
 	clear();
 }
 /*
  * @brief Drop all built data: nodes, triangles and the triangle index array
  */
 void BVH::clear() {
 	nodes_.clear();
 	triangles_.clear();
 	triangle_indices_.clear();
 }
 /*
  * @brief Rebuild the BVH from scratch for the given meshes
  *
  * Discards any previously built data, extracts the triangles, sorts them by
  * Morton code, fills triangle_indices_ with the identity mapping and then
  * builds the tree recursively starting from node 0 over all triangles.
  *
  * @param meshes Meshes handed to extract_triangles_
  * @return false (after logging a warning) if no triangles were extracted,
  *         true otherwise
  */
 bool BVH::build(const std::vector<std::shared_ptr<Mesh>> &meshes) {
 	clear();
 	ARE_LOG_INFO("Building BVH...");
-	// Extract all triangles from meshes
+	// Step 1: Extract triangles from meshes
 	extract_triangles_(meshes);
 	if (triangles_.empty()) {
 		ARE_LOG_WARN("No triangles to build BVH");
 		return false;
 	}
 	// Step 2: Sort triangles by Morton code for spatial coherence
 	sort_triangles_by_morton_();
 	// Step 3: Initialize triangle indices (identity mapping after Morton sort)
 	uint n = static_cast<uint>(triangles_.size());
 	triangle_indices_.resize(n);
 	for (uint i = 0; i < n; ++i) {
 		triangle_indices_[i] = i;
 	}
 	// Step 4: Build BVH top-down using SAH
 	// Reserve space: worst case 2n-1 nodes for binary tree with 1 tri per leaf
 	// n >= 1 here (the empty case returned above), so 2 * n - 1 cannot wrap below zero.
 	// NOTE(review): if build_recursive_ keeps a reference into nodes_ while
 	// appending children, this reserve is what keeps it valid - confirm.
 	nodes_.reserve(2 * n - 1);
 	// Create root node
 	nodes_.emplace_back();
 	// Build recursively
 	build_recursive_(0, 0, n);
 	ARE_LOG_INFO("BVH built: " + std::to_string(nodes_.size()) + " nodes, " +
 		std::to_string(triangles_.size()) + " triangles");
 	return true;
 }
 void BVH::extract_triangles_(const std::vector<std::shared_ptr<Mesh>> &meshes) {
 	for (const auto &mesh : meshes) {
 		const auto &vertices = mesh->get_vertices();
 		const auto &indices = mesh->get_indices();
@ -61,7 +101,6 @@ bool BVH::build(const std::vector<std::shared_ptr<Mesh>> &meshes) {
 		for (size_t i = 0; i < indices.size(); i += 3) {
 			Triangle tri;
 			// Transform vertices
 			Vec4 v0 = transform * Vec4(vertices[indices[i]].position_, 1.0f);
 			Vec4 v1 = transform * Vec4(vertices[indices[i + 1]].position_, 1.0f);
 			Vec4 v2 = transform * Vec4(vertices[indices[i + 2]].position_, 1.0f);
@ -70,18 +109,15 @@ bool BVH::build(const std::vector<std::shared_ptr<Mesh>> &meshes) {
 			tri.v1_ = Vec3(v1) / v1.w;
 			tri.v2_ = Vec3(v2) / v2.w;
 			// Transform normals
 			Mat3 normal_matrix = glm::transpose(glm::inverse(Mat3(transform)));
 			tri.n0_ = glm::normalize(normal_matrix * vertices[indices[i]].normal_);
 			tri.n1_ = glm::normalize(normal_matrix * vertices[indices[i + 1]].normal_);
 			tri.n2_ = glm::normalize(normal_matrix * vertices[indices[i + 2]].normal_);
 			// Transform tangents
 			tri.t0_ = glm::normalize(normal_matrix * vertices[indices[i]].tangent_);
 			tri.t1_ = glm::normalize(normal_matrix * vertices[indices[i + 1]].tangent_);
 			tri.t2_ = glm::normalize(normal_matrix * vertices[indices[i + 2]].tangent_);
 			// Copy UVs
 			tri.uv0_ = vertices[indices[i]].texcoord_;
 			tri.uv1_ = vertices[indices[i + 1]].texcoord_;
 			tri.uv2_ = vertices[indices[i + 2]].texcoord_;
@ -91,30 +127,74 @@ bool BVH::build(const std::vector<std::shared_ptr<Mesh>> &meshes) {
 			triangles_.push_back(tri);
 		}
 	}
 	if (triangles_.empty()) {
 		ARE_LOG_WARN("No triangles to build BVH");
 		return false;
 }
-	// Initialize triangle indices
+// Morton code helper: interleave bits
-	triangle_indices_.resize(triangles_.size());
+static uint32_t part1by2(uint32_t x) {
 	// Spreads the low 10 bits of x so that input bit i ends up at bit 3*i,
 	// leaving two zero bits between consecutive input bits.
 	// Only the low 10 bits take part; anything above is discarded.
 	x &= 0x000003ffu;
 	// Each step shifts a copy left and masks, halving the size of the bit
 	// groups that are still packed together (16, 8, 4, then 2-bit shifts).
 	x = (x ^ (x << 16)) & 0xff0000ffu;
 	x = (x ^ (x << 8)) & 0x0300f00fu;
 	x = (x ^ (x << 4)) & 0x030c30c3u;
 	// Final mask keeps exactly bits 0, 3, 6, ..., 27.
 	x = (x ^ (x << 2)) & 0x09249249u;
 	return x;
 }
 /*
  * @brief Compute a 30-bit Morton code for a point inside a bounding box
  * @param p Point to encode
  * @param min Minimum corner of the box used for normalization
  * @param max Maximum corner of the box used for normalization
  * @return Interleaved code: per 3-bit group, x is the lowest bit, then y, then z
  */
 static uint32_t compute_morton_code(const Vec3 &p, const Vec3 &min, const Vec3 &max) {
 	// Map [min, max] onto [0, 1023] per axis; the 1e-6 term keeps the
 	// divisor non-zero when the box has zero extent along an axis.
 	Vec3 scale = Vec3(1023.0f) / (max - min + Vec3(1e-6f));
 	Vec3 v = (p - min) * scale;
 	// Truncate to an integer cell and clamp so points outside the box still
 	// produce a valid 10-bit coordinate.
 	uint32_t ix = glm::clamp(static_cast<int>(v.x), 0, 1023);
 	uint32_t iy = glm::clamp(static_cast<int>(v.y), 0, 1023);
 	uint32_t iz = glm::clamp(static_cast<int>(v.z), 0, 1023);
 	return (part1by2(iz) << 2) | (part1by2(iy) << 1) | part1by2(ix);
 }
 void BVH::sort_triangles_by_morton_() {
 	if (triangles_.empty())
 		return;
 	// Compute scene bounds
 	AABB scene_bounds;
 	for (const auto &tri : triangles_) {
 		scene_bounds.expand(tri.get_bounds());
 	}
 	// Expand bounds slightly
 	Vec3 padding = (scene_bounds.max_ - scene_bounds.min_) * 0.001f;
 	scene_bounds.min_ -= padding;
 	scene_bounds.max_ += padding;
 	// Compute Morton codes with indices
 	struct MortonEntry {
 		uint32_t code;
 		size_t original_index;
 	};
 	std::vector<MortonEntry> entries;
 	entries.reserve(triangles_.size());
 	for (size_t i = 0; i < triangles_.size(); ++i) {
-		triangle_indices_[i] = static_cast<uint>(i);
+		uint32_t code = compute_morton_code(triangles_[i].get_centroid(),
 			scene_bounds.min_, scene_bounds.max_);
 		entries.push_back({ code, i });
 	}
-	// Reserve space for nodes (estimate)
+	// Sort by Morton code
-	nodes_.reserve(triangles_.size() * 2);
+	std::sort(entries.begin(), entries.end(),
 		[](const MortonEntry &a, const MortonEntry &b) {
 			return a.code < b.code;
 		});
-	// Create root node
+	// Reorder triangles
-	nodes_.emplace_back();
+	std::vector<Triangle> sorted_triangles;
 	sorted_triangles.reserve(triangles_.size());
-	// Build BVH recursively
+	for (const auto &entry : entries) {
-	build_recursive_(0, 0, static_cast<uint>(triangles_.size()));
+		sorted_triangles.push_back(triangles_[entry.original_index]);
 	}
-	ARE_LOG_INFO("BVH built: " + std::to_string(nodes_.size()) + " nodes, " + std::to_string(triangles_.size()) + " triangles");
+	triangles_ = std::move(sorted_triangles);
 	return true;
 }
 void BVH::build_recursive_(uint node_idx, uint first_prim, uint prim_count) {
@ -125,28 +205,10 @@ void BVH::build_recursive_(uint node_idx, uint first_prim, uint prim_count) {
 	node.aabb_min_ = bounds.min_;
 	node.aabb_max_ = bounds.max_;
-	// Leaf node threshold
+	// Leaf node: 1 triangle per leaf for optimal GPU traversal
-	const uint LEAF_SIZE = 4;
+	if (prim_count <= 1) {
 	if (prim_count <= LEAF_SIZE) {
 		node.left_first_ = first_prim;
-		node.count_ = prim_count;
+		node.count_ = 1;
 		return;
 	}
 	// Calculate current depth
 	uint current_depth = 0;
 	uint idx = node_idx;
 	while (idx > 0) {
 		idx = (idx - 1) / 2;
 		current_depth++;
 	}
 	const uint MAX_DEPTH = 32;
 	// Force leaf if max depth reached
 	if (current_depth >= MAX_DEPTH) {
 		node.left_first_ = first_prim;
 		node.count_ = prim_count;
 		return;
 	}
@ -155,22 +217,9 @@ void BVH::build_recursive_(uint node_idx, uint first_prim, uint prim_count) {
 	float split_pos = 0.0f;
 	float split_cost = find_best_split_(first_prim, prim_count, axis, split_pos);
-	// SAH cost comparison (normalized)
+	// If SAH says no split is beneficial, force median split
-	// C_split = C_trav + (N_left * SA_left + N_right * SA_right) / SA_parent
+	// For GPU ray tracing, deeper trees with 1 tri per leaf are preferred
 	// C_leaf = N * C_int
 	// With C_trav = 1, C_int = 1:
 	// Split if C_split < C_leaf
 	// (Constants are used in find_best_split_ for cost calculation)
 	if (split_cost == std::numeric_limits<float>::max() || split_cost >= static_cast<float>(prim_count)) {
 		// SAH says no split is beneficial, but force split if too many prims
 		const uint MAX_PRIMS_PER_LEAF = 8;
 		if (prim_count <= MAX_PRIMS_PER_LEAF) {
 			node.left_first_ = first_prim;
 			node.count_ = prim_count;
 			return;
 		}
 		// Force median split as fallback
 		AABB cb = calculate_centroid_bounds_(first_prim, prim_count);
 		for (int a = 0; a < 3; ++a) {
 			float extent = cb.max_[a] - cb.min_[a];
@ -194,7 +243,7 @@ void BVH::build_recursive_(uint node_idx, uint first_prim, uint prim_count) {
 		}
 	}
-	// Ensure we have primitives on both sides
+	// Ensure split produces non-empty partitions
 	if (mid == first_prim || mid == first_prim + prim_count) {
 		mid = first_prim + prim_count / 2;
 	}
@ -203,10 +252,11 @@ void BVH::build_recursive_(uint node_idx, uint first_prim, uint prim_count) {
 	uint left_count = mid - first_prim;
 	uint right_count = prim_count - left_count;
 	// Store left child index (children will be at left_first_ and left_first_ + 1)
 	node.left_first_ = static_cast<uint>(nodes_.size());
-	node.count_ = 0;
+	node.count_ = 0; // Internal node
-	// Create child nodes
+	// Create child nodes (contiguous indices)
 	nodes_.emplace_back();
 	nodes_.emplace_back();
@ -217,7 +267,8 @@ void BVH::build_recursive_(uint node_idx, uint first_prim, uint prim_count) {
 float BVH::find_best_split_(uint first_prim, uint prim_count, int &axis, float &split_pos) {
 	float best_cost = std::numeric_limits<float>::max();
-	axis = 0, split_pos = 0.0f;
+	axis = 0;
 	split_pos = 0.0f;
 	AABB centroid_bounds = calculate_centroid_bounds_(first_prim, prim_count);
 	AABB parent_bounds = calculate_bounds_(first_prim, prim_count);
@ -229,13 +280,12 @@ float BVH::find_best_split_(uint first_prim, uint prim_count, int &axis, float &
 		if (extent < EPSILON)
 			continue;
-		// Try multiple split positions using 16 bins
+		// 16-bin SAH
 		const int NUM_BINS = 16;
 		for (int i = 1; i < NUM_BINS; ++i) {
 			float t = static_cast<float>(i) / NUM_BINS;
 			float pos = centroid_bounds.min_[a] + t * extent;
 			// Count primitives and calculate bounds for each side
 			AABB left_bounds, right_bounds;
 			uint left_count = 0, right_count = 0;
@ -252,13 +302,14 @@ float BVH::find_best_split_(uint first_prim, uint prim_count, int &axis, float &
 				}
 			}
 			// Calculate normalized SAH cost
 			if (left_count == 0 || right_count == 0)
 				continue;
-			float cost = 1.0f; // Traversal cost
+			// SAH cost: C_split = C_trav + (N_left * SA_left + N_right * SA_right) / SA_parent
 			float cost = 1.0f;
 			if (parent_sa > 0.0f) {
-				cost += (left_count * left_bounds.surface_area() + right_count * right_bounds.surface_area()) / parent_sa;
+				cost += (left_count * left_bounds.surface_area() +
 					right_count * right_bounds.surface_area()) / parent_sa;
 			}
 			if (cost < best_cost) {
@ -338,7 +389,6 @@ bool BVH::upload_to_gpu(Buffer &node_buffer, Buffer &triangle_buffer) {
 		g.uv0_uv1_ = Vec4(t.uv0_.x, t.uv0_.y, t.uv1_.x, t.uv1_.y);
 		g.uv2_ = Vec4(t.uv2_.x, t.uv2_.y, 0.0f, 0.0f);
 		// Pack tangents
 		g.t0_ = Vec4(t.t0_, 0.0f);
 		g.t1_ = Vec4(t.t1_, 0.0f);
@ -365,10 +415,4 @@ bool BVH::upload_to_gpu(Buffer &node_buffer, Buffer &triangle_buffer) {
 	return true;
 }
 void BVH::clear() {
 	nodes_.clear();
 	triangles_.clear();
 	triangle_indices_.clear();
 }
 } // namespace are