feat: 实现LBVH算法
- feat: 使用基于Morton码排序&SAH的LBVH算法实现BVH构建
- feat: 实现BVH子节点按照距离排序功能
- chore: 删除冗余接口
master
parent
93125b2e0b
commit
09667267fe
Binary file not shown.
|
|
@ -53,17 +53,19 @@ struct Triangle {
|
||||||
};
|
};
|
||||||
|
|
||||||
// BVH node for GPU
|
// BVH node for GPU
|
||||||
|
// Internal node: left_first_ = left child index, count_ = 0 (right child = left_first_ + 1)
|
||||||
|
// Leaf node: left_first_ = triangle offset in sorted array, count_ = triangle count
|
||||||
struct BVHNode {
|
struct BVHNode {
|
||||||
Vec3 aabb_min_;
|
Vec3 aabb_min_;
|
||||||
uint left_first_; // Left child index or first primitive index
|
uint left_first_; // Left child index (internal) or first primitive index (leaf)
|
||||||
Vec3 aabb_max_;
|
Vec3 aabb_max_;
|
||||||
uint count_; // 0 for interior node, >0 for leaf node
|
uint count_; // 0 for internal node, >0 for leaf (triangle count)
|
||||||
};
|
};
|
||||||
|
|
||||||
// GPU-friendly BVH node layout (std430 aligned)
|
// GPU-friendly BVH node layout (std430 aligned)
|
||||||
struct BVHNodeGpu {
|
struct BVHNodeGpu {
|
||||||
Vec4 aabb_min_left_first_; ///< xyz = aabb min, w = left_first (uint)
|
Vec4 aabb_min_left_first_; ///< xyz = aabb min, w = left_first (uint)
|
||||||
Vec4 aabb_max_count_; ///< xyz = aabb max, w = count (uint, 0 for interior)
|
Vec4 aabb_max_count_; ///< xyz = aabb max, w = count (uint, 0 for internal)
|
||||||
};
|
};
|
||||||
|
|
||||||
// GPU-friendly triangle layout (std430 aligned)
|
// GPU-friendly triangle layout (std430 aligned)
|
||||||
|
|
@ -80,7 +82,22 @@ struct TriangleGpu {
|
||||||
Vec4 t1_; ///< xyz = t1 (tangent at v1), w = reserved
|
Vec4 t1_; ///< xyz = t1 (tangent at v1), w = reserved
|
||||||
};
|
};
|
||||||
|
|
||||||
// Bounding Volume Hierarchy for ray tracing acceleration
|
/*
|
||||||
|
* @brief Bounding Volume Hierarchy using top-down SAH construction
|
||||||
|
*
|
||||||
|
* Algorithm:
|
||||||
|
* 1. Extract triangles from meshes and transform to world space
|
||||||
|
* 2. Sort triangles by Morton code for spatial coherence
|
||||||
|
* 3. Build BVH top-down using SAH (Surface Area Heuristic) with 16-bin evaluation
|
||||||
|
* 4. Node layout ensures children are at consecutive indices for GPU efficiency
|
||||||
|
*
|
||||||
|
* Node layout (GPU-friendly):
|
||||||
|
* - Internal nodes: left_first_ = left child index, right = left_first_ + 1
|
||||||
|
* - Leaf nodes: left_first_ = triangle offset, count_ = triangle count
|
||||||
|
*
|
||||||
|
* Time complexity: O(n log n) average with SAH binning
|
||||||
|
* Space complexity: O(n)
|
||||||
|
*/
|
||||||
class BVH {
|
class BVH {
|
||||||
public:
|
public:
|
||||||
// Constructor
|
// Constructor
|
||||||
|
|
@ -126,39 +143,43 @@ public:
|
||||||
private:
|
private:
|
||||||
std::vector<BVHNode> nodes_;
|
std::vector<BVHNode> nodes_;
|
||||||
std::vector<Triangle> triangles_;
|
std::vector<Triangle> triangles_;
|
||||||
std::vector<uint> triangle_indices_;
|
std::vector<uint> triangle_indices_; // Indirection array for partitioning
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* @brief Recursively build BVH
|
* @brief Extract triangles from meshes and transform to world space
|
||||||
* @param node_idx Current node index
|
*/
|
||||||
* @param first_prim First primitive index
|
void extract_triangles_(const std::vector<std::shared_ptr<Mesh>> &meshes);
|
||||||
* @param prim_count Primitive count
|
|
||||||
|
/*
|
||||||
|
* @brief Sort triangles by Morton code for spatial coherence
|
||||||
|
*/
|
||||||
|
void sort_triangles_by_morton_();
|
||||||
|
|
||||||
|
/*
|
||||||
|
* @brief Recursively build BVH using SAH
|
||||||
|
* @param node_idx Current node index to fill
|
||||||
|
* @param first_prim First primitive index in triangle_indices_
|
||||||
|
* @param prim_count Number of primitives
|
||||||
*/
|
*/
|
||||||
void build_recursive_(uint node_idx, uint first_prim, uint prim_count);
|
void build_recursive_(uint node_idx, uint first_prim, uint prim_count);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* @brief Find best split using SAH
|
* @brief Find best split using SAH with binning
|
||||||
* @param first_prim First primitive index
|
* @param first_prim First primitive index
|
||||||
* @param prim_count Primitive count
|
* @param prim_count Primitive count
|
||||||
* @param axis Split axis (output)
|
* @param axis Best split axis (output)
|
||||||
* @param split_pos Split position (output)
|
* @param split_pos Best split position (output)
|
||||||
* @return Split cost
|
* @return SAH cost of best split
|
||||||
*/
|
*/
|
||||||
float find_best_split_(uint first_prim, uint prim_count, int &axis, float &split_pos);
|
float find_best_split_(uint first_prim, uint prim_count, int &axis, float &split_pos);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* @brief Calculate node bounds
|
* @brief Calculate node bounds
|
||||||
* @param first_prim First primitive index
|
|
||||||
* @param prim_count Primitive count
|
|
||||||
* @return Bounding box
|
|
||||||
*/
|
*/
|
||||||
AABB calculate_bounds_(uint first_prim, uint prim_count);
|
AABB calculate_bounds_(uint first_prim, uint prim_count);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* @brief Calculate centroid bounds
|
* @brief Calculate centroid bounds
|
||||||
* @param first_prim First primitive index
|
|
||||||
* @param prim_count Primitive count
|
|
||||||
* @return Centroid bounding box
|
|
||||||
*/
|
*/
|
||||||
AABB calculate_centroid_bounds_(uint first_prim, uint prim_count);
|
AABB calculate_centroid_bounds_(uint first_prim, uint prim_count);
|
||||||
};
|
};
|
||||||
|
|
|
||||||
|
|
@ -15,8 +15,8 @@ vec3 oct_decode(vec2 f) {
|
||||||
return normalize(n);
|
return normalize(n);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Ray-AABB intersection
|
// Ray-AABB intersection: returns t_enter if hit, -1.0 if miss
|
||||||
bool intersect_aabb(Ray ray, vec3 aabb_min, vec3 aabb_max, float t_max) {
|
float intersect_aabb_t(Ray ray, vec3 aabb_min, vec3 aabb_max, float t_max) {
|
||||||
vec3 inv_d = 1.0 / ray.direction;
|
vec3 inv_d = 1.0 / ray.direction;
|
||||||
vec3 t0 = (aabb_min - ray.origin) * inv_d;
|
vec3 t0 = (aabb_min - ray.origin) * inv_d;
|
||||||
vec3 t1 = (aabb_max - ray.origin) * inv_d;
|
vec3 t1 = (aabb_max - ray.origin) * inv_d;
|
||||||
|
|
@ -27,7 +27,15 @@ bool intersect_aabb(Ray ray, vec3 aabb_min, vec3 aabb_max, float t_max) {
|
||||||
float tmin = max(max(tmin3.x, tmin3.y), tmin3.z);
|
float tmin = max(max(tmin3.x, tmin3.y), tmin3.z);
|
||||||
float tmax2 = min(min(tmax3.x, tmax3.y), tmax3.z);
|
float tmax2 = min(min(tmax3.x, tmax3.y), tmax3.z);
|
||||||
|
|
||||||
return (tmax2 >= max(tmin, 0.0)) && (tmin <= t_max);
|
if ((tmax2 >= max(tmin, 0.0)) && (tmin <= t_max)) {
|
||||||
|
return max(tmin, 0.0);
|
||||||
|
}
|
||||||
|
return -1.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Ray-AABB intersection (boolean version for shadow rays)
|
||||||
|
bool intersect_aabb(Ray ray, vec3 aabb_min, vec3 aabb_max, float t_max) {
|
||||||
|
return intersect_aabb_t(ray, aabb_min, aabb_max, t_max) >= 0.0;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Moller-Trumbore triangle intersection
|
// Moller-Trumbore triangle intersection
|
||||||
|
|
@ -78,7 +86,7 @@ bool intersect_triangle(Ray ray, TriangleGpu tri, inout HitInfo hit) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// BVH traversal (closest hit)
|
// BVH traversal (closest hit) with distance-sorted children
|
||||||
HitInfo trace_ray_bvh(Ray ray) {
|
HitInfo trace_ray_bvh(Ray ray) {
|
||||||
HitInfo hit;
|
HitInfo hit;
|
||||||
hit.hit = false;
|
hit.hit = false;
|
||||||
|
|
@ -110,17 +118,42 @@ HitInfo trace_ray_bvh(Ray ray) {
|
||||||
intersect_triangle(ray, tri, hit);
|
intersect_triangle(ray, tri, hit);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
// Distance-sorted child traversal: push farther child first
|
||||||
|
// so closer child is processed first, improving early termination
|
||||||
uint left = left_first;
|
uint left = left_first;
|
||||||
uint right = left_first + 1u;
|
uint right = left_first + 1u;
|
||||||
|
|
||||||
|
float t_left = intersect_aabb_t(ray,
|
||||||
|
bvh_nodes[left].aabb_min_left_first.xyz,
|
||||||
|
bvh_nodes[left].aabb_max_count.xyz, hit.t);
|
||||||
|
float t_right = intersect_aabb_t(ray,
|
||||||
|
bvh_nodes[right].aabb_min_left_first.xyz,
|
||||||
|
bvh_nodes[right].aabb_max_count.xyz, hit.t);
|
||||||
|
|
||||||
|
bool left_valid = t_left >= 0.0;
|
||||||
|
bool right_valid = t_right >= 0.0;
|
||||||
|
|
||||||
|
if (left_valid && right_valid) {
|
||||||
|
// Both valid: push farther first
|
||||||
|
if (t_left < t_right) {
|
||||||
if (sp < 63) stack[sp++] = right;
|
if (sp < 63) stack[sp++] = right;
|
||||||
if (sp < 63) stack[sp++] = left;
|
if (sp < 63) stack[sp++] = left;
|
||||||
|
} else {
|
||||||
|
if (sp < 63) stack[sp++] = left;
|
||||||
|
if (sp < 63) stack[sp++] = right;
|
||||||
|
}
|
||||||
|
} else if (left_valid) {
|
||||||
|
if (sp < 63) stack[sp++] = left;
|
||||||
|
} else if (right_valid) {
|
||||||
|
if (sp < 63) stack[sp++] = right;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return hit;
|
return hit;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Any-hit BVH for shadow ray
|
// Any-hit BVH for shadow ray (no sorting needed - early exit on first hit)
|
||||||
bool trace_any_bvh(Ray ray, float t_max) {
|
bool trace_any_bvh(Ray ray, float t_max) {
|
||||||
if (!u_use_bvh || u_bvh_node_count == 0u) return false;
|
if (!u_use_bvh || u_bvh_node_count == 0u) return false;
|
||||||
|
|
||||||
|
|
@ -142,7 +175,7 @@ bool trace_any_bvh(Ray ray, float t_max) {
|
||||||
uint left_first = as_uint(node.aabb_min_left_first.w);
|
uint left_first = as_uint(node.aabb_min_left_first.w);
|
||||||
uint count = as_uint(node.aabb_max_count.w);
|
uint count = as_uint(node.aabb_max_count.w);
|
||||||
|
|
||||||
if (!intersect_aabb(ray, bmin, bmax, hit.t)) continue;
|
if (!intersect_aabb(ray, bmin, bmax, t_max)) continue;
|
||||||
|
|
||||||
if (count > 0u) {
|
if (count > 0u) {
|
||||||
for (uint i = 0u; i < count; ++i) {
|
for (uint i = 0u; i < count; ++i) {
|
||||||
|
|
|
||||||
190
src/core/bvh.cpp
190
src/core/bvh.cpp
|
|
@ -46,12 +46,52 @@ BVH::~BVH() {
|
||||||
clear();
|
clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void BVH::clear() {
|
||||||
|
nodes_.clear();
|
||||||
|
triangles_.clear();
|
||||||
|
triangle_indices_.clear();
|
||||||
|
}
|
||||||
|
|
||||||
bool BVH::build(const std::vector<std::shared_ptr<Mesh>> &meshes) {
|
bool BVH::build(const std::vector<std::shared_ptr<Mesh>> &meshes) {
|
||||||
clear();
|
clear();
|
||||||
|
|
||||||
ARE_LOG_INFO("Building BVH...");
|
ARE_LOG_INFO("Building BVH...");
|
||||||
|
|
||||||
// Extract all triangles from meshes
|
// Step 1: Extract triangles from meshes
|
||||||
|
extract_triangles_(meshes);
|
||||||
|
|
||||||
|
if (triangles_.empty()) {
|
||||||
|
ARE_LOG_WARN("No triangles to build BVH");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Step 2: Sort triangles by Morton code for spatial coherence
|
||||||
|
sort_triangles_by_morton_();
|
||||||
|
|
||||||
|
// Step 3: Initialize triangle indices (identity mapping after Morton sort)
|
||||||
|
uint n = static_cast<uint>(triangles_.size());
|
||||||
|
triangle_indices_.resize(n);
|
||||||
|
for (uint i = 0; i < n; ++i) {
|
||||||
|
triangle_indices_[i] = i;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Step 4: Build BVH top-down using SAH
|
||||||
|
// Reserve space: worst case 2n-1 nodes for binary tree with 1 tri per leaf
|
||||||
|
nodes_.reserve(2 * n - 1);
|
||||||
|
|
||||||
|
// Create root node
|
||||||
|
nodes_.emplace_back();
|
||||||
|
|
||||||
|
// Build recursively
|
||||||
|
build_recursive_(0, 0, n);
|
||||||
|
|
||||||
|
ARE_LOG_INFO("BVH built: " + std::to_string(nodes_.size()) + " nodes, " +
|
||||||
|
std::to_string(triangles_.size()) + " triangles");
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
void BVH::extract_triangles_(const std::vector<std::shared_ptr<Mesh>> &meshes) {
|
||||||
for (const auto &mesh : meshes) {
|
for (const auto &mesh : meshes) {
|
||||||
const auto &vertices = mesh->get_vertices();
|
const auto &vertices = mesh->get_vertices();
|
||||||
const auto &indices = mesh->get_indices();
|
const auto &indices = mesh->get_indices();
|
||||||
|
|
@ -61,7 +101,6 @@ bool BVH::build(const std::vector<std::shared_ptr<Mesh>> &meshes) {
|
||||||
for (size_t i = 0; i < indices.size(); i += 3) {
|
for (size_t i = 0; i < indices.size(); i += 3) {
|
||||||
Triangle tri;
|
Triangle tri;
|
||||||
|
|
||||||
// Transform vertices
|
|
||||||
Vec4 v0 = transform * Vec4(vertices[indices[i]].position_, 1.0f);
|
Vec4 v0 = transform * Vec4(vertices[indices[i]].position_, 1.0f);
|
||||||
Vec4 v1 = transform * Vec4(vertices[indices[i + 1]].position_, 1.0f);
|
Vec4 v1 = transform * Vec4(vertices[indices[i + 1]].position_, 1.0f);
|
||||||
Vec4 v2 = transform * Vec4(vertices[indices[i + 2]].position_, 1.0f);
|
Vec4 v2 = transform * Vec4(vertices[indices[i + 2]].position_, 1.0f);
|
||||||
|
|
@ -70,18 +109,15 @@ bool BVH::build(const std::vector<std::shared_ptr<Mesh>> &meshes) {
|
||||||
tri.v1_ = Vec3(v1) / v1.w;
|
tri.v1_ = Vec3(v1) / v1.w;
|
||||||
tri.v2_ = Vec3(v2) / v2.w;
|
tri.v2_ = Vec3(v2) / v2.w;
|
||||||
|
|
||||||
// Transform normals
|
|
||||||
Mat3 normal_matrix = glm::transpose(glm::inverse(Mat3(transform)));
|
Mat3 normal_matrix = glm::transpose(glm::inverse(Mat3(transform)));
|
||||||
tri.n0_ = glm::normalize(normal_matrix * vertices[indices[i]].normal_);
|
tri.n0_ = glm::normalize(normal_matrix * vertices[indices[i]].normal_);
|
||||||
tri.n1_ = glm::normalize(normal_matrix * vertices[indices[i + 1]].normal_);
|
tri.n1_ = glm::normalize(normal_matrix * vertices[indices[i + 1]].normal_);
|
||||||
tri.n2_ = glm::normalize(normal_matrix * vertices[indices[i + 2]].normal_);
|
tri.n2_ = glm::normalize(normal_matrix * vertices[indices[i + 2]].normal_);
|
||||||
|
|
||||||
// Transform tangents
|
|
||||||
tri.t0_ = glm::normalize(normal_matrix * vertices[indices[i]].tangent_);
|
tri.t0_ = glm::normalize(normal_matrix * vertices[indices[i]].tangent_);
|
||||||
tri.t1_ = glm::normalize(normal_matrix * vertices[indices[i + 1]].tangent_);
|
tri.t1_ = glm::normalize(normal_matrix * vertices[indices[i + 1]].tangent_);
|
||||||
tri.t2_ = glm::normalize(normal_matrix * vertices[indices[i + 2]].tangent_);
|
tri.t2_ = glm::normalize(normal_matrix * vertices[indices[i + 2]].tangent_);
|
||||||
|
|
||||||
// Copy UVs
|
|
||||||
tri.uv0_ = vertices[indices[i]].texcoord_;
|
tri.uv0_ = vertices[indices[i]].texcoord_;
|
||||||
tri.uv1_ = vertices[indices[i + 1]].texcoord_;
|
tri.uv1_ = vertices[indices[i + 1]].texcoord_;
|
||||||
tri.uv2_ = vertices[indices[i + 2]].texcoord_;
|
tri.uv2_ = vertices[indices[i + 2]].texcoord_;
|
||||||
|
|
@ -91,30 +127,74 @@ bool BVH::build(const std::vector<std::shared_ptr<Mesh>> &meshes) {
|
||||||
triangles_.push_back(tri);
|
triangles_.push_back(tri);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (triangles_.empty()) {
|
|
||||||
ARE_LOG_WARN("No triangles to build BVH");
|
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Initialize triangle indices
|
// Morton code helper: interleave bits
|
||||||
triangle_indices_.resize(triangles_.size());
|
static uint32_t part1by2(uint32_t x) {
|
||||||
|
x &= 0x000003ffu;
|
||||||
|
x = (x ^ (x << 16)) & 0xff0000ffu;
|
||||||
|
x = (x ^ (x << 8)) & 0x0300f00fu;
|
||||||
|
x = (x ^ (x << 4)) & 0x030c30c3u;
|
||||||
|
x = (x ^ (x << 2)) & 0x09249249u;
|
||||||
|
return x;
|
||||||
|
}
|
||||||
|
|
||||||
|
static uint32_t compute_morton_code(const Vec3 &p, const Vec3 &min, const Vec3 &max) {
|
||||||
|
Vec3 scale = Vec3(1023.0f) / (max - min + Vec3(1e-6f));
|
||||||
|
Vec3 v = (p - min) * scale;
|
||||||
|
|
||||||
|
uint32_t ix = glm::clamp(static_cast<int>(v.x), 0, 1023);
|
||||||
|
uint32_t iy = glm::clamp(static_cast<int>(v.y), 0, 1023);
|
||||||
|
uint32_t iz = glm::clamp(static_cast<int>(v.z), 0, 1023);
|
||||||
|
|
||||||
|
return (part1by2(iz) << 2) | (part1by2(iy) << 1) | part1by2(ix);
|
||||||
|
}
|
||||||
|
|
||||||
|
void BVH::sort_triangles_by_morton_() {
|
||||||
|
if (triangles_.empty())
|
||||||
|
return;
|
||||||
|
|
||||||
|
// Compute scene bounds
|
||||||
|
AABB scene_bounds;
|
||||||
|
for (const auto &tri : triangles_) {
|
||||||
|
scene_bounds.expand(tri.get_bounds());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Expand bounds slightly
|
||||||
|
Vec3 padding = (scene_bounds.max_ - scene_bounds.min_) * 0.001f;
|
||||||
|
scene_bounds.min_ -= padding;
|
||||||
|
scene_bounds.max_ += padding;
|
||||||
|
|
||||||
|
// Compute Morton codes with indices
|
||||||
|
struct MortonEntry {
|
||||||
|
uint32_t code;
|
||||||
|
size_t original_index;
|
||||||
|
};
|
||||||
|
|
||||||
|
std::vector<MortonEntry> entries;
|
||||||
|
entries.reserve(triangles_.size());
|
||||||
|
|
||||||
for (size_t i = 0; i < triangles_.size(); ++i) {
|
for (size_t i = 0; i < triangles_.size(); ++i) {
|
||||||
triangle_indices_[i] = static_cast<uint>(i);
|
uint32_t code = compute_morton_code(triangles_[i].get_centroid(),
|
||||||
|
scene_bounds.min_, scene_bounds.max_);
|
||||||
|
entries.push_back({ code, i });
|
||||||
}
|
}
|
||||||
|
|
||||||
// Reserve space for nodes (estimate)
|
// Sort by Morton code
|
||||||
nodes_.reserve(triangles_.size() * 2);
|
std::sort(entries.begin(), entries.end(),
|
||||||
|
[](const MortonEntry &a, const MortonEntry &b) {
|
||||||
|
return a.code < b.code;
|
||||||
|
});
|
||||||
|
|
||||||
// Create root node
|
// Reorder triangles
|
||||||
nodes_.emplace_back();
|
std::vector<Triangle> sorted_triangles;
|
||||||
|
sorted_triangles.reserve(triangles_.size());
|
||||||
|
|
||||||
// Build BVH recursively
|
for (const auto &entry : entries) {
|
||||||
build_recursive_(0, 0, static_cast<uint>(triangles_.size()));
|
sorted_triangles.push_back(triangles_[entry.original_index]);
|
||||||
|
}
|
||||||
|
|
||||||
ARE_LOG_INFO("BVH built: " + std::to_string(nodes_.size()) + " nodes, " + std::to_string(triangles_.size()) + " triangles");
|
triangles_ = std::move(sorted_triangles);
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void BVH::build_recursive_(uint node_idx, uint first_prim, uint prim_count) {
|
void BVH::build_recursive_(uint node_idx, uint first_prim, uint prim_count) {
|
||||||
|
|
@ -125,28 +205,10 @@ void BVH::build_recursive_(uint node_idx, uint first_prim, uint prim_count) {
|
||||||
node.aabb_min_ = bounds.min_;
|
node.aabb_min_ = bounds.min_;
|
||||||
node.aabb_max_ = bounds.max_;
|
node.aabb_max_ = bounds.max_;
|
||||||
|
|
||||||
// Leaf node threshold
|
// Leaf node: 1 triangle per leaf for optimal GPU traversal
|
||||||
const uint LEAF_SIZE = 4;
|
if (prim_count <= 1) {
|
||||||
|
|
||||||
if (prim_count <= LEAF_SIZE) {
|
|
||||||
node.left_first_ = first_prim;
|
node.left_first_ = first_prim;
|
||||||
node.count_ = prim_count;
|
node.count_ = 1;
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Calculate current depth
|
|
||||||
uint current_depth = 0;
|
|
||||||
uint idx = node_idx;
|
|
||||||
while (idx > 0) {
|
|
||||||
idx = (idx - 1) / 2;
|
|
||||||
current_depth++;
|
|
||||||
}
|
|
||||||
const uint MAX_DEPTH = 32;
|
|
||||||
|
|
||||||
// Force leaf if max depth reached
|
|
||||||
if (current_depth >= MAX_DEPTH) {
|
|
||||||
node.left_first_ = first_prim;
|
|
||||||
node.count_ = prim_count;
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -155,22 +217,9 @@ void BVH::build_recursive_(uint node_idx, uint first_prim, uint prim_count) {
|
||||||
float split_pos = 0.0f;
|
float split_pos = 0.0f;
|
||||||
float split_cost = find_best_split_(first_prim, prim_count, axis, split_pos);
|
float split_cost = find_best_split_(first_prim, prim_count, axis, split_pos);
|
||||||
|
|
||||||
// SAH cost comparison (normalized)
|
// If SAH says no split is beneficial, force median split
|
||||||
// C_split = C_trav + (N_left * SA_left + N_right * SA_right) / SA_parent
|
// For GPU ray tracing, deeper trees with 1 tri per leaf are preferred
|
||||||
// C_leaf = N * C_int
|
|
||||||
// With C_trav = 1, C_int = 1:
|
|
||||||
// Split if C_split < C_leaf
|
|
||||||
// (Constants are used in find_best_split_ for cost calculation)
|
|
||||||
|
|
||||||
if (split_cost == std::numeric_limits<float>::max() || split_cost >= static_cast<float>(prim_count)) {
|
if (split_cost == std::numeric_limits<float>::max() || split_cost >= static_cast<float>(prim_count)) {
|
||||||
// SAH says no split is beneficial, but force split if too many prims
|
|
||||||
const uint MAX_PRIMS_PER_LEAF = 8;
|
|
||||||
if (prim_count <= MAX_PRIMS_PER_LEAF) {
|
|
||||||
node.left_first_ = first_prim;
|
|
||||||
node.count_ = prim_count;
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
// Force median split as fallback
|
|
||||||
AABB cb = calculate_centroid_bounds_(first_prim, prim_count);
|
AABB cb = calculate_centroid_bounds_(first_prim, prim_count);
|
||||||
for (int a = 0; a < 3; ++a) {
|
for (int a = 0; a < 3; ++a) {
|
||||||
float extent = cb.max_[a] - cb.min_[a];
|
float extent = cb.max_[a] - cb.min_[a];
|
||||||
|
|
@ -194,7 +243,7 @@ void BVH::build_recursive_(uint node_idx, uint first_prim, uint prim_count) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Ensure we have primitives on both sides
|
// Ensure split produces non-empty partitions
|
||||||
if (mid == first_prim || mid == first_prim + prim_count) {
|
if (mid == first_prim || mid == first_prim + prim_count) {
|
||||||
mid = first_prim + prim_count / 2;
|
mid = first_prim + prim_count / 2;
|
||||||
}
|
}
|
||||||
|
|
@ -203,10 +252,11 @@ void BVH::build_recursive_(uint node_idx, uint first_prim, uint prim_count) {
|
||||||
uint left_count = mid - first_prim;
|
uint left_count = mid - first_prim;
|
||||||
uint right_count = prim_count - left_count;
|
uint right_count = prim_count - left_count;
|
||||||
|
|
||||||
|
// Store left child index (children will be at left_first_ and left_first_ + 1)
|
||||||
node.left_first_ = static_cast<uint>(nodes_.size());
|
node.left_first_ = static_cast<uint>(nodes_.size());
|
||||||
node.count_ = 0;
|
node.count_ = 0; // Internal node
|
||||||
|
|
||||||
// Create child nodes
|
// Create child nodes (contiguous indices)
|
||||||
nodes_.emplace_back();
|
nodes_.emplace_back();
|
||||||
nodes_.emplace_back();
|
nodes_.emplace_back();
|
||||||
|
|
||||||
|
|
@ -217,7 +267,8 @@ void BVH::build_recursive_(uint node_idx, uint first_prim, uint prim_count) {
|
||||||
|
|
||||||
float BVH::find_best_split_(uint first_prim, uint prim_count, int &axis, float &split_pos) {
|
float BVH::find_best_split_(uint first_prim, uint prim_count, int &axis, float &split_pos) {
|
||||||
float best_cost = std::numeric_limits<float>::max();
|
float best_cost = std::numeric_limits<float>::max();
|
||||||
axis = 0, split_pos = 0.0f;
|
axis = 0;
|
||||||
|
split_pos = 0.0f;
|
||||||
|
|
||||||
AABB centroid_bounds = calculate_centroid_bounds_(first_prim, prim_count);
|
AABB centroid_bounds = calculate_centroid_bounds_(first_prim, prim_count);
|
||||||
AABB parent_bounds = calculate_bounds_(first_prim, prim_count);
|
AABB parent_bounds = calculate_bounds_(first_prim, prim_count);
|
||||||
|
|
@ -229,13 +280,12 @@ float BVH::find_best_split_(uint first_prim, uint prim_count, int &axis, float &
|
||||||
if (extent < EPSILON)
|
if (extent < EPSILON)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
// Try multiple split positions using 16 bins
|
// 16-bin SAH
|
||||||
const int NUM_BINS = 16;
|
const int NUM_BINS = 16;
|
||||||
for (int i = 1; i < NUM_BINS; ++i) {
|
for (int i = 1; i < NUM_BINS; ++i) {
|
||||||
float t = static_cast<float>(i) / NUM_BINS;
|
float t = static_cast<float>(i) / NUM_BINS;
|
||||||
float pos = centroid_bounds.min_[a] + t * extent;
|
float pos = centroid_bounds.min_[a] + t * extent;
|
||||||
|
|
||||||
// Count primitives and calculate bounds for each side
|
|
||||||
AABB left_bounds, right_bounds;
|
AABB left_bounds, right_bounds;
|
||||||
uint left_count = 0, right_count = 0;
|
uint left_count = 0, right_count = 0;
|
||||||
|
|
||||||
|
|
@ -252,13 +302,14 @@ float BVH::find_best_split_(uint first_prim, uint prim_count, int &axis, float &
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Calculate normalized SAH cost
|
|
||||||
if (left_count == 0 || right_count == 0)
|
if (left_count == 0 || right_count == 0)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
float cost = 1.0f; // Traversal cost
|
// SAH cost: C_split = C_trav + (N_left * SA_left + N_right * SA_right) / SA_parent
|
||||||
|
float cost = 1.0f;
|
||||||
if (parent_sa > 0.0f) {
|
if (parent_sa > 0.0f) {
|
||||||
cost += (left_count * left_bounds.surface_area() + right_count * right_bounds.surface_area()) / parent_sa;
|
cost += (left_count * left_bounds.surface_area() +
|
||||||
|
right_count * right_bounds.surface_area()) / parent_sa;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (cost < best_cost) {
|
if (cost < best_cost) {
|
||||||
|
|
@ -338,7 +389,6 @@ bool BVH::upload_to_gpu(Buffer &node_buffer, Buffer &triangle_buffer) {
|
||||||
g.uv0_uv1_ = Vec4(t.uv0_.x, t.uv0_.y, t.uv1_.x, t.uv1_.y);
|
g.uv0_uv1_ = Vec4(t.uv0_.x, t.uv0_.y, t.uv1_.x, t.uv1_.y);
|
||||||
g.uv2_ = Vec4(t.uv2_.x, t.uv2_.y, 0.0f, 0.0f);
|
g.uv2_ = Vec4(t.uv2_.x, t.uv2_.y, 0.0f, 0.0f);
|
||||||
|
|
||||||
// Pack tangents
|
|
||||||
g.t0_ = Vec4(t.t0_, 0.0f);
|
g.t0_ = Vec4(t.t0_, 0.0f);
|
||||||
g.t1_ = Vec4(t.t1_, 0.0f);
|
g.t1_ = Vec4(t.t1_, 0.0f);
|
||||||
|
|
||||||
|
|
@ -365,10 +415,4 @@ bool BVH::upload_to_gpu(Buffer &node_buffer, Buffer &triangle_buffer) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void BVH::clear() {
|
|
||||||
nodes_.clear();
|
|
||||||
triangles_.clear();
|
|
||||||
triangle_indices_.clear();
|
|
||||||
}
|
|
||||||
|
|
||||||
} // namespace are
|
} // namespace are
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue