211 h1 = _mm256_and_pd(h1, hmin1);
212 h0 = _mm256_and_pd(h0, hmax0);
213 h1 = _mm256_and_pd(h1, hmax1);
215 const int hitmask = _mm256_movemask_pd(h0) | (_mm256_movemask_pd(h1) << 4);
221int overlaps(
const Aabb8<float, D>& bounds,
const Aabb<float, D>& box)
223 __m256 h = _mm256_castsi256_ps(_mm256_set1_epi32(-1));
224 for (
size_t d = 0; d < D; ++d)
226 const __m256 pmin = _mm256_set1_ps(box.corners[d][0]);
227 const __m256 pmax = _mm256_set1_ps(box.corners[d][1]);
228 const __m256 hmin = _mm256_cmp_ps(bounds.corners[d][0], pmax, _CMP_LE_OQ);
229 const __m256 hmax = _mm256_cmp_ps(pmin, bounds.corners[d][1], _CMP_LE_OQ);
230 h = _mm256_and_ps(h, hmin);
231 h = _mm256_and_ps(h, hmax);
233 const int hitmask = _mm256_movemask_ps(h);
239int overlaps(
const Aabb8<double, D>& bounds,
const Aabb<double, D>& box)
241 __m256d h0 = _mm256_castsi256_pd(_mm256_set1_epi32(-1));
243 for (
size_t d = 0; d < D; ++d)
245 const __m256d pmin = _mm256_set1_pd(box.corners[d][0]);
246 const __m256d pmax = _mm256_set1_pd(box.corners[d][1]);
247 const __m256d hmin0 = _mm256_cmp_pd(bounds.corners[d][0][0], pmax, _CMP_LE_OQ);
248 const __m256d hmin1 = _mm256_cmp_pd(bounds.corners[d][0][1], pmax, _CMP_LE_OQ);
249 const __m256d hmax0 = _mm256_cmp_pd(pmin, bounds.corners[d][1][0], _CMP_LE_OQ);
250 const __m256d hmax1 = _mm256_cmp_pd(pmin, bounds.corners[d][1][1], _CMP_LE_OQ);
251 h0 = _mm256_and_pd(h0, hmin0);
252 h1 = _mm256_and_pd(h1, hmin1);
253 h0 = _mm256_and_pd(h0, hmax0);
254 h1 = _mm256_and_pd(h1, hmax1);
256 const int hitmask = _mm256_movemask_pd(h0) | (_mm256_movemask_pd(h1) << 4);
261int intersect(
const Aabb8<float, D>& bounds,
const Ray<float, D>& ray,
float tMin,
float tMax,
float ts[8])
263 __m256 tmin = _mm256_set1_ps(tMin);
264 __m256 tmax = _mm256_set1_ps(tMax);
265 for (
size_t d = 0; d < D; ++d)
267 const __m256 support = _mm256_set1_ps(ray.support[d]);
268 const __m256 invDir = _mm256_set1_ps(ray.invDir[d]);
269 const bool sign = ray.sign[d];
270 const __m256 bmin = bounds.corners[d][
sign];
271 const __m256 bmax = bounds.corners[d][!
sign];
272 const __m256 dmin = _mm256_mul_ps(_mm256_sub_ps(bmin, support), invDir);
273 const __m256 dmax = _mm256_mul_ps(_mm256_sub_ps(bmax, support), invDir);
274 tmin = _mm256_max_ps(dmin, tmin);
275 tmax = _mm256_min_ps(dmax, tmax);
277 tmax = _mm256_mul_ps(tmax, _mm256_set1_ps(1 + 2 * num::NumTraits<float>::gamma(3)));
278 _mm256_storeu_ps(ts, tmin);
279 __m256 h = _mm256_cmp_ps(tmin, tmax, _CMP_LE_OQ);
280 const int hitmask = _mm256_movemask_ps(h);
285int intersect(
const Aabb8<double, D>& bounds,
const Ray<double, D>& ray,
double tMin,
double tMax,
double ts[8])
287 __m256d tmin0 = _mm256_set1_pd(tMin);
288 __m256d tmax0 = _mm256_set1_pd(tMax);
289 __m256d tmin1 = tmin0;
290 __m256d tmax1 = tmax0;
291 for (
size_t d = 0; d < D; ++d)
293 const __m256d support = _mm256_set1_pd(ray.support[d]);
294 const __m256d invDir = _mm256_set1_pd(ray.invDir[d]);
295 const bool sign = ray.sign[d];
296 const __m256d bmin0 = bounds.corners[d][
sign][0];
297 const __m256d bmin1 = bounds.corners[d][
sign][1];
298 const __m256d bmax0 = bounds.corners[d][!
sign][0];
299 const __m256d bmax1 = bounds.corners[d][!
sign][1];
300 const __m256d dmin0 = _mm256_mul_pd(_mm256_sub_pd(bmin0, support), invDir);
301 const __m256d dmin1 = _mm256_mul_pd(_mm256_sub_pd(bmin1, support), invDir);
302 const __m256d dmax0 = _mm256_mul_pd(_mm256_sub_pd(bmax0, support), invDir);
303 const __m256d dmax1 = _mm256_mul_pd(_mm256_sub_pd(bmax1, support), invDir);
304 tmin0 = _mm256_max_pd(dmin0, tmin0);
305 tmin1 = _mm256_max_pd(dmin1, tmin1);
306 tmax0 = _mm256_min_pd(dmax0, tmax0);
307 tmax1 = _mm256_min_pd(dmax1, tmax1);
309 _mm256_storeu_pd(&ts[0], tmin0);
310 _mm256_storeu_pd(&ts[4], tmin1);
311 const __m256d f = _mm256_set1_pd(1 + 2 * num::NumTraits<double>::gamma(3));
312 tmax0 = _mm256_mul_pd(tmax0, f);
313 tmax1 = _mm256_mul_pd(tmax1, f);
314 const __m256d h0 = _mm256_cmp_pd(tmin0, tmax0, _CMP_LE_OQ);
315 const __m256d h1 = _mm256_cmp_pd(tmin1, tmax1, _CMP_LE_OQ);
316 const int hitmask = _mm256_movemask_pd(h0) | (_mm256_movemask_pd(h1) << 4);
322void squaredDistance(
const Aabb8<float, D>& bounds,
const Point<float, D>& point,
float sds[4])
324 const __m256 zero = _mm256_setzero_ps();
325 __m256 sd = _mm256_setzero_ps();
326 for (
size_t i = 0; i < D; ++i)
328 const __m256 p = _mm256_set1_ps(point.x[i]);
329 const __m256 dmin = _mm256_sub_ps(bounds.corners[i][0], p);
330 const __m256 dmax = _mm256_sub_ps(p, bounds.corners[i][1]);
331 const __m256 d = _mm256_max_ps(_mm256_max_ps(dmin, dmax), zero);
332 const __m256 d2 = _mm256_mul_ps(d, d);
333 sd = _mm256_add_ps(sd, d2);
335 _mm256_storeu_ps(sds, sd);
339void squaredDistance(
const Aabb8<double, D>& bounds,
const Point<double, D>& point,
double sds[4])
341 const __m256d zero = _mm256_setzero_pd();
342 __m256d sd0 = _mm256_setzero_pd();
344 for (
size_t i = 0; i < D; ++i)
346 const __m256d p = _mm256_set1_pd(point.x[i]);
347 const __m256d dmin0 = _mm256_sub_pd(bounds.corners[i][0][0], p);
348 const __m256d dmin1 = _mm256_sub_pd(bounds.corners[i][0][1], p);
349 const __m256d dmax0 = _mm256_sub_pd(p, bounds.corners[i][1][0]);
350 const __m256d dmax1 = _mm256_sub_pd(p, bounds.corners[i][1][1]);
351 const __m256d d0 = _mm256_max_pd(_mm256_max_pd(dmin0, dmax0), zero);
352 const __m256d d1 = _mm256_max_pd(_mm256_max_pd(dmin1, dmax1), zero);
353 sd0 = _mm256_add_pd(sd0, _mm256_mul_pd(d0, d0));
354 sd1 = _mm256_add_pd(sd1, _mm256_mul_pd(d1, d1));
356 _mm256_storeu_pd(&sds[0], sd0);
357 _mm256_storeu_pd(&sds[4], sd1);
370template <
typename O,
typename OT,
typename SH>
371Aabb8Tree<O, OT, SH>::Aabb8Tree(TSplitHeuristics heuristics) :
372 SH(std::move(heuristics)),
373 aabb_(TObjectTraits::aabbEmpty()),
376 end_(new TObjectIterator),
383template <
typename O,
typename OT,
typename SH>
384Aabb8Tree<O, OT, SH>::Aabb8Tree(TObjectIterator first, TObjectIterator last, TSplitHeuristics heuristics):
385 SH(std::move(heuristics)),
386 aabb_(TObjectTraits::aabbEmpty()),
389 end_(new TObjectIterator(last)),
394 std::ptrdiff_t n = last - first;
397 LASS_THROW(
"Aabb8Tree: invalid range");
399 if (
static_cast<size_t>(n) >
static_cast<size_t>(Child::maxNode) + 1)
401 LASS_THROW(
"Aabb8Tree: too many objects");
404 inputs.reserve(
static_cast<size_t>(n));
405 for (TObjectIterator i = first; i != last; ++i)
407 inputs.emplace_back(TObjectTraits::objectAabb(i), i);
409 root_ = balance(inputs.begin(), inputs.end(), aabb_);
414template <
typename O,
typename OT,
typename SH>
415Aabb8Tree<O, OT, SH>::Aabb8Tree(TSelf&& other)
noexcept:
416 SH(std::forward<TSplitHeuristics>(other)),
417 aabb_(std::move(other.aabb_)),
418 nodes_(std::move(other.nodes_)),
419 objects_(std::move(other.objects_)),
420 end_(std::move(other.end_)),
421 root_(std::move(other.root_))
426template <
typename O,
typename OT,
typename SH>
429 TSplitHeuristics::operator=(std::forward<TSplitHeuristics>(other));
430 aabb_ = std::move(other.aabb_);
431 nodes_ = std::move(other.nodes_);
432 objects_ = std::move(other.objects_);
433 end_ = std::move(other.end_);
434 root_ = std::move(other.root_);
447template <
typename O,
typename OT,
typename SH>
463template <
typename O,
typename OT,
typename SH>
466 TSelf temp(first, last,
static_cast<const SH&
>(*
this));
474template <
typename O,
typename OT,
typename SH>
inline
475const typename Aabb8Tree<O, OT, SH>::TAabb
485template <
typename O,
typename OT,
typename SH>
488 if (
isEmpty() || !TObjectTraits::aabbContains(aabb_, point))
492 LASS_ASSERT(!root_.isEmpty());
493 return doContains(root_, point, info);
500template <
typename O,
typename OT,
typename SH>
501template <
typename OutputIterator>
inline
504 if (
isEmpty() || !TObjectTraits::aabbContains(aabb_, point))
508 LASS_ASSERT(!root_.isEmpty());
509 return doFind(root_, point, result, info);
516template <
typename O,
typename OT,
typename SH>
517template <
typename OutputIterator>
inline
520 if (
isEmpty() || !TObjectTraits::aabbIntersects(aabb_, box))
524 LASS_ASSERT(!root_.isEmpty());
525 return doFind(root_, box, result, info);
532template <
typename O,
typename OT,
typename SH>
533template <
typename OutputIterator>
536 const TVector invDir = TObjectTraits::vectorReciprocal(TObjectTraits::rayDirection(ray));
537 if (
isEmpty() || !volumeIntersects(aabb_, ray, invDir, tMin, tMax))
541 LASS_ASSERT(!root_.isEmpty());
542 return doFind(root_, ray, invDir, tMin, tMax, result, info);
549template <
typename O,
typename OT,
typename SH>
550typename Aabb8Tree<O, OT, SH>::TObjectIterator
553 const TVector dir = TObjectTraits::rayDirection(ray);
554 const TVector invDir = TObjectTraits::vectorReciprocal(dir);
556 if (
isEmpty() || !volumeIntersect(aabb_, ray, invDir, tRoot, tMin))
560 LASS_ASSERT(!root_.isEmpty());
561 return doIntersect(root_, ray, invDir, t, tMin, info);
568template <
typename O,
typename OT,
typename SH>
570 const TRay& ray, TParam tMin, TParam tMax,
const TInfo* info)
const
573 const TVector dir = TObjectTraits::rayDirection(ray);
574 const TVector invDir = TObjectTraits::vectorReciprocal(dir);
575 if (
isEmpty() || !volumeIntersects(aabb_, ray, invDir, tMin, tMax))
579 LASS_ASSERT(!root_.isEmpty());
580 return doIntersects(root_, ray, invDir, tMin, tMax, info);
587template <
typename O,
typename OT,
typename SH>
588const typename Aabb8Tree<O, OT, SH>::Neighbour
591 Neighbour nearest(*end_, std::numeric_limits<TValue>::infinity());
596 LASS_ASSERT(!root_.isEmpty());
597 doNearestNeighbour(root_, point, info, nearest);
622template <
class O,
class OT,
typename SH>
623template<
typename RandomIterator>
626 const TPoint& center, TParam maxRadius,
size_t maxCount, RandomIterator first,
const TInfo* info)
const
628 if (
isEmpty() || maxRadius == 0)
632 TValue squaredRadius = maxRadius * maxRadius;
633 LASS_ASSERT(!root_.isEmpty());
634 return doRangeSearch(root_, center, squaredRadius, maxCount, first, first, info);
641template <
typename O,
typename OT,
typename SH>
644 return objects_.empty();
654template <
typename O,
typename OT,
typename SH>
655const typename Aabb8Tree<O, OT, SH>::TObjectIterator
672template <
typename O,
typename OT,
typename SH>
676 std::swap(aabb_, other.aabb_);
677 nodes_.swap(other.nodes_);
678 objects_.swap(other.objects_);
679 end_.swap(other.end_);
680 std::swap(root_, other.root_);
687template <
typename O,
typename OT,
typename SH>
688typename Aabb8Tree<O, OT, SH>::Child
689Aabb8Tree<O, OT, SH>::balance(TInputIterator first, TInputIterator last, TAabb& bounds)
691 auto split = [
this](TInputIterator first, TInputIterator last) -> TSplitInfo
693 auto s = TSplitHeuristics::template split<OT>(first, last);
694 if (s.isLeaf() && (last - first) > Child::maxCount)
698 s = forceSplit(s.aabb);
703 auto partition = [](TInputIterator first, TInputIterator last,
const TSplitInfo& s) -> TInputIterator
705 TInputIterator middle = std::partition(first, last, impl::Splitter<TObjectTraits>(s));
706 if (middle == first || middle == last)
708 const std::ptrdiff_t halfSize = (last - first) / 2;
709 LASS_ASSERT(halfSize > 0);
710 middle = first + halfSize;
711 std::nth_element(first, middle, last, impl::LessAxis<TObjectTraits>(s.axis));
716 auto makeLeaf = [
this](TInputIterator first, TInputIterator last) -> Child
718 const auto count = last - first;
719 LASS_ASSERT(count <= Child::maxCount);
720 const TIndex frst =
static_cast<TIndex
>(objects_.size());
721 LASS_ASSERT(frst >= 0 && frst <= Child::maxObject);
722 for (TInputIterator i = first; i != last; ++i)
724 objects_.push_back(i->object);
726 LASS_ASSERT(objects_.size() <=
static_cast<size_t>(Child::maxObject) + 1);
727 return Child(frst,
static_cast<TIndex
>(count));
730 auto splitQuadrant = [
this, makeLeaf, partition](TIndex index,
size_t quadrant, TInputIterator first, TInputIterator last)
732 const size_t octant0 = 2 * quadrant;
733 const size_t octant1 = 2 * quadrant + 1;
734 auto s = TSplitHeuristics::template split<OT>(first, last);
737 Node& node = nodes_[index];
738 node.children[octant0] = makeLeaf(first, last);
739 node.children[octant1] = Child();
740 node.bounds.set(octant0, makeAabb(s.aabb));
741 node.bounds.set(octant1, makeAabb(TObjectTraits::aabbEmpty()));
742 node.axis[quadrant + 3] = 0;
746 const TInputIterator middle = partition(first, last, s);
747 TAabb bounds0, bounds1;
748 const Child child0 = balance(first, middle, bounds0);
749 const Child child1 = balance(middle, last, bounds1);
750 Node& node = nodes_[index];
751 node.children[octant0] = child0;
752 node.children[octant1] = child1;
753 node.bounds.set(octant0, makeAabb(bounds0));
754 node.bounds.set(octant1, makeAabb(bounds1));
755 node.axis[quadrant + 3] =
static_cast<TAxis
>(s.axis);
759 auto splitHalf = [
this, makeLeaf, partition, splitQuadrant](TIndex index,
size_t half, TInputIterator first, TInputIterator last)
761 auto s = TSplitHeuristics::template split<OT>(first, last);
764 Node& node = nodes_[index];
765 node.children[4 * half] = makeLeaf(first, last);
766 node.children[4 * half + 1] = Child();
767 node.children[4 * half + 2] = Child();
768 node.children[4 * half + 3] = Child();
769 node.bounds.set(4 * half, makeAabb(s.aabb));
770 node.bounds.set(4 * half + 1, makeAabb(TObjectTraits::aabbEmpty()));
771 node.bounds.set(4 * half + 2, makeAabb(TObjectTraits::aabbEmpty()));
772 node.bounds.set(4 * half + 3, makeAabb(TObjectTraits::aabbEmpty()));
773 node.axis[half + 1] = 0;
777 const TInputIterator middle = partition(first, last, s);
778 splitQuadrant(index, 2 * half, first, middle);
779 splitQuadrant(index, 2 * half + 1, middle, last);
780 nodes_[index].axis[half + 1] =
static_cast<TAxis
>(s.axis);
786 bounds = TObjectTraits::aabbEmpty();
790 auto s =
split(first, last);
794 return makeLeaf(first, last);
796 LASS_ASSERT(!s.isLeaf());
798 LASS_ASSERT(nodes_.size() <=
static_cast<size_t>(Child::maxNode));
799 const TIndex index =
static_cast<TIndex
>(nodes_.size());
800 nodes_.emplace_back();
802 nodes_[index].axis[0] =
static_cast<TAxis
>(s.axis);
804 const TInputIterator middle = partition(first, last, s);
805 splitHalf(index, 0, first, middle);
806 splitHalf(index, 1, middle, last);
812template <
typename O,
typename OT,
typename SH>
813typename Aabb8Tree<O, OT, SH>::TSplitInfo
814Aabb8Tree<O, OT, SH>::forceSplit(
const TAabb& bounds)
816 const TPoint min = TObjectTraits::aabbMin(bounds);
817 const TPoint max = TObjectTraits::aabbMax(bounds);
819 TValue maxSize = TObjectTraits::coord(max, 0) - TObjectTraits::coord(min, 0);
820 for (
size_t k = 1; k < TObjectTraits::dimension; ++k)
822 const TValue size = TObjectTraits::coord(max, k) - TObjectTraits::coord(min, k);
829 const TValue x = (TObjectTraits::coord(min, axis) + TObjectTraits::coord(max, axis)) / 2;
830 return TSplitInfo(bounds, x, axis);
835template <
typename O,
typename OT,
typename SH>
836bool Aabb8Tree<O, OT, SH>::doContains(Child root,
const TPoint& point,
const TInfo* info)
const
838 const auto pnt = makePoint(point);
840 Child stack[stackSize_];
841 TIndex stackSize = 0;
842 stack[stackSize++] = root;
843 while (stackSize > 0)
845 const Child index = stack[--stackSize];
846 LASS_ASSERT(!index.isEmpty());
850 const TIndex first = index.first();
851 const TIndex last = first + index.count();
852 for (TIndex i = first; i != last; ++i)
854 LASS_SPAT_OBJECT_TREES_DIAGNOSTICS_VISIT_OBJECT;
855 if (TObjectTraits::objectContains(objects_[i], point, info))
863 LASS_ASSERT(index.isInternal());
864 LASS_ASSERT(index.node() < nodes_.size());
865 LASS_SPAT_OBJECT_TREES_DIAGNOSTICS_INIT_NODE(TInfo, info);
866 const Node& node = nodes_[index.node()];
868 const int hits = impl::aabb8::contains(node.bounds, pnt);
869 for (
size_t k = 0; k < 8; ++k)
871 if (!(hits & (1 << k)))
875 const Child child = node.children[k];
880 if (stackSize < stackSize_)
882 stack[stackSize++] = child;
884 else if (doContains(child, point, info))
895template <
typename O,
typename OT,
typename SH>
896template <
typename OutputIterator>
897OutputIterator Aabb8Tree<O, OT, SH>::doFind(Child root,
const TPoint& point, OutputIterator result,
const TInfo* info)
const
899 const auto pnt = makePoint(point);
901 Child stack[stackSize_];
902 TIndex stackSize = 0;
903 stack[stackSize++] = root;
904 while (stackSize > 0)
906 const Child index = stack[--stackSize];
907 LASS_ASSERT(!index.isEmpty());
911 const TIndex first = index.first();
912 const TIndex last = first + index.count();
913 for (TIndex i = first; i != last; ++i)
915 LASS_SPAT_OBJECT_TREES_DIAGNOSTICS_VISIT_OBJECT;
916 if (TObjectTraits::objectContains(objects_[i], point, info))
918 *result++ = objects_[i];
924 LASS_ASSERT(index.isInternal());
925 LASS_ASSERT(index.node() < nodes_.size());
926 LASS_SPAT_OBJECT_TREES_DIAGNOSTICS_INIT_NODE(TInfo, info);
927 const Node& node = nodes_[index.node()];
929 const int hits = impl::aabb8::contains(node.bounds, pnt);
930 for (
size_t k = 0; k < 8; ++k)
932 if (!(hits & (1 << k)))
936 const Child child = node.children[k];
941 if (stackSize < stackSize_)
943 stack[stackSize++] = child;
947 result = doFind(child, point, result, info);
955template <
typename O,
typename OT,
typename SH>
956template <
typename OutputIterator>
957OutputIterator Aabb8Tree<O, OT, SH>::doFind(Child root,
const TAabb& box, OutputIterator result,
const TInfo* info)
const
959 const auto bx = makeAabb(box);
961 Child stack[stackSize_];
962 TIndex stackSize = 0;
963 stack[stackSize++] = root;
964 while (stackSize > 0)
966 const Child index = stack[--stackSize];
967 LASS_ASSERT(!index.isEmpty());
971 const TIndex first = index.first();
972 const TIndex last = first + index.count();
973 for (TIndex i = first; i != last; ++i)
975 LASS_SPAT_OBJECT_TREES_DIAGNOSTICS_VISIT_OBJECT;
976 if (TObjectTraits::objectIntersects(objects_[i], box, info))
978 *result++ = objects_[i];
984 LASS_ASSERT(index.isInternal());
985 LASS_ASSERT(index.node() < nodes_.size());
986 LASS_SPAT_OBJECT_TREES_DIAGNOSTICS_INIT_NODE(TInfo, info);
987 const Node& node = nodes_[index.node()];
989 const int hits = impl::aabb8::overlaps(node.bounds, bx);
990 for (
size_t k = 0; k < 8; ++k)
992 if (!(hits & (1 << k)))
996 const Child child = node.children[k];
1001 if (stackSize < stackSize_)
1003 stack[stackSize++] = child;
1007 result = doFind(child, box, result, info);
1016template <
typename O,
typename OT,
typename SH>
1017template <
typename OutputIterator>
1018OutputIterator Aabb8Tree<O, OT, SH>::doFind(Child root,
const TRay& ray,
const TVector& invDir, TParam tMin, TParam tMax, OutputIterator result,
const TInfo* info)
const
1020 const auto r = makeRay(ray);
1022 Child stack[stackSize_];
1023 TIndex stackSize = 0;
1024 stack[stackSize++] = root;
1025 while (stackSize > 0)
1027 const Child index = stack[--stackSize];
1028 LASS_ASSERT(!index.isEmpty());
1032 const TIndex first = index.first();
1033 const TIndex last = first + index.count();
1034 for (TIndex i = first; i != last; ++i)
1036 LASS_SPAT_OBJECT_TREES_DIAGNOSTICS_VISIT_OBJECT;
1037 if (TObjectTraits::objectIntersects(objects_[i], ray, tMin, tMax, info))
1039 *result++ = objects_[i];
1045 LASS_ASSERT(index.isInternal());
1046 LASS_SPAT_OBJECT_TREES_DIAGNOSTICS_INIT_NODE(TInfo, info);
1047 LASS_ASSERT(index.node() < nodes_.size());
1048 const Node& node = nodes_[index.node()];
1051 const int hits = impl::aabb8::intersect(node.bounds, r, tMin, tMax, ts);
1052 for (
size_t k = 0; k < 8; ++k)
1054 const Child child = node.children[k];
1055 if (!(hits & (child.isEmpty() ? 0 : 1) << k))
1059 if (stackSize < stackSize_)
1061 stack[stackSize++] = child;
1065 result = doFind(child, ray, invDir, tMin, tMax, result, info);
1076template <
typename O,
typename OT,
typename SH>
1077typename Aabb8Tree<O, OT, SH>::TObjectIterator
1078Aabb8Tree<O, OT, SH>::doIntersect(Child root,
const TRay& ray,
const TVector& invDir, TReference t, TParam tMin,
const TInfo* info)
const
1080 const auto r = makeRay(ray);
1082 __m128i signs[dimension];
1083 for (
size_t i = 0; i < dimension; ++i)
1085 signs[i] = _mm_set1_epi32(r.sign[i] ? -1 : 0);
1094 Visit(Child index, TValue tNear): tNear(tNear), index(index) {}
1096 Visit stack[stackSize_];
1097 size_t stackSize = 0;
1098 stack[stackSize++] = Visit(root, tMin);
1099 TValue tBest = TNumTraits::infinity;
1100 TObjectIterator best = *end_;
1101 while (stackSize > 0)
1103 const Visit visit = stack[--stackSize];
1104 if (tBest <= visit.tNear)
1109 const Child index = visit.index;
1110 LASS_ASSERT(!index.isEmpty());
1114 const TIndex first = index.first();
1115 const TIndex last = first + index.count();
1116 for (TIndex i = first; i != last; ++i)
1118 LASS_SPAT_OBJECT_TREES_DIAGNOSTICS_VISIT_OBJECT;
1119 TValue tCandidate = 0;
1120 if (TObjectTraits::objectIntersect(objects_[i], ray, tCandidate, tMin, info))
1122 if (best == *end_ || tCandidate < tBest)
1132 LASS_ASSERT(index.isInternal());
1133 LASS_SPAT_OBJECT_TREES_DIAGNOSTICS_INIT_NODE(TInfo, info);
1134 LASS_ASSERT(index.node() < nodes_.size());
1135 const Node& node = nodes_[index.node()];
1138 const int hits = impl::aabb8::intersect(node.bounds, r, tMin, TNumTraits::infinity, ts);
1140#if 0 && LASS_HAVE_AVX
1141 alignas(16)
int order[4];
1143 auto order_ = _mm_set_epi32(3, 2, 1, 0);
1144 const auto m0 = _mm_set_epi32(-1, -1, -1, -1);
1145 const auto m1 = _mm_set_epi32(0, 0, -1, -1);
1146 const auto m2 = _mm_set_epi32(-1, -1, 0, 0);
1147 const auto mask0 = _mm_and_si128(m0, signs[node.axis[0]]);
1148 const auto mask1 = _mm_and_si128(m1, signs[node.axis[1]]);
1149 const auto mask2 = _mm_and_si128(m2, signs[node.axis[2]]);
1150 const auto mask21 = _mm_or_si128(mask2, mask1);
1151 const auto order21 = _mm_shuffle_epi32(order_, 0xB1);
1152 order_ = _mm_blendv_epi8(order_, order21, mask21);
1153 const auto order0 = _mm_shuffle_epi32(order_, 0x4E);
1154 order_ = _mm_blendv_epi8(order_, order0, mask0);
1155 LASS_ASSERT(
sizeof(order) ==
sizeof(order_));
1156 memcpy(order, &order_,
sizeof(order_));
1159 int order[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
1160 if (r.sign[node.axis[3]])
1162 std::swap(order[0], order[1]);
1164 if (r.sign[node.axis[4]])
1166 std::swap(order[2], order[3]);
1168 if (r.sign[node.axis[5]])
1170 std::swap(order[4], order[5]);
1172 if (r.sign[node.axis[6]])
1174 std::swap(order[6], order[7]);
1176 if (r.sign[node.axis[1]])
1178 std::swap(order[0], order[2]);
1179 std::swap(order[1], order[3]);
1181 if (r.sign[node.axis[2]])
1183 std::swap(order[4], order[6]);
1184 std::swap(order[5], order[7]);
1186 if (r.sign[node.axis[0]])
1188 std::swap(order[0], order[4]);
1189 std::swap(order[1], order[5]);
1190 std::swap(order[2], order[6]);
1191 std::swap(order[3], order[7]);
1196 for (
int k = 7; k >= 0; --k)
1198 const auto o = order[k];
1199 const Child child = node.children[o];
1200 if (!(hits & (child.isEmpty() ? 0 : 1) << o))
1204 if (stackSize < stackSize_)
1206 stack[stackSize++] = Visit(child, ts[o]);
1211 TObjectIterator b = doIntersect(child, ray, invDir, tb, tMin, info);
1212 if (best != *end_ && tb < tBest)
1229template <
typename O,
typename OT,
typename SH>
1230bool Aabb8Tree<O, OT, SH>::doIntersects(Child root,
const TRay& ray,
const TVector& invDir, TParam tMin, TParam tMax,
const TInfo* info)
const
1232 const auto r = makeRay(ray);
1234 Child stack[stackSize_];
1235 TIndex stackSize = 0;
1236 stack[stackSize++] = root;
1237 while (stackSize > 0)
1239 const Child index = stack[--stackSize];
1240 LASS_ASSERT(!index.isEmpty());
1244 const TIndex first = index.first();
1245 const TIndex last = first + index.count();
1246 for (TIndex i = first; i != last; ++i)
1248 LASS_SPAT_OBJECT_TREES_DIAGNOSTICS_VISIT_OBJECT;
1249 if (TObjectTraits::objectIntersects(objects_[i], ray, tMin, tMax, info))
1257 LASS_ASSERT(index.isInternal());
1258 LASS_SPAT_OBJECT_TREES_DIAGNOSTICS_INIT_NODE(TInfo, info);
1259 LASS_ASSERT(index.node() < nodes_.size());
1260 const Node& node = nodes_[index.node()];
1263 const int hits = impl::aabb8::intersect(node.bounds, r, tMin, tMax, ts);
1264 for (
size_t k = 0; k < 8; ++k)
1266 const Child child = node.children[k];
1267 if (!(hits & (child.isEmpty() ? 0 : 1) << k))
1271 if (stackSize < stackSize_)
1273 stack[stackSize++] = child;
1275 else if (doIntersects(child, ray, invDir, tMin, tMax, info))
1285namespace impl::aabb8
1288template <
typename T>
1289void sort8(
size_t order[8],
const T values[8])
1291 std::sort(order, order + 8, [&values](
size_t a,
size_t b) {
return values[a] < values[b]; });
1298template <
typename O,
typename OT,
typename SH>
1299void Aabb8Tree<O, OT, SH>::doNearestNeighbour(
1300 Child root,
const TPoint& target,
const TInfo* info, Neighbour& nearest)
const
1302 const auto pnt = makePoint(target);
1306 TValue squaredDistance;
1309 Visit(Child index, TValue squaredDistance): squaredDistance(squaredDistance), index(index) {}
1311 Visit stack[stackSize_];
1312 size_t stackSize = 0;
1313 stack[stackSize++] = Visit(root, 0);
1314 while (stackSize > 0)
1316 LASS_ASSERT(nearest.squaredDistance() >= 0);
1317 const Visit visit = stack[--stackSize];
1318 if (nearest.squaredDistance() <= visit.squaredDistance)
1323 const Child index = visit.index;
1324 LASS_ASSERT(!index.isEmpty());
1328 const auto first = index.first();
1329 const auto last = first + index.count();
1330 for (
auto i = first; i != last; ++i)
1332 LASS_SPAT_OBJECT_TREES_DIAGNOSTICS_VISIT_OBJECT;
1333 const TValue squaredDistance = TObjectTraits::objectSquaredDistance(objects_[i], target, info);
1334 if (squaredDistance < nearest.squaredDistance())
1336 nearest = Neighbour(objects_[i], squaredDistance);
1342 LASS_ASSERT(index.isInternal());
1343 LASS_SPAT_OBJECT_TREES_DIAGNOSTICS_INIT_NODE(TInfo, info);
1344 LASS_ASSERT(index.node() < nodes_.size());
1345 const Node& node = nodes_[index.node()];
1347 TValue squaredDistances[8];
1348 impl::aabb8::squaredDistance(node.bounds, pnt, squaredDistances);
1350 size_t order[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
1351 impl::aabb8::sort8(order, squaredDistances);
1354 for (
int k = 7; k >= 0; --k)
1356 const size_t o = order[k];
1357 const Child child = node.children[o];
1358 if (child.isEmpty())
1362 if (stackSize < stackSize_)
1364 stack[stackSize++] = Visit(child, squaredDistances[o]);
1368 doNearestNeighbour(child, target, info, nearest);
1376template <
typename O,
typename OT,
typename SH>
1377template <
typename RandomIterator>
1379Aabb8Tree<O, OT, SH>::doRangeSearch(
1380 Child root,
const TPoint& center, TReference squaredRadius,
size_t maxCount, RandomIterator first, RandomIterator last,
const TInfo* info)
const
1382 const auto pnt = makePoint(center);
1386 TValue squaredDistance;
1389 Visit(Child index, TValue squaredDistance): squaredDistance(squaredDistance), index(index) {}
1391 Visit stack[stackSize_];
1392 size_t stackSize = 0;
1393 stack[stackSize++] = Visit(root, 0);
1394 while (stackSize > 0)
1396 LASS_ASSERT(squaredRadius >= 0);
1397 const Visit visit = stack[--stackSize];
1398 if (squaredRadius <= visit.squaredDistance)
1403 const Child index = visit.index;
1404 LASS_ASSERT(!index.isEmpty());
1408 const auto frst = index.first();
1409 const auto lst = frst + index.count();
1410 for (
auto i = frst; i != lst; ++i)
1412 LASS_SPAT_OBJECT_TREES_DIAGNOSTICS_VISIT_OBJECT;
1413 const TValue squaredDistance = TObjectTraits::objectSquaredDistance(objects_[i], center, info);
1414 if (squaredDistance < squaredRadius)
1416 *last++ = Neighbour(objects_[i], squaredDistance);
1417 std::push_heap(first, last);
1418 LASS_ASSERT(last >= first);
1419 if (
static_cast<size_t>(last - first) > maxCount)
1421 std::pop_heap(first, last);
1423 squaredRadius = first->squaredDistance();
1430 LASS_ASSERT(index.isInternal());
1431 LASS_SPAT_OBJECT_TREES_DIAGNOSTICS_INIT_NODE(TInfo, info);
1432 LASS_ASSERT(index.node() < nodes_.size());
1433 const Node& node = nodes_[index.node()];
1435 TValue squaredDistances[8];
1436 impl::aabb8::squaredDistance(node.bounds, pnt, squaredDistances);
1438 size_t order[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
1439 impl::aabb8::sort8(order, squaredDistances);
1442 for (
int k = 7; k >= 0; --k)
1444 size_t o = order[k];
1445 const Child child = node.children[o];
1446 if (child.isEmpty())
1450 if (stackSize < stackSize_)
1452 stack[stackSize++] = Visit(child, squaredDistances[o]);
1456 last = doRangeSearch(child, center, squaredRadius, maxCount, first, last, info);
1466template <
typename O,
typename OT,
typename SH>
1467bool Aabb8Tree<O, OT, SH>::volumeIntersect(
const TAabb& box,
const TRay& ray,
const TVector& invDir, TReference t, TParam tMin)
const
1469 if (TObjectTraits::aabbContains(box, TObjectTraits::rayPoint(ray, tMin)))
1474 return TObjectTraits::aabbIntersect(box, ray, invDir, t, tMin);
1479template <
typename O,
typename OT,
typename SH>
1480bool Aabb8Tree<O, OT, SH>::volumeIntersects(
const TAabb& box,
const TRay& ray,
const TVector& invDir, TParam tMin, TParam tMax)
const
1483 return volumeIntersect(box, ray, invDir, t, tMin) && t <= tMax;
1488template <
typename O,
typename OT,
typename SH>
1489typename Aabb8Tree<O, OT, SH>::TPoint_
1490Aabb8Tree<O, OT, SH>::makePoint(
const TPoint& point)
1493 for (
size_t d = 0; d < dimension; ++d)
1495 result.x[d] = OT::coord(point, d);
1502template <
typename O,
typename OT,
typename SH>
1503typename Aabb8Tree<O, OT, SH>::TRay_
1504Aabb8Tree<O, OT, SH>::makeRay(
const TRay& ray)
1507 const auto support = OT::raySupport(ray);
1508 const auto direction = OT::rayDirection(ray);
1509 const auto invDir = TObjectTraits::vectorReciprocal(direction);
1510 for (
size_t d = 0; d < dimension; ++d)
1512 result.support[d] = OT::coord(support, d);
1513 result.invDir[d] = OT::coord(invDir, d);
1514 result.sign[d] = std::signbit(OT::coord(direction, d));
1521template <
typename O,
typename OT,
typename SH>
1522typename Aabb8Tree<O, OT, SH>::TAabb_
1523Aabb8Tree<O, OT, SH>::makeAabb(
const TAabb& aabb)
1526 const auto min = OT::aabbMin(aabb);
1527 const auto max = OT::aabbMax(aabb);
1528 for (
size_t d = 0; d < dimension; ++d)
1530 result.corners[d][0] = OT::coord(min, d);
1531 result.corners[d][1] = OT::coord(max, d);