150 // export kd-tree as .obj for visualization |
151 // export kd-tree as .obj for visualization |
151 // this would be hard to reconstruct later |
152 // this would be hard to reconstruct later |
152 static ofstream F("kdtree.obj"); |
153 static ofstream F("kdtree.obj"); |
153 Vector v; |
154 Vector v; |
154 static int f=0; |
155 static int f=0; |
155 v.cell[axis] = node->getSplit(); |
156 v[axis] = node->getSplit(); |
156 v.cell[(axis+1)%3] = bounds.L.cell[(axis+1)%3]; |
157 v[(axis+1)%3] = bounds.L[(axis+1)%3]; |
157 v.cell[(axis+2)%3] = bounds.L.cell[(axis+2)%3]; |
158 v[(axis+2)%3] = bounds.L[(axis+2)%3]; |
158 F << "v " << v.x << " " << v.y << " " << v.z << endl; |
159 F << "v " << v.x << " " << v.y << " " << v.z << endl; |
159 v.cell[(axis+1)%3] = bounds.L.cell[(axis+1)%3]; |
160 v[(axis+1)%3] = bounds.L[(axis+1)%3]; |
160 v.cell[(axis+2)%3] = bounds.H.cell[(axis+2)%3]; |
161 v[(axis+2)%3] = bounds.H[(axis+2)%3]; |
161 F << "v " << v.x << " " << v.y << " " << v.z << endl; |
162 F << "v " << v.x << " " << v.y << " " << v.z << endl; |
162 v.cell[(axis+1)%3] = bounds.H.cell[(axis+1)%3]; |
163 v[(axis+1)%3] = bounds.H[(axis+1)%3]; |
163 v.cell[(axis+2)%3] = bounds.H.cell[(axis+2)%3]; |
164 v[(axis+2)%3] = bounds.H[(axis+2)%3]; |
164 F << "v " << v.x << " " << v.y << " " << v.z << endl; |
165 F << "v " << v.x << " " << v.y << " " << v.z << endl; |
165 v.cell[(axis+1)%3] = bounds.H.cell[(axis+1)%3]; |
166 v[(axis+1)%3] = bounds.H[(axis+1)%3]; |
166 v.cell[(axis+2)%3] = bounds.L.cell[(axis+2)%3]; |
167 v[(axis+2)%3] = bounds.L[(axis+2)%3]; |
167 F << "v " << v.x << " " << v.y << " " << v.z << endl; |
168 F << "v " << v.x << " " << v.y << " " << v.z << endl; |
168 F << "f " << f+1 << " " << f+2 << " " << f+3 << " " << f+4 << endl; |
169 F << "f " << f+1 << " " << f+2 << " " << f+3 << " " << f+4 << endl; |
169 f += 4; |
170 f += 4; |
170 #endif |
171 #endif |
171 |
172 |
172 // split this node |
173 // split this node |
173 delete shapes; |
174 delete shapes; |
174 |
175 |
175 BBox lbb = bounds; |
176 BBox lbb = bounds; |
176 BBox rbb = bounds; |
177 BBox rbb = bounds; |
177 lbb.H.cell[axis] = node->getSplit(); |
178 lbb.H[axis] = node->getSplit(); |
178 rbb.L.cell[axis] = node->getSplit(); |
179 rbb.L[axis] = node->getSplit(); |
179 node->setChildren(new KdNode[2]); |
180 node->setChildren(new (mempool.alloc()) KdNode); |
|
181 new (mempool.alloc()) KdNode; |
180 node->setAxis(axis); |
182 node->setAxis(axis); |
181 |
183 |
182 for (edge = edges[axis].begin(); edge != splitedge; edge++) |
184 for (edge = edges[axis].begin(); edge != splitedge; edge++) |
183 if (!edge->end && edge->shape->intersect_bbox(lbb)) |
185 if (!edge->end && edge->shape->intersect_bbox(lbb)) |
184 node->getLeftChild()->addShape(edge->shape); |
186 node->getLeftChild()->addShape(edge->shape); |
326 |
333 |
327 /* ray leaves the scene */ |
334 /* ray leaves the scene */ |
328 return NULL; |
335 return NULL; |
329 } |
336 } |
330 |
337 |
331 #ifndef NO_SSE |
338 #ifndef NO_SIMD |
332 // stack element for kd-tree traversal (packet version) |
339 // stack element for kd-tree traversal (packet version) |
333 struct StackElem4 |
340 struct StackElem4 |
334 { |
341 { |
335 KdNode* node; /* pointer to far child */ |
342 KdNode* node; /* pointer to far child */ |
336 __m128 t; /* the entry/exit signed distance */ |
343 mfloat4 t; /* the entry/exit signed distance */ |
337 VectorPacket pb; /* the coordinates of entry/exit point */ |
344 VectorPacket pb; /* the coordinates of entry/exit point */ |
338 int prev; |
345 int prev; |
339 }; |
346 }; |
340 |
347 |
341 void KdTree::packet_intersection(const Shape **origin_shapes, const RayPacket &rays, |
348 void KdTree::packet_intersection(const Shape* const* origin_shapes, const RayPacket &rays, |
342 Float *nearest_distances, Shape **nearest_shapes) |
349 Float *nearest_distances, Shape **nearest_shapes) |
343 { |
350 { |
344 __m128 a, b; /* entry/exit signed distance */ |
351 mfloat4 a, b; /* entry/exit signed distance */ |
345 __m128 t; /* signed distance to the splitting plane */ |
352 mfloat4 t; /* signed distance to the splitting plane */ |
346 __m128 mask = mZero; |
353 mfloat4 mask = mZero; |
347 |
354 |
348 /* if we have no tree, fall back to naive test */ |
355 /* if we have no tree, fall back to naive test */ |
349 if (!built) |
356 if (!built) |
350 Container::packet_intersection(origin_shapes, rays, nearest_distances, nearest_shapes); |
357 Container::packet_intersection(origin_shapes, rays, nearest_distances, nearest_shapes); |
351 |
358 |
352 // nearest_shapes[0..4] = NULL |
359 // nearest_shapes[0..4] = NULL |
353 memset(nearest_shapes, 0, 4*sizeof(Shape*)); |
360 memset(nearest_shapes, 0, 4*sizeof(Shape*)); |
354 |
361 |
355 mask = bbox.intersect_packet(rays, a, b); |
362 mask = bbox.intersect_packet(rays, a, b); |
356 if (!_mm_movemask_ps(mask)) |
363 if (!mmovemask(mask)) |
357 return; |
364 return; |
358 |
365 |
359 /* pointers to the far child node and current node */ |
366 /* pointers to the far child node and current node */ |
360 KdNode *farchild, *node; |
367 KdNode *farchild, *node; |
361 node = root; |
368 node = root; |
362 |
369 |
|
370 #ifdef MSVC |
|
371 // MSVC wants constant expression here... hope it won't overflow :) |
|
372 StackElem4 stack[64]; |
|
373 #else |
363 StackElem4 stack[max_depth]; |
374 StackElem4 stack[max_depth]; |
|
375 #endif |
364 |
376 |
365 int entry = 0, exit = 1; |
377 int entry = 0, exit = 1; |
366 stack[entry].t = a; |
378 stack[entry].t = a; |
367 |
379 |
368 /* distinguish between internal and external origin of a ray*/ |
380 /* distinguish between internal and external origin of a ray*/ |
369 t = _mm_cmplt_ps(a, mZero); |
381 t = mcmplt(a, mZero); |
370 stack[entry].pb = rays.o + rays.dir * a; |
382 stack[entry].pb = rays.o + rays.dir * a; |
371 stack[entry].pb.mx = _mm_or_ps(_mm_andnot_ps(t, stack[entry].pb.mx), |
383 stack[entry].pb.mx = mselect(t, rays.o.mx, stack[entry].pb.mx); |
372 _mm_and_ps(t, rays.o.mx)); |
384 stack[entry].pb.my = mselect(t, rays.o.my, stack[entry].pb.my); |
373 stack[entry].pb.my = _mm_or_ps(_mm_andnot_ps(t, stack[entry].pb.my), |
385 stack[entry].pb.mz = mselect(t, rays.o.mz, stack[entry].pb.mz); |
374 _mm_and_ps(t, rays.o.my)); |
|
375 stack[entry].pb.mz = _mm_or_ps(_mm_andnot_ps(t, stack[entry].pb.mz), |
|
376 _mm_and_ps(t, rays.o.mz)); |
|
377 |
386 |
378 /* setup initial exit point in the stack */ |
387 /* setup initial exit point in the stack */ |
379 stack[exit].t = b; |
388 stack[exit].t = b; |
380 stack[exit].pb = rays.o + rays.dir * b; |
389 stack[exit].pb = rays.o + rays.dir * b; |
381 stack[exit].node = NULL; |
390 stack[exit].node = NULL; |
382 |
391 |
383 /* loop, traverse through the whole kd-tree, |
392 /* loop, traverse through the whole kd-tree, |
384 until an object is intersected or ray leaves the scene */ |
393 until an object is intersected or ray leaves the scene */ |
385 __m128 splitVal; |
394 mfloat4 splitVal; |
386 int axis; |
395 int axis; |
387 static const int mod3[] = {0,1,2,0,1}; |
396 static const int mod3[] = {0,1,2,0,1}; |
388 const VectorPacket invdirs = mOne / rays.dir; |
397 const VectorPacket invdirs = mOne / rays.dir; |
389 while (node) |
398 while (node) |
390 { |
399 { |
391 /* loop until a leaf is found */ |
400 /* loop until a leaf is found */ |
392 while (!node->isLeaf()) |
401 while (!node->isLeaf()) |
393 { |
402 { |
394 /* retrieve position of splitting plane */ |
403 /* retrieve position of splitting plane */ |
395 splitVal = _mm_set_ps1(node->getSplit()); |
404 splitVal = mset1(node->getSplit()); |
396 axis = node->getAxis(); |
405 axis = node->getAxis(); |
397 |
406 |
398 // mask out invalid rays with near > far |
407 // mask out invalid rays with near > far |
399 const __m128 curmask = _mm_and_ps(mask, _mm_cmple_ps(stack[entry].t, stack[exit].t)); |
408 const mfloat4 curmask = mand(mask, mcmple(stack[entry].t, stack[exit].t)); |
400 const __m128 entry_lt = _mm_cmplt_ps(stack[entry].pb.ma[axis], splitVal); |
409 const mfloat4 entry_lt = mcmplt(stack[entry].pb.ma[axis], splitVal); |
401 const __m128 entry_gt = _mm_cmpgt_ps(stack[entry].pb.ma[axis], splitVal); |
410 const mfloat4 entry_gt = mcmpgt(stack[entry].pb.ma[axis], splitVal); |
402 const __m128 exit_lt = _mm_cmplt_ps(stack[exit].pb.ma[axis], splitVal); |
411 const mfloat4 exit_lt = mcmplt(stack[exit].pb.ma[axis], splitVal); |
403 const __m128 exit_gt = _mm_cmpgt_ps(stack[exit].pb.ma[axis], splitVal); |
412 const mfloat4 exit_gt = mcmpgt(stack[exit].pb.ma[axis], splitVal); |
404 |
413 |
405 // if all of |
414 // if all of |
406 // stack[entry].pb[axis] <= splitVal, |
415 // stack[entry].pb[axis] <= splitVal, |
407 // stack[exit].pb[axis] <= splitVal |
416 // stack[exit].pb[axis] <= splitVal |
408 if (!_mm_movemask_ps( |
417 if (!mmovemask( |
409 _mm_and_ps(_mm_or_ps(entry_gt, exit_gt), curmask))) |
418 mand(mor(entry_gt, exit_gt), curmask))) |
410 { |
419 { |
411 node = node->getLeftChild(); |
420 node = node->getLeftChild(); |
412 continue; |
421 continue; |
413 } |
422 } |
414 |
423 |
415 // if all of |
424 // if all of |
416 // stack[entry].pb[axis] >= splitVal, |
425 // stack[entry].pb[axis] >= splitVal, |
417 // stack[exit].pb[axis] >= splitVal |
426 // stack[exit].pb[axis] >= splitVal |
418 if (!_mm_movemask_ps( |
427 if (!mmovemask( |
419 _mm_and_ps(_mm_or_ps(entry_lt, exit_lt), curmask))) |
428 mand(mor(entry_lt, exit_lt), curmask))) |
420 { |
429 { |
421 node = node->getRightChild(); |
430 node = node->getRightChild(); |
422 continue; |
431 continue; |
423 } |
432 } |
424 |
433 |
425 // any of |
434 // any of |
426 // stack[entry].pb[axis] < splitVal, |
435 // stack[entry].pb[axis] < splitVal, |
427 // stack[exit].pb[axis] > splitVal |
436 // stack[exit].pb[axis] > splitVal |
428 bool cond1 = _mm_movemask_ps( |
437 int cond1 = mmovemask( |
429 _mm_and_ps(_mm_and_ps(entry_lt, exit_gt), curmask)); |
438 mand(mand(entry_lt, exit_gt), curmask)); |
430 |
439 |
431 // any of |
440 // any of |
432 // stack[entry].pb[axis] > splitVal, |
441 // stack[entry].pb[axis] > splitVal, |
433 // stack[exit].pb[axis] < splitVal |
442 // stack[exit].pb[axis] < splitVal |
434 bool cond2 = _mm_movemask_ps( |
443 int cond2 = mmovemask( |
435 _mm_and_ps(_mm_and_ps(entry_gt, exit_lt), curmask)); |
444 mand(mand(entry_gt, exit_lt), curmask)); |
436 |
445 |
437 if ((!cond1 && !cond2) || (cond1 && cond2)) |
446 if ((!cond1 && !cond2) || (cond1 && cond2)) |
438 { |
447 { |
439 // fall back to single rays |
448 // fall back to single rays |
440 // FIXME: split rays and continue |
449 // FIXME: split rays and continue |
472 stack[exit].prev = tmp; |
481 stack[exit].prev = tmp; |
473 stack[exit].t = t; |
482 stack[exit].t = t; |
474 stack[exit].node = farchild; |
483 stack[exit].node = farchild; |
475 stack[exit].pb.ma[axis] = splitVal; |
484 stack[exit].pb.ma[axis] = splitVal; |
476 stack[exit].pb.ma[mod3[axis+1]] = |
485 stack[exit].pb.ma[mod3[axis+1]] = |
477 _mm_add_ps(rays.o.ma[mod3[axis+1]], _mm_mul_ps(t, rays.dir.ma[mod3[axis+1]])); |
486 madd(rays.o.ma[mod3[axis+1]], mmul(t, rays.dir.ma[mod3[axis+1]])); |
478 stack[exit].pb.ma[mod3[axis+2]] = |
487 stack[exit].pb.ma[mod3[axis+2]] = |
479 _mm_add_ps(rays.o.ma[mod3[axis+2]], _mm_mul_ps(t, rays.dir.ma[mod3[axis+2]])); |
488 madd(rays.o.ma[mod3[axis+2]], mmul(t, rays.dir.ma[mod3[axis+2]])); |
480 } |
489 } |
481 |
490 |
482 /* current node is the leaf . . . empty or full */ |
491 /* current node is the leaf . . . empty or full */ |
483 __m128 dists = stack[exit].t; |
492 mfloat4 dists = stack[exit].t; |
484 ShapeList::iterator shape; |
493 ShapeList::iterator shape; |
485 __m128 results; |
494 mfloat4 results; |
486 __m128 newmask = mask; |
495 mfloat4 newmask = mask; |
487 for (shape = node->getShapes()->begin(); shape != node->getShapes()->end(); shape++) |
496 for (shape = node->getShapes()->begin(); shape != node->getShapes()->end(); shape++) |
488 { |
497 { |
489 results = (*shape)->intersect_packet(rays, dists); |
498 results = (*shape)->intersect_packet(rays, dists); |
490 int valid = _mm_movemask_ps( |
499 int valid = mmovemask( |
491 _mm_and_ps(mask, _mm_and_ps(results, |
500 mand(mask, mand(results, |
492 _mm_cmpge_ps(dists, _mm_sub_ps(stack[entry].t, mEps))))); |
501 mcmpge(dists, msub(stack[entry].t, mEps))))); |
493 for (int i = 0; i < 4; i++) |
502 for (int i = 0; i < 4; i++) |
494 { |
503 { |
495 if (*shape != origin_shapes[i] && ((valid>>i)&1)) |
504 if (*shape != origin_shapes[i] && ((valid>>i)&1)) |
496 { |
505 { |
497 nearest_shapes[i] = *shape; |
506 nearest_shapes[i] = *shape; |