326 |
326 |
327 /* ray leaves the scene */ |
327 /* ray leaves the scene */ |
328 return NULL; |
328 return NULL; |
329 } |
329 } |
330 |
330 |
|
331 // stack element for kd-tree traversal (packet version) |
|
332 struct StackElem4 |
|
333 { |
|
334 KdNode* node; /* pointer to far child */ |
|
335 __m128 t; /* the entry/exit signed distance */ |
|
336 VectorPacket pb; /* the coordinates of entry/exit point */ |
|
337 int prev; |
|
338 }; |
|
339 |
|
340 void KdTree::packet_intersection(const Shape **origin_shapes, const RayPacket &rays, |
|
341 Float *nearest_distances, Shape **nearest_shapes) |
|
342 { |
|
343 __m128 a, b; /* entry/exit signed distance */ |
|
344 __m128 t; /* signed distance to the splitting plane */ |
|
345 __m128 mask = zeros; |
|
346 |
|
347 /* if we have no tree, fall back to naive test */ |
|
348 if (!built) |
|
349 Container::packet_intersection(origin_shapes, rays, nearest_distances, nearest_shapes); |
|
350 |
|
351 nearest_shapes[0] = NULL; |
|
352 nearest_shapes[1] = NULL; |
|
353 nearest_shapes[2] = NULL; |
|
354 nearest_shapes[3] = NULL; |
|
355 |
|
356 //bbox.intersect_packet(rays, a, b) |
|
357 if (bbox.intersect(rays[0], ((float*)&a)[0], ((float*)&b)[0])) |
|
358 ((int*)&mask)[0] = -1; |
|
359 if (bbox.intersect(rays[1], ((float*)&a)[1], ((float*)&b)[1])) |
|
360 ((int*)&mask)[1] = -1; |
|
361 if (bbox.intersect(rays[2], ((float*)&a)[2], ((float*)&b)[2])) |
|
362 ((int*)&mask)[2] = -1; |
|
363 if (bbox.intersect(rays[3], ((float*)&a)[3], ((float*)&b)[3])) |
|
364 ((int*)&mask)[3] = -1; |
|
365 |
|
366 if (!_mm_movemask_ps(mask)) |
|
367 return; |
|
368 |
|
369 /* pointers to the far child node and current node */ |
|
370 KdNode *farchild, *node; |
|
371 node = root; |
|
372 |
|
373 StackElem4 stack[max_depth]; |
|
374 |
|
375 int entry = 0, exit = 1; |
|
376 stack[entry].t = a; |
|
377 |
|
378 /* distinguish between internal and external origin of a ray*/ |
|
379 stack[entry].pb = rays.o + rays.dir * a; /* external */ |
|
380 for (int i = 0; i < 4; i++) |
|
381 if (((float*)&a)[i] < 0.0) |
|
382 { |
|
383 /* internal */ |
|
384 stack[entry].pb.x[i] = rays.o.x[i]; |
|
385 stack[entry].pb.y[i] = rays.o.y[i]; |
|
386 stack[entry].pb.z[i] = rays.o.z[i]; |
|
387 } |
|
388 |
|
389 /* setup initial exit point in the stack */ |
|
390 stack[exit].t = b; |
|
391 stack[exit].pb = rays.o + rays.dir * b; |
|
392 stack[exit].node = NULL; |
|
393 |
|
394 /* loop, traverse through the whole kd-tree, |
|
395 until an object is intersected or ray leaves the scene */ |
|
396 __m128 splitVal; |
|
397 int axis; |
|
398 static const int mod3[] = {0,1,2,0,1}; |
|
399 const VectorPacket invdirs = ones / rays.dir; |
|
400 while (node) |
|
401 { |
|
402 /* loop until a leaf is found */ |
|
403 while (!node->isLeaf()) |
|
404 { |
|
405 /* retrieve position of splitting plane */ |
|
406 splitVal = _mm_set_ps1(node->getSplit()); |
|
407 axis = node->getAxis(); |
|
408 |
|
409 // mask out invalid rays with near > far |
|
410 __m128 curmask = _mm_and_ps(mask, _mm_cmple_ps(stack[entry].t, stack[exit].t)); |
|
411 __m128 entry_lt = _mm_cmplt_ps(stack[entry].pb.ma[axis], splitVal); |
|
412 __m128 entry_gt = _mm_cmpgt_ps(stack[entry].pb.ma[axis], splitVal); |
|
413 __m128 exit_lt = _mm_cmplt_ps(stack[exit].pb.ma[axis], splitVal); |
|
414 __m128 exit_gt = _mm_cmpgt_ps(stack[exit].pb.ma[axis], splitVal); |
|
415 |
|
416 // if all of |
|
417 // stack[entry].pb[axis] <= splitVal, |
|
418 // stack[exit].pb[axis] <= splitVal |
|
419 if (!_mm_movemask_ps( |
|
420 _mm_and_ps(_mm_or_ps(entry_gt, exit_gt), curmask))) |
|
421 { |
|
422 node = node->getLeftChild(); |
|
423 continue; |
|
424 } |
|
425 |
|
426 // if all of |
|
427 // stack[entry].pb[axis] >= splitVal, |
|
428 // stack[exit].pb[axis] >= splitVal |
|
429 if (!_mm_movemask_ps( |
|
430 _mm_and_ps(_mm_or_ps(entry_lt, exit_lt), curmask))) |
|
431 { |
|
432 node = node->getRightChild(); |
|
433 continue; |
|
434 } |
|
435 |
|
436 // any of |
|
437 // stack[entry].pb[axis] < splitVal, |
|
438 // stack[exit].pb[axis] > splitVal |
|
439 bool cond1 = _mm_movemask_ps( |
|
440 _mm_and_ps(_mm_and_ps(entry_lt, exit_gt), curmask)); |
|
441 |
|
442 // any of |
|
443 // stack[entry].pb[axis] > splitVal, |
|
444 // stack[exit].pb[axis] < splitVal |
|
445 bool cond2 = _mm_movemask_ps( |
|
446 _mm_and_ps(_mm_and_ps(entry_gt, exit_lt), curmask)); |
|
447 |
|
448 if ((!cond1 && !cond2) || (cond1 && cond2)) |
|
449 { |
|
450 // fall back to single rays |
|
451 // FIXME: split rays and continue |
|
452 for (int i = 0; i < 4; i++) |
|
453 if(!nearest_shapes[i]) |
|
454 nearest_shapes[i] = nearest_intersection(origin_shapes[i], |
|
455 rays[i], nearest_distances[i]); |
|
456 return; |
|
457 } |
|
458 |
|
459 if (cond1) |
|
460 { |
|
461 farchild = node->getRightChild(); |
|
462 node = node->getLeftChild(); |
|
463 } |
|
464 else |
|
465 { |
|
466 farchild = node->getLeftChild(); |
|
467 node = node->getRightChild(); |
|
468 } |
|
469 |
|
470 /* traverse both children */ |
|
471 |
|
472 /* signed distance to the splitting plane */ |
|
473 t = _mm_mul_ps(_mm_sub_ps(splitVal, rays.o.ma[axis]), invdirs.ma[axis]); |
|
474 |
|
475 /* setup the new exit point and push it onto stack */ |
|
476 register int tmp = exit; |
|
477 |
|
478 exit++; |
|
479 if (exit == entry) |
|
480 exit++; |
|
481 assert(exit < max_depth); |
|
482 |
|
483 stack[exit].prev = tmp; |
|
484 stack[exit].t = t; |
|
485 stack[exit].node = farchild; |
|
486 stack[exit].pb.ma[axis] = splitVal; |
|
487 stack[exit].pb.ma[mod3[axis+1]] = |
|
488 _mm_add_ps(rays.o.ma[mod3[axis+1]], _mm_mul_ps(t, rays.dir.ma[mod3[axis+1]])); |
|
489 stack[exit].pb.ma[mod3[axis+2]] = |
|
490 _mm_add_ps(rays.o.ma[mod3[axis+2]], _mm_mul_ps(t, rays.dir.ma[mod3[axis+2]])); |
|
491 } |
|
492 |
|
493 /* current node is the leaf . . . empty or full */ |
|
494 __m128 dists = stack[exit].t; |
|
495 ShapeList::iterator shape; |
|
496 for (shape = node->getShapes()->begin(); shape != node->getShapes()->end(); shape++) |
|
497 { |
|
498 for (int i = 0; i < 4; i++) |
|
499 if ( ((_mm_movemask_ps(mask)>>(i))&1) && |
|
500 ((float*)&stack[entry].t)[i] < ((float*)&stack[exit].t)[i] && |
|
501 *shape != origin_shapes[i] && |
|
502 (*shape)->intersect(rays[i], ((float*)&dists)[i]) |
|
503 && ((float*)&dists)[i] >= ((float*)&stack[entry].t)[i] - Eps) |
|
504 { |
|
505 nearest_shapes[i] = *shape; |
|
506 nearest_distances[i] = ((float*)&dists)[i]; |
|
507 } |
|
508 |
|
509 /* |
|
510 bool results[4]; |
|
511 (*shape)->intersect_packet(rays, dists, results); |
|
512 int greater_than_entry = _mm_movemask_ps( |
|
513 _mm_and_ps(_mm_cmpge_ps(dists, _mm_sub_ps(stack[entry].t, mEps)), mask)); |
|
514 for (int i = 0; i < 4; i++) |
|
515 { |
|
516 if (results[i] //&& *shape != origin_shapes[i] |
|
517 && ((greater_than_entry>>(3-i))&1)) |
|
518 { |
|
519 nearest_shapes[i] = *shape; |
|
520 nearest_distances[i] = ((float*)&dists)[i]; |
|
521 } |
|
522 } |
|
523 */ |
|
524 } |
|
525 |
|
526 for (int i = 0; i < 4; i++) |
|
527 if (nearest_shapes[i]) |
|
528 ((int*)&mask)[i] = 0; |
|
529 |
|
530 if (!_mm_movemask_ps(mask)) |
|
531 return; |
|
532 |
|
533 entry = exit; |
|
534 |
|
535 /* retrieve the pointer to the next node, |
|
536 it is possible that ray traversal terminates */ |
|
537 node = stack[entry].node; |
|
538 exit = stack[entry].prev; |
|
539 } |
|
540 |
|
541 /* ray leaves the scene */ |
|
542 } |
|
543 |
331 ostream & operator<<(ostream &st, KdNode &node) |
544 ostream & operator<<(ostream &st, KdNode &node) |
332 { |
545 { |
333 if (node.isLeaf()) |
546 if (node.isLeaf()) |
334 { |
547 { |
335 st << "(l," << node.getShapes()->size(); |
548 st << "(l," << node.getShapes()->size(); |