363 |
363 |
364 int entry = 0, exit = 1; |
364 int entry = 0, exit = 1; |
365 stack[entry].t = a; |
365 stack[entry].t = a; |
366 |
366 |
367 /* distinguish between internal and external origin of a ray*/ |
367 /* distinguish between internal and external origin of a ray*/ |
368 stack[entry].pb = rays.o + rays.dir * a; /* external */ |
368 t = _mm_cmplt_ps(a, mZero); |
369 for (int i = 0; i < 4; i++) |
369 stack[entry].pb = rays.o + rays.dir * a; |
370 if (((float*)&a)[i] < 0.0) |
370 stack[entry].pb.mx = _mm_or_ps(_mm_andnot_ps(t, stack[entry].pb.mx), |
371 { |
371 _mm_and_ps(t, rays.o.mx)); |
372 /* internal */ |
372 stack[entry].pb.my = _mm_or_ps(_mm_andnot_ps(t, stack[entry].pb.my), |
373 stack[entry].pb.x[i] = rays.o.x[i]; |
373 _mm_and_ps(t, rays.o.my)); |
374 stack[entry].pb.y[i] = rays.o.y[i]; |
374 stack[entry].pb.mz = _mm_or_ps(_mm_andnot_ps(t, stack[entry].pb.mz), |
375 stack[entry].pb.z[i] = rays.o.z[i]; |
375 _mm_and_ps(t, rays.o.mz)); |
376 } |
|
377 |
376 |
378 /* setup initial exit point in the stack */ |
377 /* setup initial exit point in the stack */ |
379 stack[exit].t = b; |
378 stack[exit].t = b; |
380 stack[exit].pb = rays.o + rays.dir * b; |
379 stack[exit].pb = rays.o + rays.dir * b; |
381 stack[exit].node = NULL; |
380 stack[exit].node = NULL; |
394 /* retrieve position of splitting plane */ |
393 /* retrieve position of splitting plane */ |
395 splitVal = _mm_set_ps1(node->getSplit()); |
394 splitVal = _mm_set_ps1(node->getSplit()); |
396 axis = node->getAxis(); |
395 axis = node->getAxis(); |
397 |
396 |
398 // mask out invalid rays with near > far |
397 // mask out invalid rays with near > far |
399 __m128 curmask = _mm_and_ps(mask, _mm_cmple_ps(stack[entry].t, stack[exit].t)); |
398 const __m128 curmask = _mm_and_ps(mask, _mm_cmple_ps(stack[entry].t, stack[exit].t)); |
400 __m128 entry_lt = _mm_cmplt_ps(stack[entry].pb.ma[axis], splitVal); |
399 const __m128 entry_lt = _mm_cmplt_ps(stack[entry].pb.ma[axis], splitVal); |
401 __m128 entry_gt = _mm_cmpgt_ps(stack[entry].pb.ma[axis], splitVal); |
400 const __m128 entry_gt = _mm_cmpgt_ps(stack[entry].pb.ma[axis], splitVal); |
402 __m128 exit_lt = _mm_cmplt_ps(stack[exit].pb.ma[axis], splitVal); |
401 const __m128 exit_lt = _mm_cmplt_ps(stack[exit].pb.ma[axis], splitVal); |
403 __m128 exit_gt = _mm_cmpgt_ps(stack[exit].pb.ma[axis], splitVal); |
402 const __m128 exit_gt = _mm_cmpgt_ps(stack[exit].pb.ma[axis], splitVal); |
404 |
403 |
405 // if all of |
404 // if all of |
406 // stack[entry].pb[axis] <= splitVal, |
405 // stack[entry].pb[axis] <= splitVal, |
407 // stack[exit].pb[axis] <= splitVal |
406 // stack[exit].pb[axis] <= splitVal |
408 if (!_mm_movemask_ps( |
407 if (!_mm_movemask_ps( |
486 __m128 newmask = mask; |
485 __m128 newmask = mask; |
487 for (shape = node->getShapes()->begin(); shape != node->getShapes()->end(); shape++) |
486 for (shape = node->getShapes()->begin(); shape != node->getShapes()->end(); shape++) |
488 { |
487 { |
489 results = (*shape)->intersect_packet(rays, dists); |
488 results = (*shape)->intersect_packet(rays, dists); |
490 int valid = _mm_movemask_ps( |
489 int valid = _mm_movemask_ps( |
491 _mm_and_ps(mask, _mm_and_ps(results, _mm_cmpge_ps(dists, _mm_sub_ps(stack[entry].t, mEps))))); |
490 _mm_and_ps(mask, _mm_and_ps(results, |
|
491 _mm_cmpge_ps(dists, _mm_sub_ps(stack[entry].t, mEps))))); |
492 for (int i = 0; i < 4; i++) |
492 for (int i = 0; i < 4; i++) |
493 { |
493 { |
494 if (*shape != origin_shapes[i] && ((valid>>i)&1)) |
494 if (*shape != origin_shapes[i] && ((valid>>i)&1)) |
495 { |
495 { |
496 nearest_shapes[i] = *shape; |
496 nearest_shapes[i] = *shape; |