346 dist = t; |
346 dist = t; |
347 return true; |
347 return true; |
348 #endif |
348 #endif |
349 } |
349 } |
350 |
350 |
|
351 #ifdef TRI_BARI_PRE |
|
352 __m128 Triangle::intersect_packet(const RayPacket &rays, __m128 &dists) |
|
353 { |
|
354 static const int modulo3[5] = {0,1,2,0,1}; |
|
355 register const int u = modulo3[k+1]; |
|
356 register const int v = modulo3[k+2]; |
|
357 __m128 mask; |
|
358 |
|
359 const __m128 t = _mm_div_ps( |
|
360 _mm_sub_ps(_mm_sub_ps( |
|
361 _mm_sub_ps(_mm_set_ps1(nd), rays.o.ma[k]), |
|
362 _mm_mul_ps(_mm_set_ps1(nu), rays.o.ma[u]) |
|
363 ), _mm_mul_ps(_mm_set_ps1(nv), rays.o.ma[v])), |
|
364 _mm_add_ps(rays.dir.ma[k], |
|
365 _mm_add_ps(_mm_mul_ps(_mm_set_ps1(nu), rays.dir.ma[u]), |
|
366 _mm_mul_ps(_mm_set_ps1(nv), rays.dir.ma[v]))) |
|
367 ); |
|
368 |
|
369 mask = _mm_and_ps(_mm_cmplt_ps(t, dists), _mm_cmpge_ps(t, mEps)); |
|
370 if (!_mm_movemask_ps(mask)) |
|
371 return mask; |
|
372 |
|
373 const __m128 hu = _mm_sub_ps(_mm_add_ps(rays.o.ma[u], |
|
374 _mm_mul_ps(t, rays.dir.ma[u])), _mm_set_ps1(A->P[u])); |
|
375 const __m128 hv = _mm_sub_ps(_mm_add_ps(rays.o.ma[v], |
|
376 _mm_mul_ps(t, rays.dir.ma[v])), _mm_set_ps1(A->P[v])); |
|
377 const __m128 beta = _mm_add_ps(_mm_mul_ps(hv, _mm_set_ps1(bnu)), |
|
378 _mm_mul_ps(hu, _mm_set_ps1(bnv))); |
|
379 |
|
380 mask = _mm_and_ps(mask, _mm_cmpge_ps(beta, zeros)); |
|
381 if (!_mm_movemask_ps(mask)) |
|
382 return mask; |
|
383 |
|
384 const __m128 gamma = _mm_add_ps(_mm_mul_ps(hu, _mm_set_ps1(cnv)), |
|
385 _mm_mul_ps(hv, _mm_set_ps1(cnu))); |
|
386 |
|
387 mask = _mm_and_ps(mask, _mm_and_ps(_mm_cmpge_ps(gamma, zeros), |
|
388 _mm_cmple_ps(_mm_add_ps(beta, gamma), ones))); |
|
389 if (!_mm_movemask_ps(mask)) |
|
390 return mask; |
|
391 |
|
392 for (int i = 0; i < 4; i++) |
|
393 if ((_mm_movemask_ps(mask)>>i)&1) |
|
394 ((float*)&dists)[i] = ((float*)&t)[i]; |
|
395 return mask; |
|
396 } |
|
397 #endif |
|
398 |
351 bool Triangle::intersect_bbox(const BBox &bbox) const |
399 bool Triangle::intersect_bbox(const BBox &bbox) const |
352 { |
400 { |
353 const Vector3 boxcenter = (bbox.L+bbox.H)*0.5; |
401 const Vector3 boxcenter = (bbox.L+bbox.H)*0.5; |
354 const Vector3 boxhalfsize = (bbox.H-bbox.L)*0.5; |
402 const Vector3 boxhalfsize = (bbox.H-bbox.L)*0.5; |
355 const Vector3 v0 = A->P - boxcenter; |
403 const Vector3 v0 = A->P - boxcenter; |