// v-coordinate of the hit point in the projection plane, taken
// relative to vertex A, followed by the first barycentric
// coordinate beta.
const __m128 hv = _mm_sub_ps(_mm_add_ps(rays.o.ma[v],
    _mm_mul_ps(t, rays.dir.ma[v])), _mm_set_ps1(A->P[v]));
const __m128 beta = _mm_add_ps(_mm_mul_ps(hv, _mm_set_ps1(bnu)),
    _mm_mul_ps(hu, _mm_set_ps1(bnv)));

// A valid hit requires beta >= 0; bail out early if no lane survives.
mask = _mm_and_ps(mask, _mm_cmpge_ps(beta, zeros));
if (!_mm_movemask_ps(mask))
    return mask;
// Second barycentric coordinate gamma; the hit lies inside the
// triangle only if gamma >= 0 and beta + gamma <= 1.
const __m128 gamma = _mm_add_ps(_mm_mul_ps(hu, _mm_set_ps1(cnv)),
    _mm_mul_ps(hv, _mm_set_ps1(cnu)));

mask = _mm_and_ps(mask, _mm_and_ps(_mm_cmpge_ps(gamma, zeros),
    _mm_cmple_ps(_mm_add_ps(beta, gamma), ones)));
if (!_mm_movemask_ps(mask))
    return mask;

// Scalar epilogue: handle each of the four lanes whose mask bit is set.
for (int i = 0; i < 4; i++)
    if ((_mm_movemask_ps(mask) >> i) & 1)