src/shapes.cc
branchpyrit
changeset 92 9af5c039b678
parent 91 9d66d323c354
child 93 96d65f841791
equal deleted inserted replaced
91:9d66d323c354 92:9af5c039b678
    52 		}
    52 		}
    53 	}
    53 	}
    54 	return false;
    54 	return false;
    55 }
    55 }
    56 
    56 
    57 #ifndef NO_SSE
    57 #ifndef NO_SIMD
    58 __m128 Sphere::intersect_packet(const RayPacket &rays, __m128 &dists)
    58 mfloat4 Sphere::intersect_packet(const RayPacket &rays, mfloat4 &dists) const
    59 {
    59 {
    60 	VectorPacket V = rays.o - VectorPacket(center);
    60 	VectorPacket V = rays.o - VectorPacket(center);
    61 	register __m128 d = _mm_sub_ps(mZero, dot(V, rays.dir));
    61 	register mfloat4 d = msub(mZero, dot(V, rays.dir));
    62 	register __m128 Det = _mm_sub_ps(_mm_mul_ps(d, d),
    62 	register mfloat4 Det = msub(mmul(d, d), msub(dot(V,V), mset1(sqr_radius)));
    63 		_mm_sub_ps(dot(V,V), _mm_set_ps1(sqr_radius)));
    63 	register mfloat4 t1, t2, mask;
    64 	register __m128 t1, t2, mask;
    64 
    65 
    65 	mask = mcmpgt(Det, mZero);
    66 	mask = _mm_cmpgt_ps(Det, mZero);
    66 	if (!mmovemask(mask))
    67 	if (!_mm_movemask_ps(mask))
       
    68 		return mask;
    67 		return mask;
    69 
    68 
    70 	Det = _mm_sqrt_ps(Det);
    69 	Det = msqrt(Det);
    71 	t1 = _mm_sub_ps(d, Det);
    70 	t1 = msub(d, Det);
    72 	t2 = _mm_add_ps(d, Det);
    71 	t2 = madd(d, Det);
    73 
    72 
    74 	mask = _mm_and_ps(mask, _mm_cmpgt_ps(t2, mZero));
    73 	mask = mand(mask, mcmpgt(t2, mZero));
    75 
    74 
    76 	const __m128 cond1 = _mm_and_ps(_mm_cmpgt_ps(t1, mZero), _mm_cmplt_ps(t1, dists));
    75 	const mfloat4 cond1 = mand(mcmpgt(t1, mZero), mcmplt(t1, dists));
    77 	const __m128 cond2 = _mm_and_ps(_mm_cmple_ps(t1, mZero), _mm_cmplt_ps(t2, dists));
    76 	const mfloat4 cond2 = mand(mcmple(t1, mZero), mcmplt(t2, dists));
    78 	const __m128 newdists = _mm_or_ps(_mm_and_ps(cond1, t1), _mm_and_ps(cond2, t2));
    77 	const mfloat4 newdists = mor(mand(cond1, t1), mand(cond2, t2));
    79 	mask = _mm_and_ps(mask, _mm_or_ps(cond1, cond2));
    78 	mask = mand(mask, mor(cond1, cond2));
    80 	dists = _mm_or_ps(_mm_and_ps(mask, newdists), _mm_andnot_ps(mask, dists));
    79 	dists = mselect(mask, newdists, dists);
    81 	return mask;
    80 	return mask;
    82 }
    81 }
    83 #endif
    82 #endif
    84 
    83 
    85 /* if there should be CSG sometimes, this may be needed... */
    84 /* if there should be CSG sometimes, this may be needed... */
   175 		return true;
   174 		return true;
   176 	}
   175 	}
   177 	return false;
   176 	return false;
   178 }
   177 }
   179 
   178 
   180 #ifndef NO_SSE
   179 #ifndef NO_SIMD
   181 __m128 Box::intersect_packet(const RayPacket &rays, __m128 &dists)
   180 mfloat4 Box::intersect_packet(const RayPacket &rays, mfloat4 &dists) const
   182 {
   181 {
   183 	register __m128 tnear = mZero;
   182 	mfloat4 origin = rays.o.ma[0];
   184 	register __m128 tfar = mInf;
   183 	mfloat4 invdir = rays.invdir.ma[0];
   185 	register __m128 t1, t2;
   184 	mfloat4 t1 = mmul(msub(mset1(L[0]), origin), invdir);
   186 	register __m128 mask = mAllSet;
   185 	mfloat4 t2 = mmul(msub(mset1(H[0]), origin), invdir);
   187 
   186 	mfloat4 tmin = mmin(t1, t2);
   188 	for (int i = 0; i < 3; i++)
   187 	mfloat4 tmax = mmax(t1, t2);
   189 	{
   188 
   190 		const __m128 mL = _mm_set_ps1(L[i]);
   189 	origin = rays.o.ma[1];
   191 		const __m128 mH = _mm_set_ps1(H[i]);
   190 	invdir = rays.invdir.ma[1];
   192 		mask = _mm_and_ps(mask,
   191 	t1 = mmul(msub(mset1(L[1]), origin), invdir);
   193 		_mm_or_ps(
   192 	t2 = mmul(msub(mset1(H[1]), origin), invdir);
   194 		_mm_or_ps(_mm_cmplt_ps(rays.dir.ma[i], mMEps), _mm_cmpgt_ps(rays.dir.ma[i], mEps)),
   193 	tmin = mmax(mmin(t1, t2), tmin);
   195 		_mm_and_ps(_mm_cmpge_ps(rays.o.ma[i], mL), _mm_cmple_ps(rays.o.ma[i], mH))
   194 	tmax = mmin(mmax(t1, t2), tmax);
   196 		));
   195 
   197 		if (!_mm_movemask_ps(mask))
   196 	origin = rays.o.ma[2];
   198 			return mask;
   197 	invdir = rays.invdir.ma[2];
   199 
   198 	t1 = mmul(msub(mset1(L[2]), origin), invdir);
   200 		/* compute the intersection distance of the planes */
   199 	t2 = mmul(msub(mset1(H[2]), origin), invdir);
   201 		t1 = _mm_div_ps(_mm_sub_ps(mL, rays.o.ma[i]), rays.dir.ma[i]);
   200 	tmin = mmax(mmin(t1, t2), tmin);
   202 		t2 = _mm_div_ps(_mm_sub_ps(mH, rays.o.ma[i]), rays.dir.ma[i]);
   201 	tmax = mmin(mmax(t1, t2), tmax);
   203 
   202 
   204 		__m128 t = _mm_min_ps(t1, t2);
   203 	mfloat4 mask = mand(mand(mcmplt(tmin, tmax), mcmpgt(tmax, mZero)), mcmplt(tmin, dists));
   205 		t2 = _mm_max_ps(t1, t2);
   204 	dists = mselect(mask, tmin, dists);
   206 		t1 = t;
       
   207 
       
   208 		tnear = _mm_max_ps(tnear, t1);	/* want largest Tnear */
       
   209 		tfar = _mm_min_ps(tfar, t2);	/* want smallest Tfar */
       
   210 
       
   211 		mask = _mm_and_ps(mask,
       
   212 			_mm_and_ps(_mm_cmple_ps(tnear, tfar), _mm_cmpge_ps(tfar, mZero)));
       
   213 		if (!_mm_movemask_ps(mask))
       
   214 			return mask;
       
   215 	}
       
   216 
       
   217 	mask = _mm_and_ps(mask, _mm_cmplt_ps(tnear, dists));
       
   218 	dists = _mm_or_ps(_mm_and_ps(mask, tnear), _mm_andnot_ps(mask, dists));
       
   219 	return mask;
   205 	return mask;
   220 }
   206 }
   221 #endif
   207 #endif
   222 
   208 
   223 bool Box::intersect_bbox(const BBox &bbox) const
   209 bool Box::intersect_bbox(const BBox &bbox) const
   417 	dist = t;
   403 	dist = t;
   418 	return true;
   404 	return true;
   419 #endif
   405 #endif
   420 }
   406 }
   421 
   407 
   422 #if not defined(NO_SSE) and defined(TRI_BARI_PRE)
   408 #if !defined(NO_SIMD) && defined(TRI_BARI_PRE)
   423 __m128 Triangle::intersect_packet(const RayPacket &rays, __m128 &dists)
   409 mfloat4 Triangle::intersect_packet(const RayPacket &rays, mfloat4 &dists) const
   424 {
   410 {
   425 	static const int modulo3[5] = {0,1,2,0,1};
   411 	static const int modulo3[5] = {0,1,2,0,1};
   426 	register const int u = modulo3[k+1];
   412 	register const int u = modulo3[k+1];
   427 	register const int v = modulo3[k+2];
   413 	register const int v = modulo3[k+2];
   428 	__m128 mask;
   414 	mfloat4 mask;
   429 
   415 
   430 	const __m128 t = _mm_div_ps(
   416 	const mfloat4 t = mdiv(
   431 		_mm_sub_ps(_mm_sub_ps(
   417 		msub(msub(
   432 		_mm_sub_ps(_mm_set_ps1(nd), rays.o.ma[k]),
   418 		msub(mset1(nd), rays.o.ma[k]),
   433 		_mm_mul_ps(_mm_set_ps1(nu), rays.o.ma[u])
   419 		mmul(mset1(nu), rays.o.ma[u])
   434 		), _mm_mul_ps(_mm_set_ps1(nv), rays.o.ma[v])),
   420 		), mmul(mset1(nv), rays.o.ma[v])),
   435 		_mm_add_ps(rays.dir.ma[k],
   421 		madd(rays.dir.ma[k],
   436 		_mm_add_ps(_mm_mul_ps(_mm_set_ps1(nu), rays.dir.ma[u]),
   422 		madd(mmul(mset1(nu), rays.dir.ma[u]),
   437 		_mm_mul_ps(_mm_set_ps1(nv), rays.dir.ma[v])))
   423 		mmul(mset1(nv), rays.dir.ma[v])))
   438 		);
   424 		);
   439 
   425 
   440 	mask = _mm_and_ps(_mm_cmplt_ps(t, dists), _mm_cmpge_ps(t, mEps));
   426 	mask = mand(mcmplt(t, dists), mcmpge(t, mEps));
   441 	if (!_mm_movemask_ps(mask))
   427 	if (!mmovemask(mask))
   442 		return mask;
   428 		return mask;
   443 
   429 
   444 	const __m128 hu = _mm_sub_ps(_mm_add_ps(rays.o.ma[u],
   430 	const mfloat4 hu = msub(madd(rays.o.ma[u],
   445 		_mm_mul_ps(t, rays.dir.ma[u])), _mm_set_ps1(A->P[u]));
   431 		mmul(t, rays.dir.ma[u])), mset1(A->P[u]));
   446 	const __m128 hv = _mm_sub_ps(_mm_add_ps(rays.o.ma[v],
   432 	const mfloat4 hv = msub(madd(rays.o.ma[v],
   447 		_mm_mul_ps(t, rays.dir.ma[v])), _mm_set_ps1(A->P[v]));
   433 		mmul(t, rays.dir.ma[v])), mset1(A->P[v]));
   448 	const __m128 beta = _mm_add_ps(_mm_mul_ps(hv, _mm_set_ps1(bnu)),
   434 	const mfloat4 beta = madd(mmul(hv, mset1(bnu)),
   449 		_mm_mul_ps(hu, _mm_set_ps1(bnv)));
   435 		mmul(hu, mset1(bnv)));
   450 
   436 
   451 	mask = _mm_and_ps(mask, _mm_cmpge_ps(beta, mZero));
   437 	mask = mand(mask, mcmpge(beta, mZero));
   452 	if (!_mm_movemask_ps(mask))
   438 	if (!mmovemask(mask))
   453 		return mask;
   439 		return mask;
   454 
   440 
   455 	const __m128 gamma = _mm_add_ps(_mm_mul_ps(hu, _mm_set_ps1(cnv)),
   441 	const mfloat4 gamma = madd(mmul(hu, mset1(cnv)),
   456 		_mm_mul_ps(hv, _mm_set_ps1(cnu)));
   442 		mmul(hv, mset1(cnu)));
   457 
   443 
   458 	mask = _mm_and_ps(mask, _mm_and_ps(_mm_cmpge_ps(gamma, mZero),
   444 	mask = mand(mask, mand(mcmpge(gamma, mZero),
   459 		_mm_cmple_ps(_mm_add_ps(beta, gamma), mOne)));
   445 		mcmple(madd(beta, gamma), mOne)));
   460 	if (!_mm_movemask_ps(mask))
   446 	if (!mmovemask(mask))
   461 		return mask;
   447 		return mask;
   462 
   448 
   463 	dists = _mm_or_ps(_mm_andnot_ps(mask, dists), _mm_and_ps(mask, t));
   449 	dists = mselect(mask, t, dists);
   464 	return mask;
   450 	return mask;
   465 }
   451 }
   466 #endif
   452 #endif
   467 
   453 
   468 bool Triangle::intersect_bbox(const BBox &bbox) const
   454 bool Triangle::intersect_bbox(const BBox &bbox) const
   578 	for(int q=0;q<3;q++)
   564 	for(int q=0;q<3;q++)
   579 	{
   565 	{
   580 		v=v0[q];
   566 		v=v0[q];
   581 		if(N[q]>0.0f)
   567 		if(N[q]>0.0f)
   582 		{
   568 		{
   583 			vmin.cell[q]=-boxhalfsize[q] - v;
   569 			vmin[q]=-boxhalfsize[q] - v;
   584 			vmax.cell[q]= boxhalfsize[q] - v;
   570 			vmax[q]= boxhalfsize[q] - v;
   585 		}
   571 		}
   586 		else
   572 		else
   587 		{
   573 		{
   588 			vmin.cell[q]= boxhalfsize[q] - v;
   574 			vmin[q]= boxhalfsize[q] - v;
   589 			vmax.cell[q]=-boxhalfsize[q] - v;
   575 			vmax[q]=-boxhalfsize[q] - v;
   590 		}
   576 		}
   591 	}
   577 	}
   592 	if(dot(N,vmin)>0.0f) return false;
   578 	if(dot(N,vmin)>0.0f) return false;
   593 	if(dot(N,vmax)>=0.0f) return true;
   579 	if(dot(N,vmax)>=0.0f) return true;
   594 
   580