src/kdtree.cc
branch pyrit
changeset 87 1081e3dd3f3e
parent 86 ce6abe0aeeae
child 91 9d66d323c354
equal deleted inserted replaced
86:ce6abe0aeeae 87:1081e3dd3f3e
   363 
   363 
   364 	int entry = 0, exit = 1;
   364 	int entry = 0, exit = 1;
   365 	stack[entry].t = a;
   365 	stack[entry].t = a;
   366 
   366 
   367 	/* distinguish between internal and external origin of a ray*/
   367 	/* distinguish between internal and external origin of a ray*/
   368 	stack[entry].pb = rays.o + rays.dir * a; /* external */
   368 	t = _mm_cmplt_ps(a, mZero);
   369 	for (int i = 0; i < 4; i++)
   369 	stack[entry].pb = rays.o + rays.dir * a;
   370 		if (((float*)&a)[i] < 0.0)
   370 	stack[entry].pb.mx = _mm_or_ps(_mm_andnot_ps(t, stack[entry].pb.mx),
   371 		{
   371 		_mm_and_ps(t, rays.o.mx));
   372 			/* internal */
   372 	stack[entry].pb.my = _mm_or_ps(_mm_andnot_ps(t, stack[entry].pb.my),
   373 			stack[entry].pb.x[i] = rays.o.x[i];
   373 		_mm_and_ps(t, rays.o.my));
   374 			stack[entry].pb.y[i] = rays.o.y[i];
   374 	stack[entry].pb.mz = _mm_or_ps(_mm_andnot_ps(t, stack[entry].pb.mz),
   375 			stack[entry].pb.z[i] = rays.o.z[i];
   375 		_mm_and_ps(t, rays.o.mz));
   376 		}
       
   377 
   376 
   378 	/* setup initial exit point in the stack */
   377 	/* setup initial exit point in the stack */
   379 	stack[exit].t = b;
   378 	stack[exit].t = b;
   380 	stack[exit].pb = rays.o + rays.dir * b;
   379 	stack[exit].pb = rays.o + rays.dir * b;
   381 	stack[exit].node = NULL;
   380 	stack[exit].node = NULL;
   394 			/* retrieve position of splitting plane */
   393 			/* retrieve position of splitting plane */
   395 			splitVal = _mm_set_ps1(node->getSplit());
   394 			splitVal = _mm_set_ps1(node->getSplit());
   396 			axis = node->getAxis();
   395 			axis = node->getAxis();
   397 
   396 
   398 			// mask out invalid rays with near > far
   397 			// mask out invalid rays with near > far
   399 			__m128 curmask = _mm_and_ps(mask, _mm_cmple_ps(stack[entry].t, stack[exit].t));
   398 			const __m128 curmask = _mm_and_ps(mask, _mm_cmple_ps(stack[entry].t, stack[exit].t));
   400 			__m128 entry_lt = _mm_cmplt_ps(stack[entry].pb.ma[axis], splitVal);
   399 			const __m128 entry_lt = _mm_cmplt_ps(stack[entry].pb.ma[axis], splitVal);
   401 			__m128 entry_gt = _mm_cmpgt_ps(stack[entry].pb.ma[axis], splitVal);
   400 			const __m128 entry_gt = _mm_cmpgt_ps(stack[entry].pb.ma[axis], splitVal);
   402 			__m128 exit_lt = _mm_cmplt_ps(stack[exit].pb.ma[axis], splitVal);
   401 			const __m128 exit_lt = _mm_cmplt_ps(stack[exit].pb.ma[axis], splitVal);
   403 			__m128 exit_gt = _mm_cmpgt_ps(stack[exit].pb.ma[axis], splitVal);
   402 			const __m128 exit_gt = _mm_cmpgt_ps(stack[exit].pb.ma[axis], splitVal);
   404 
   403 
   405 			// if all of
   404 			// if all of
   406 			// stack[entry].pb[axis] <= splitVal,
   405 			// stack[entry].pb[axis] <= splitVal,
   407 			// stack[exit].pb[axis] <= splitVal
   406 			// stack[exit].pb[axis] <= splitVal
   408 			if (!_mm_movemask_ps(
   407 			if (!_mm_movemask_ps(
   486 		__m128 newmask = mask;
   485 		__m128 newmask = mask;
   487 		for (shape = node->getShapes()->begin(); shape != node->getShapes()->end(); shape++)
   486 		for (shape = node->getShapes()->begin(); shape != node->getShapes()->end(); shape++)
   488 		{
   487 		{
   489 			results = (*shape)->intersect_packet(rays, dists);
   488 			results = (*shape)->intersect_packet(rays, dists);
   490 			int valid = _mm_movemask_ps(
   489 			int valid = _mm_movemask_ps(
   491 				_mm_and_ps(mask, _mm_and_ps(results, _mm_cmpge_ps(dists, _mm_sub_ps(stack[entry].t, mEps)))));
   490 				_mm_and_ps(mask, _mm_and_ps(results,
       
   491 				_mm_cmpge_ps(dists, _mm_sub_ps(stack[entry].t, mEps)))));
   492 			for (int i = 0; i < 4; i++)
   492 			for (int i = 0; i < 4; i++)
   493 			{
   493 			{
   494 				if (*shape != origin_shapes[i] && ((valid>>i)&1))
   494 				if (*shape != origin_shapes[i] && ((valid>>i)&1))
   495 				{
   495 				{
   496 					nearest_shapes[i] = *shape;
   496 					nearest_shapes[i] = *shape;