src/kdtree.cc
branchpyrit
changeset 84 6f7fe14782c2
parent 80 907929fa9b59
child 85 907a634e5c02
equal deleted inserted replaced
83:e3a2a5b26abb 84:6f7fe14782c2
   254 				if (stack[exit].pb[axis] <= splitVal)
   254 				if (stack[exit].pb[axis] <= splitVal)
   255 				{ /* case N1, N2, N3, P5, Z2, and Z3 */
   255 				{ /* case N1, N2, N3, P5, Z2, and Z3 */
   256 					node = node->getLeftChild();
   256 					node = node->getLeftChild();
   257 					continue;
   257 					continue;
   258 				}
   258 				}
   259 				if (stack[exit].pb[axis] == splitVal)
   259 				if (stack[entry].pb[axis] == splitVal)
   260 				{ /* case Z1 */
   260 				{ /* case Z1 */
   261 					node = node->getRightChild();
   261 					node = node->getRightChild();
   262 					continue;
   262 					continue;
   263 				}
   263 				}
   264 				/* case N4 */
   264 				/* case N4 */
   326 
   326 
   327 	/* ray leaves the scene */
   327 	/* ray leaves the scene */
   328 	return NULL;
   328 	return NULL;
   329 }
   329 }
   330 
   330 
       
   331 // stack element for kd-tree traversal (packet version)
       
   332 struct StackElem4
       
   333 {
       
   334 	KdNode* node; /* pointer to far child */
       
   335 	__m128 t; /* the entry/exit signed distance */
       
   336 	VectorPacket pb; /* the coordinates of entry/exit point */
       
   337 	int prev;
       
   338 };
       
   339 
       
   340 void KdTree::packet_intersection(const Shape **origin_shapes, const RayPacket &rays,
       
   341 		Float *nearest_distances, Shape **nearest_shapes)
       
   342 {
       
   343 	__m128 a, b; /* entry/exit signed distance */
       
   344 	__m128 t;    /* signed distance to the splitting plane */
       
   345 	__m128 mask = zeros;
       
   346 
       
   347 	/* if we have no tree, fall back to naive test */
       
   348 	if (!built)
       
   349 		Container::packet_intersection(origin_shapes, rays, nearest_distances, nearest_shapes);
       
   350 
       
   351 	nearest_shapes[0] = NULL;
       
   352 	nearest_shapes[1] = NULL;
       
   353 	nearest_shapes[2] = NULL;
       
   354 	nearest_shapes[3] = NULL;
       
   355 
       
   356 	//bbox.intersect_packet(rays, a, b)
       
   357 	if (bbox.intersect(rays[0], ((float*)&a)[0], ((float*)&b)[0]))
       
   358 		((int*)&mask)[0] = -1;
       
   359 	if (bbox.intersect(rays[1], ((float*)&a)[1], ((float*)&b)[1]))
       
   360 		((int*)&mask)[1] = -1;
       
   361 	if (bbox.intersect(rays[2], ((float*)&a)[2], ((float*)&b)[2]))
       
   362 		((int*)&mask)[2] = -1;
       
   363 	if (bbox.intersect(rays[3], ((float*)&a)[3], ((float*)&b)[3]))
       
   364 		((int*)&mask)[3] = -1;
       
   365 
       
   366 	if (!_mm_movemask_ps(mask))
       
   367 		return;
       
   368 
       
   369 	/* pointers to the far child node and current node */
       
   370 	KdNode *farchild, *node;
       
   371 	node = root;
       
   372 
       
   373 	StackElem4 stack[max_depth];
       
   374 
       
   375 	int entry = 0, exit = 1;
       
   376 	stack[entry].t = a;
       
   377 
       
   378 	/* distinguish between internal and external origin of a ray*/
       
   379 	stack[entry].pb = rays.o + rays.dir * a; /* external */
       
   380 	for (int i = 0; i < 4; i++)
       
   381 		if (((float*)&a)[i] < 0.0)
       
   382 		{
       
   383 			/* internal */
       
   384 			stack[entry].pb.x[i] = rays.o.x[i];
       
   385 			stack[entry].pb.y[i] = rays.o.y[i];
       
   386 			stack[entry].pb.z[i] = rays.o.z[i];
       
   387 		}
       
   388 
       
   389 	/* setup initial exit point in the stack */
       
   390 	stack[exit].t = b;
       
   391 	stack[exit].pb = rays.o + rays.dir * b;
       
   392 	stack[exit].node = NULL;
       
   393 
       
   394 	/* loop, traverse through the whole kd-tree,
       
   395 	until an object is intersected or ray leaves the scene */
       
   396 	__m128 splitVal;
       
   397 	int axis;
       
   398 	static const int mod3[] = {0,1,2,0,1};
       
   399 	const VectorPacket invdirs = ones / rays.dir;
       
   400 	while (node)
       
   401 	{
       
   402 		/* loop until a leaf is found */
       
   403 		while (!node->isLeaf())
       
   404 		{
       
   405 			/* retrieve position of splitting plane */
       
   406 			splitVal = _mm_set_ps1(node->getSplit());
       
   407 			axis = node->getAxis();
       
   408 
       
   409 			// mask out invalid rays with near > far
       
   410 			__m128 curmask = _mm_and_ps(mask, _mm_cmple_ps(stack[entry].t, stack[exit].t));
       
   411 			__m128 entry_lt = _mm_cmplt_ps(stack[entry].pb.ma[axis], splitVal);
       
   412 			__m128 entry_gt = _mm_cmpgt_ps(stack[entry].pb.ma[axis], splitVal);
       
   413 			__m128 exit_lt = _mm_cmplt_ps(stack[exit].pb.ma[axis], splitVal);
       
   414 			__m128 exit_gt = _mm_cmpgt_ps(stack[exit].pb.ma[axis], splitVal);
       
   415 
       
   416 			// if all of
       
   417 			// stack[entry].pb[axis] <= splitVal,
       
   418 			// stack[exit].pb[axis] <= splitVal
       
   419 			if (!_mm_movemask_ps(
       
   420 				_mm_and_ps(_mm_or_ps(entry_gt, exit_gt), curmask)))
       
   421 			{
       
   422 				node = node->getLeftChild();
       
   423 				continue;
       
   424 			}
       
   425 
       
   426 			// if all of
       
   427 			// stack[entry].pb[axis] >= splitVal,
       
   428 			// stack[exit].pb[axis] >= splitVal
       
   429 			if (!_mm_movemask_ps(
       
   430 				_mm_and_ps(_mm_or_ps(entry_lt, exit_lt), curmask)))
       
   431 			{
       
   432 				node = node->getRightChild();
       
   433 				continue;
       
   434 			}
       
   435 
       
   436 			// any of
       
   437 			// stack[entry].pb[axis] < splitVal,
       
   438 			// stack[exit].pb[axis] > splitVal
       
   439 			bool cond1 = _mm_movemask_ps(
       
   440 				_mm_and_ps(_mm_and_ps(entry_lt, exit_gt), curmask));
       
   441 
       
   442 			// any of
       
   443 			// stack[entry].pb[axis] > splitVal,
       
   444 			// stack[exit].pb[axis] < splitVal
       
   445 			bool cond2 = _mm_movemask_ps(
       
   446 				_mm_and_ps(_mm_and_ps(entry_gt, exit_lt), curmask));
       
   447 
       
   448 			if ((!cond1 && !cond2) || (cond1 && cond2))
       
   449 			{
       
   450 				// fall back to single rays
       
   451 				// FIXME: split rays and continue
       
   452 				for (int i = 0; i < 4; i++)
       
   453 					if(!nearest_shapes[i])
       
   454 						nearest_shapes[i] = nearest_intersection(origin_shapes[i],
       
   455 							rays[i], nearest_distances[i]);
       
   456 				return;
       
   457 			}
       
   458 
       
   459 			if (cond1)
       
   460 			{
       
   461 				farchild = node->getRightChild();
       
   462 				node = node->getLeftChild();
       
   463 			}
       
   464 			else
       
   465 			{
       
   466 				farchild = node->getLeftChild();
       
   467 				node = node->getRightChild();
       
   468 			}
       
   469 
       
   470 			/* traverse both children */
       
   471 
       
   472 			/* signed distance to the splitting plane */
       
   473 			t = _mm_mul_ps(_mm_sub_ps(splitVal, rays.o.ma[axis]), invdirs.ma[axis]);
       
   474 
       
   475 			/* setup the new exit point and push it onto stack */
       
   476 			register int tmp = exit;
       
   477 
       
   478 			exit++;
       
   479 			if (exit == entry)
       
   480 				exit++;
       
   481 			assert(exit < max_depth);
       
   482 
       
   483 			stack[exit].prev = tmp;
       
   484 			stack[exit].t = t;
       
   485 			stack[exit].node = farchild;
       
   486 			stack[exit].pb.ma[axis] = splitVal;
       
   487 			stack[exit].pb.ma[mod3[axis+1]] =
       
   488 				_mm_add_ps(rays.o.ma[mod3[axis+1]], _mm_mul_ps(t, rays.dir.ma[mod3[axis+1]]));
       
   489 			stack[exit].pb.ma[mod3[axis+2]] =
       
   490 				_mm_add_ps(rays.o.ma[mod3[axis+2]], _mm_mul_ps(t, rays.dir.ma[mod3[axis+2]]));
       
   491 		}
       
   492 
       
   493 		/* current node is the leaf . . . empty or full */
       
   494 		__m128 dists = stack[exit].t;
       
   495 		ShapeList::iterator shape;
       
   496 		for (shape = node->getShapes()->begin(); shape != node->getShapes()->end(); shape++)
       
   497 		{
       
   498 			for (int i = 0; i < 4; i++)
       
   499 				if ( ((_mm_movemask_ps(mask)>>(i))&1) &&
       
   500 				((float*)&stack[entry].t)[i] < ((float*)&stack[exit].t)[i] &&
       
   501 				*shape != origin_shapes[i] &&
       
   502 				(*shape)->intersect(rays[i], ((float*)&dists)[i])
       
   503 				&& ((float*)&dists)[i] >= ((float*)&stack[entry].t)[i] - Eps)
       
   504 				{
       
   505 					nearest_shapes[i] = *shape;
       
   506 					nearest_distances[i] = ((float*)&dists)[i];
       
   507 				}
       
   508 
       
   509 			/*
       
   510 			bool results[4];
       
   511 			(*shape)->intersect_packet(rays, dists, results);
       
   512 			int greater_than_entry = _mm_movemask_ps(
       
   513 				_mm_and_ps(_mm_cmpge_ps(dists, _mm_sub_ps(stack[entry].t, mEps)), mask));
       
   514 			for (int i = 0; i < 4; i++)
       
   515 			{
       
   516 				if (results[i] //&& *shape != origin_shapes[i]
       
   517 				&& ((greater_than_entry>>(3-i))&1))
       
   518 				{
       
   519 					nearest_shapes[i] = *shape;
       
   520 					nearest_distances[i] = ((float*)&dists)[i];
       
   521 				}
       
   522 			}
       
   523 			*/
       
   524 		}
       
   525 
       
   526 		for (int i = 0; i < 4; i++)
       
   527 			if (nearest_shapes[i])
       
   528 				((int*)&mask)[i] = 0;
       
   529 
       
   530 		if (!_mm_movemask_ps(mask))
       
   531 			return;
       
   532 
       
   533 		entry = exit;
       
   534 
       
   535 		/* retrieve the pointer to the next node,
       
   536 		it is possible that ray traversal terminates */
       
   537 		node = stack[entry].node;
       
   538 		exit = stack[entry].prev;
       
   539 	}
       
   540 
       
   541 	/* ray leaves the scene */
       
   542 }
       
   543 
   331 ostream & operator<<(ostream &st, KdNode &node)
   544 ostream & operator<<(ostream &st, KdNode &node)
   332 {
   545 {
   333 	if (node.isLeaf())
   546 	if (node.isLeaf())
   334 	{
   547 	{
   335 		st << "(l," << node.getShapes()->size();
   548 		st << "(l," << node.getShapes()->size();