src/kdtree.cc
branchpyrit
changeset 92 9af5c039b678
parent 91 9d66d323c354
child 93 96d65f841791
equal deleted inserted replaced
91:9d66d323c354 92:9af5c039b678
    62 
    62 
    63 KdNode::~KdNode()
    63 KdNode::~KdNode()
    64 {
    64 {
    65 	if (isLeaf())
    65 	if (isLeaf())
    66 		delete getShapes();
    66 		delete getShapes();
    67 	else
       
    68 		delete[] getLeftChild();
       
    69 }
    67 }
    70 
    68 
    71 // kd-tree recursive build algorithm, inspired by PBRT (www.pbrt.org)
    69 // kd-tree recursive build algorithm, inspired by PBRT (www.pbrt.org)
    72 void KdTree::recursive_build(KdNode *node, BBox bounds, int maxdepth)
    70 void KdTree::recursive_build(KdNode *node, const BBox &bounds, int maxdepth)
    73 {
    71 {
    74 	ShapeList *shapes = node->getShapes();
    72 	ShapeList *shapes = node->getShapes();
    75 
    73 
    76 	if (maxdepth <= 0 || shapes->size() <= 2)
    74 	if (maxdepth <= 0 || shapes->size() <= 2)
    77 	{
    75 	{
   100 	}
    98 	}
   101 	for (int ax = 0; ax < 3; ax++)
    99 	for (int ax = 0; ax < 3; ax++)
   102 		sort(edges[ax].begin(), edges[ax].end());
   100 		sort(edges[ax].begin(), edges[ax].end());
   103 
   101 
   104 	// choose best split pos
   102 	// choose best split pos
   105 	const Float K = 1.4; // constant, K = cost of traversal / cost of ray-triangle intersection
   103 	const Float K = 1.4f; // constant, K = cost of traversal / cost of ray-triangle intersection
   106 	Float SAV = (bounds.w()*bounds.h() +  // surface area of node
   104 	Float SAV = (bounds.w()*bounds.h() +  // surface area of node
   107 		bounds.w()*bounds.d() + bounds.h()*bounds.d());
   105 		bounds.w()*bounds.d() + bounds.h()*bounds.d());
   108 	Float cost = SAV * (K + shapes->size()); // initial cost = non-split cost
   106 	Float cost = SAV * (K + shapes->size()); // initial cost = non-split cost
   109 
   107 
   110 	vector<ShapeBound>::iterator edge, splitedge = edges[2].end();
   108 	vector<ShapeBound>::iterator edge, splitedge = edges[0].end();
   111 	int axis = 0;
   109 	int axis = 0;
   112 	for (int ax = 0; ax < 3; ax++)
   110 	for (int ax = 0; ax < 3; ax++)
   113 	{
   111 	{
   114 		int lnum = 0, rnum = shapes->size();
   112 		int lnum = 0, rnum = shapes->size();
   115 		BBox lbb = bounds;
   113 		BBox lbb = bounds;
   118 		{
   116 		{
   119 			if (edge->end)
   117 			if (edge->end)
   120 				rnum--;
   118 				rnum--;
   121 
   119 
   122 			// calculate SAH cost of this split
   120 			// calculate SAH cost of this split
   123 			lbb.H.cell[ax] = edge->pos;
   121 			lbb.H[ax] = edge->pos;
   124 			rbb.L.cell[ax] = edge->pos;
   122 			rbb.L[ax] = edge->pos;
   125 			Float SAL = (lbb.w()*lbb.h() + lbb.w()*lbb.d() + lbb.h()*lbb.d());
   123 			Float SAL = (lbb.w()*lbb.h() + lbb.w()*lbb.d() + lbb.h()*lbb.d());
   126 			Float SAR = (rbb.w()*rbb.h() + rbb.w()*rbb.d() + rbb.h()*rbb.d());
   124 			Float SAR = (rbb.w()*rbb.h() + rbb.w()*rbb.d() + rbb.h()*rbb.d());
   127 			Float splitcost = K*SAV + SAL*(K + lnum) + SAR*(K + rnum);
   125 			Float splitcost = K*SAV + SAL*(K + lnum) + SAR*(K + rnum);
   128 
   126 
   129 			if (splitcost < cost)
   127 			if (splitcost < cost)
   136 			if (!edge->end)
   134 			if (!edge->end)
   137 				lnum++;
   135 				lnum++;
   138 		}
   136 		}
   139 	}
   137 	}
   140 
   138 
   141 	if (splitedge == edges[2].end())
   139 	// we actually need to compare with edges[0].end(), but
       
   140 	// MSVC does not allow comparison of iterators from differen instances of vector
       
   141 	// it's OK this way, because axis will be zero if no good split was found
       
   142 	if (splitedge == edges[axis].end())
   142 	{
   143 	{
   143 		node->setLeaf();
   144 		node->setLeaf();
   144 		return;
   145 		return;
   145 	}
   146 	}
   146 
   147 
   150 // export kd-tree as .obj for visualization
   151 // export kd-tree as .obj for visualization
   151 // this would be hard to reconstruct later
   152 // this would be hard to reconstruct later
   152 	static ofstream F("kdtree.obj");
   153 	static ofstream F("kdtree.obj");
   153 	Vector v;
   154 	Vector v;
   154 	static int f=0;
   155 	static int f=0;
   155 	v.cell[axis] = node->getSplit();
   156 	v[axis] = node->getSplit();
   156 	v.cell[(axis+1)%3] = bounds.L.cell[(axis+1)%3];
   157 	v[(axis+1)%3] = bounds.L[(axis+1)%3];
   157 	v.cell[(axis+2)%3] = bounds.L.cell[(axis+2)%3];
   158 	v[(axis+2)%3] = bounds.L[(axis+2)%3];
   158 	F << "v " << v.x << " " << v.y << " " << v.z << endl;
   159 	F << "v " << v.x << " " << v.y << " " << v.z << endl;
   159 	v.cell[(axis+1)%3] = bounds.L.cell[(axis+1)%3];
   160 	v[(axis+1)%3] = bounds.L[(axis+1)%3];
   160 	v.cell[(axis+2)%3] = bounds.H.cell[(axis+2)%3];
   161 	v[(axis+2)%3] = bounds.H[(axis+2)%3];
   161 	F << "v " << v.x << " " << v.y << " " << v.z << endl;
   162 	F << "v " << v.x << " " << v.y << " " << v.z << endl;
   162 	v.cell[(axis+1)%3] = bounds.H.cell[(axis+1)%3];
   163 	v[(axis+1)%3] = bounds.H[(axis+1)%3];
   163 	v.cell[(axis+2)%3] = bounds.H.cell[(axis+2)%3];
   164 	v[(axis+2)%3] = bounds.H[(axis+2)%3];
   164 	F << "v " << v.x << " " << v.y << " " << v.z << endl;
   165 	F << "v " << v.x << " " << v.y << " " << v.z << endl;
   165 	v.cell[(axis+1)%3] = bounds.H.cell[(axis+1)%3];
   166 	v[(axis+1)%3] = bounds.H[(axis+1)%3];
   166 	v.cell[(axis+2)%3] = bounds.L.cell[(axis+2)%3];
   167 	v[(axis+2)%3] = bounds.L[(axis+2)%3];
   167 	F << "v " << v.x << " " << v.y << " " << v.z << endl;
   168 	F << "v " << v.x << " " << v.y << " " << v.z << endl;
   168 	F << "f " << f+1 << " " << f+2 << " " << f+3 << " " << f+4 << endl;
   169 	F << "f " << f+1 << " " << f+2 << " " << f+3 << " " << f+4 << endl;
   169 	f += 4;
   170 	f += 4;
   170 #endif
   171 #endif
   171 
   172 
   172 	// split this node
   173 	// split this node
   173 	delete shapes;
   174 	delete shapes;
   174 
   175 
   175 	BBox lbb = bounds;
   176 	BBox lbb = bounds;
   176 	BBox rbb = bounds;
   177 	BBox rbb = bounds;
   177 	lbb.H.cell[axis] = node->getSplit();
   178 	lbb.H[axis] = node->getSplit();
   178 	rbb.L.cell[axis] = node->getSplit();
   179 	rbb.L[axis] = node->getSplit();
   179 	node->setChildren(new KdNode[2]);
   180 	node->setChildren(new (mempool.alloc()) KdNode);
       
   181 	new (mempool.alloc()) KdNode;
   180 	node->setAxis(axis);
   182 	node->setAxis(axis);
   181 
   183 
   182 	for (edge = edges[axis].begin(); edge != splitedge; edge++)
   184 	for (edge = edges[axis].begin(); edge != splitedge; edge++)
   183 		if (!edge->end && edge->shape->intersect_bbox(lbb))
   185 		if (!edge->end && edge->shape->intersect_bbox(lbb))
   184 			node->getLeftChild()->addShape(edge->shape);
   186 			node->getLeftChild()->addShape(edge->shape);
   217 
   219 
   218 	/* pointers to the far child node and current node */
   220 	/* pointers to the far child node and current node */
   219 	KdNode *farchild, *node;
   221 	KdNode *farchild, *node;
   220 	node = root;
   222 	node = root;
   221 
   223 
       
   224 #ifdef MSVC
       
   225 	// MSVC wants constant expression here... hope it won't overflow :)
       
   226 	StackElem stack[64];
       
   227 #else
   222 	StackElem stack[max_depth];
   228 	StackElem stack[max_depth];
       
   229 #endif
   223 
   230 
   224 	int entry = 0, exit = 1;
   231 	int entry = 0, exit = 1;
   225 	stack[entry].t = a;
   232 	stack[entry].t = a;
   226 
   233 
   227 	/* distinguish between internal and external origin of a ray*/
   234 	/* distinguish between internal and external origin of a ray*/
   292 			assert(exit < max_depth);
   299 			assert(exit < max_depth);
   293 
   300 
   294 			stack[exit].prev = tmp;
   301 			stack[exit].prev = tmp;
   295 			stack[exit].t = t;
   302 			stack[exit].t = t;
   296 			stack[exit].node = farchild;
   303 			stack[exit].node = farchild;
   297 			stack[exit].pb.cell[axis] = splitVal;
   304 			stack[exit].pb[axis] = splitVal;
   298 			stack[exit].pb.cell[mod3[axis+1]] = ray.o.cell[mod3[axis+1]]
   305 			stack[exit].pb[mod3[axis+1]] = ray.o[mod3[axis+1]]
   299 				+ t * ray.dir.cell[mod3[axis+1]];
   306 				+ t * ray.dir[mod3[axis+1]];
   300 			stack[exit].pb.cell[mod3[axis+2]] = ray.o.cell[mod3[axis+2]]
   307 			stack[exit].pb[mod3[axis+2]] = ray.o[mod3[axis+2]]
   301 				+ t * ray.dir.cell[mod3[axis+2]];
   308 				+ t * ray.dir[mod3[axis+2]];
   302 		}
   309 		}
   303 
   310 
   304 		/* current node is the leaf . . . empty or full */
   311 		/* current node is the leaf . . . empty or full */
   305 		Shape *nearest_shape = NULL;
   312 		Shape *nearest_shape = NULL;
   306 		Float dist = stack[exit].t;
   313 		Float dist = stack[exit].t;
   326 
   333 
   327 	/* ray leaves the scene */
   334 	/* ray leaves the scene */
   328 	return NULL;
   335 	return NULL;
   329 }
   336 }
   330 
   337 
   331 #ifndef NO_SSE
   338 #ifndef NO_SIMD
   332 // stack element for kd-tree traversal (packet version)
   339 // stack element for kd-tree traversal (packet version)
   333 struct StackElem4
   340 struct StackElem4
   334 {
   341 {
   335 	KdNode* node; /* pointer to far child */
   342 	KdNode* node; /* pointer to far child */
   336 	__m128 t; /* the entry/exit signed distance */
   343 	mfloat4 t; /* the entry/exit signed distance */
   337 	VectorPacket pb; /* the coordinates of entry/exit point */
   344 	VectorPacket pb; /* the coordinates of entry/exit point */
   338 	int prev;
   345 	int prev;
   339 };
   346 };
   340 
   347 
   341 void KdTree::packet_intersection(const Shape **origin_shapes, const RayPacket &rays,
   348 void KdTree::packet_intersection(const Shape* const* origin_shapes, const RayPacket &rays,
   342 		Float *nearest_distances, Shape **nearest_shapes)
   349 		Float *nearest_distances, Shape **nearest_shapes)
   343 {
   350 {
   344 	__m128 a, b; /* entry/exit signed distance */
   351 	mfloat4 a, b; /* entry/exit signed distance */
   345 	__m128 t;    /* signed distance to the splitting plane */
   352 	mfloat4 t;    /* signed distance to the splitting plane */
   346 	__m128 mask = mZero;
   353 	mfloat4 mask = mZero;
   347 
   354 
   348 	/* if we have no tree, fall back to naive test */
   355 	/* if we have no tree, fall back to naive test */
   349 	if (!built)
   356 	if (!built)
   350 		Container::packet_intersection(origin_shapes, rays, nearest_distances, nearest_shapes);
   357 		Container::packet_intersection(origin_shapes, rays, nearest_distances, nearest_shapes);
   351 
   358 
   352 	// nearest_shapes[0..4] = NULL
   359 	// nearest_shapes[0..4] = NULL
   353 	memset(nearest_shapes, 0, 4*sizeof(Shape*));
   360 	memset(nearest_shapes, 0, 4*sizeof(Shape*));
   354 
   361 
   355 	mask = bbox.intersect_packet(rays, a, b);
   362 	mask = bbox.intersect_packet(rays, a, b);
   356 	if (!_mm_movemask_ps(mask))
   363 	if (!mmovemask(mask))
   357 		return;
   364 		return;
   358 
   365 
   359 	/* pointers to the far child node and current node */
   366 	/* pointers to the far child node and current node */
   360 	KdNode *farchild, *node;
   367 	KdNode *farchild, *node;
   361 	node = root;
   368 	node = root;
   362 
   369 
       
   370 #ifdef MSVC
       
   371 	// MSVC wants constant expression here... hope it won't overflow :)
       
   372 	StackElem4 stack[64];
       
   373 #else
   363 	StackElem4 stack[max_depth];
   374 	StackElem4 stack[max_depth];
       
   375 #endif
   364 
   376 
   365 	int entry = 0, exit = 1;
   377 	int entry = 0, exit = 1;
   366 	stack[entry].t = a;
   378 	stack[entry].t = a;
   367 
   379 
   368 	/* distinguish between internal and external origin of a ray*/
   380 	/* distinguish between internal and external origin of a ray*/
   369 	t = _mm_cmplt_ps(a, mZero);
   381 	t = mcmplt(a, mZero);
   370 	stack[entry].pb = rays.o + rays.dir * a;
   382 	stack[entry].pb = rays.o + rays.dir * a;
   371 	stack[entry].pb.mx = _mm_or_ps(_mm_andnot_ps(t, stack[entry].pb.mx),
   383 	stack[entry].pb.mx = mselect(t, rays.o.mx, stack[entry].pb.mx);
   372 		_mm_and_ps(t, rays.o.mx));
   384 	stack[entry].pb.my = mselect(t, rays.o.my, stack[entry].pb.my);
   373 	stack[entry].pb.my = _mm_or_ps(_mm_andnot_ps(t, stack[entry].pb.my),
   385 	stack[entry].pb.mz = mselect(t, rays.o.mz, stack[entry].pb.mz);
   374 		_mm_and_ps(t, rays.o.my));
       
   375 	stack[entry].pb.mz = _mm_or_ps(_mm_andnot_ps(t, stack[entry].pb.mz),
       
   376 		_mm_and_ps(t, rays.o.mz));
       
   377 
   386 
   378 	/* setup initial exit point in the stack */
   387 	/* setup initial exit point in the stack */
   379 	stack[exit].t = b;
   388 	stack[exit].t = b;
   380 	stack[exit].pb = rays.o + rays.dir * b;
   389 	stack[exit].pb = rays.o + rays.dir * b;
   381 	stack[exit].node = NULL;
   390 	stack[exit].node = NULL;
   382 
   391 
   383 	/* loop, traverse through the whole kd-tree,
   392 	/* loop, traverse through the whole kd-tree,
   384 	until an object is intersected or ray leaves the scene */
   393 	until an object is intersected or ray leaves the scene */
   385 	__m128 splitVal;
   394 	mfloat4 splitVal;
   386 	int axis;
   395 	int axis;
   387 	static const int mod3[] = {0,1,2,0,1};
   396 	static const int mod3[] = {0,1,2,0,1};
   388 	const VectorPacket invdirs = mOne / rays.dir;
   397 	const VectorPacket invdirs = mOne / rays.dir;
   389 	while (node)
   398 	while (node)
   390 	{
   399 	{
   391 		/* loop until a leaf is found */
   400 		/* loop until a leaf is found */
   392 		while (!node->isLeaf())
   401 		while (!node->isLeaf())
   393 		{
   402 		{
   394 			/* retrieve position of splitting plane */
   403 			/* retrieve position of splitting plane */
   395 			splitVal = _mm_set_ps1(node->getSplit());
   404 			splitVal = mset1(node->getSplit());
   396 			axis = node->getAxis();
   405 			axis = node->getAxis();
   397 
   406 
   398 			// mask out invalid rays with near > far
   407 			// mask out invalid rays with near > far
   399 			const __m128 curmask = _mm_and_ps(mask, _mm_cmple_ps(stack[entry].t, stack[exit].t));
   408 			const mfloat4 curmask = mand(mask, mcmple(stack[entry].t, stack[exit].t));
   400 			const __m128 entry_lt = _mm_cmplt_ps(stack[entry].pb.ma[axis], splitVal);
   409 			const mfloat4 entry_lt = mcmplt(stack[entry].pb.ma[axis], splitVal);
   401 			const __m128 entry_gt = _mm_cmpgt_ps(stack[entry].pb.ma[axis], splitVal);
   410 			const mfloat4 entry_gt = mcmpgt(stack[entry].pb.ma[axis], splitVal);
   402 			const __m128 exit_lt = _mm_cmplt_ps(stack[exit].pb.ma[axis], splitVal);
   411 			const mfloat4 exit_lt = mcmplt(stack[exit].pb.ma[axis], splitVal);
   403 			const __m128 exit_gt = _mm_cmpgt_ps(stack[exit].pb.ma[axis], splitVal);
   412 			const mfloat4 exit_gt = mcmpgt(stack[exit].pb.ma[axis], splitVal);
   404 
   413 
   405 			// if all of
   414 			// if all of
   406 			// stack[entry].pb[axis] <= splitVal,
   415 			// stack[entry].pb[axis] <= splitVal,
   407 			// stack[exit].pb[axis] <= splitVal
   416 			// stack[exit].pb[axis] <= splitVal
   408 			if (!_mm_movemask_ps(
   417 			if (!mmovemask(
   409 				_mm_and_ps(_mm_or_ps(entry_gt, exit_gt), curmask)))
   418 				mand(mor(entry_gt, exit_gt), curmask)))
   410 			{
   419 			{
   411 				node = node->getLeftChild();
   420 				node = node->getLeftChild();
   412 				continue;
   421 				continue;
   413 			}
   422 			}
   414 
   423 
   415 			// if all of
   424 			// if all of
   416 			// stack[entry].pb[axis] >= splitVal,
   425 			// stack[entry].pb[axis] >= splitVal,
   417 			// stack[exit].pb[axis] >= splitVal
   426 			// stack[exit].pb[axis] >= splitVal
   418 			if (!_mm_movemask_ps(
   427 			if (!mmovemask(
   419 				_mm_and_ps(_mm_or_ps(entry_lt, exit_lt), curmask)))
   428 				mand(mor(entry_lt, exit_lt), curmask)))
   420 			{
   429 			{
   421 				node = node->getRightChild();
   430 				node = node->getRightChild();
   422 				continue;
   431 				continue;
   423 			}
   432 			}
   424 
   433 
   425 			// any of
   434 			// any of
   426 			// stack[entry].pb[axis] < splitVal,
   435 			// stack[entry].pb[axis] < splitVal,
   427 			// stack[exit].pb[axis] > splitVal
   436 			// stack[exit].pb[axis] > splitVal
   428 			bool cond1 = _mm_movemask_ps(
   437 			int cond1 = mmovemask(
   429 				_mm_and_ps(_mm_and_ps(entry_lt, exit_gt), curmask));
   438 				mand(mand(entry_lt, exit_gt), curmask));
   430 
   439 
   431 			// any of
   440 			// any of
   432 			// stack[entry].pb[axis] > splitVal,
   441 			// stack[entry].pb[axis] > splitVal,
   433 			// stack[exit].pb[axis] < splitVal
   442 			// stack[exit].pb[axis] < splitVal
   434 			bool cond2 = _mm_movemask_ps(
   443 			int cond2 = mmovemask(
   435 				_mm_and_ps(_mm_and_ps(entry_gt, exit_lt), curmask));
   444 				mand(mand(entry_gt, exit_lt), curmask));
   436 
   445 
   437 			if ((!cond1 && !cond2) || (cond1 && cond2))
   446 			if ((!cond1 && !cond2) || (cond1 && cond2))
   438 			{
   447 			{
   439 				// fall back to single rays
   448 				// fall back to single rays
   440 				// FIXME: split rays and continue
   449 				// FIXME: split rays and continue
   457 			}
   466 			}
   458 
   467 
   459 			/* traverse both children */
   468 			/* traverse both children */
   460 
   469 
   461 			/* signed distance to the splitting plane */
   470 			/* signed distance to the splitting plane */
   462 			t = _mm_mul_ps(_mm_sub_ps(splitVal, rays.o.ma[axis]), invdirs.ma[axis]);
   471 			t = mmul(msub(splitVal, rays.o.ma[axis]), invdirs.ma[axis]);
   463 
   472 
   464 			/* setup the new exit point and push it onto stack */
   473 			/* setup the new exit point and push it onto stack */
   465 			register int tmp = exit;
   474 			register int tmp = exit;
   466 
   475 
   467 			exit++;
   476 			exit++;
   472 			stack[exit].prev = tmp;
   481 			stack[exit].prev = tmp;
   473 			stack[exit].t = t;
   482 			stack[exit].t = t;
   474 			stack[exit].node = farchild;
   483 			stack[exit].node = farchild;
   475 			stack[exit].pb.ma[axis] = splitVal;
   484 			stack[exit].pb.ma[axis] = splitVal;
   476 			stack[exit].pb.ma[mod3[axis+1]] =
   485 			stack[exit].pb.ma[mod3[axis+1]] =
   477 				_mm_add_ps(rays.o.ma[mod3[axis+1]], _mm_mul_ps(t, rays.dir.ma[mod3[axis+1]]));
   486 				madd(rays.o.ma[mod3[axis+1]], mmul(t, rays.dir.ma[mod3[axis+1]]));
   478 			stack[exit].pb.ma[mod3[axis+2]] =
   487 			stack[exit].pb.ma[mod3[axis+2]] =
   479 				_mm_add_ps(rays.o.ma[mod3[axis+2]], _mm_mul_ps(t, rays.dir.ma[mod3[axis+2]]));
   488 				madd(rays.o.ma[mod3[axis+2]], mmul(t, rays.dir.ma[mod3[axis+2]]));
   480 		}
   489 		}
   481 
   490 
   482 		/* current node is the leaf . . . empty or full */
   491 		/* current node is the leaf . . . empty or full */
   483 		__m128 dists = stack[exit].t;
   492 		mfloat4 dists = stack[exit].t;
   484 		ShapeList::iterator shape;
   493 		ShapeList::iterator shape;
   485 		__m128 results;
   494 		mfloat4 results;
   486 		__m128 newmask = mask;
   495 		mfloat4 newmask = mask;
   487 		for (shape = node->getShapes()->begin(); shape != node->getShapes()->end(); shape++)
   496 		for (shape = node->getShapes()->begin(); shape != node->getShapes()->end(); shape++)
   488 		{
   497 		{
   489 			results = (*shape)->intersect_packet(rays, dists);
   498 			results = (*shape)->intersect_packet(rays, dists);
   490 			int valid = _mm_movemask_ps(
   499 			int valid = mmovemask(
   491 				_mm_and_ps(mask, _mm_and_ps(results,
   500 				mand(mask, mand(results,
   492 				_mm_cmpge_ps(dists, _mm_sub_ps(stack[entry].t, mEps)))));
   501 				mcmpge(dists, msub(stack[entry].t, mEps)))));
   493 			for (int i = 0; i < 4; i++)
   502 			for (int i = 0; i < 4; i++)
   494 			{
   503 			{
   495 				if (*shape != origin_shapes[i] && ((valid>>i)&1))
   504 				if (*shape != origin_shapes[i] && ((valid>>i)&1))
   496 				{
   505 				{
   497 					nearest_shapes[i] = *shape;
   506 					nearest_shapes[i] = *shape;
   500 				}
   509 				}
   501 			}
   510 			}
   502 		}
   511 		}
   503 
   512 
   504 		mask = newmask;
   513 		mask = newmask;
   505 		if (!_mm_movemask_ps(mask))
   514 		if (!mmovemask(mask))
   506 			return;
   515 			return;
   507 
   516 
   508 		entry = exit;
   517 		entry = exit;
   509 
   518 
   510 		/* retrieve the pointer to the next node,
   519 		/* retrieve the pointer to the next node,