src/raytracer.cc
branch pyrit
changeset 92 9af5c039b678
parent 91 9d66d323c354
child 93 96d65f841791
equal deleted inserted replaced
91:9d66d323c354 92:9af5c039b678
   118 	}
   118 	}
   119 
   119 
   120 	return acc;
   120 	return acc;
   121 }
   121 }
   122 
   122 
   123 #ifndef NO_SSE
   123 #ifndef NO_SIMD
   124 VectorPacket Raytracer::PhongShader_packet(const Shape **shapes,
   124 VectorPacket Raytracer::PhongShader_packet(const Shape* const* shapes,
   125 	const VectorPacket &P, const VectorPacket &N, const VectorPacket &V)
   125 	const VectorPacket &P, const VectorPacket &N, const VectorPacket &V)
   126 {
   126 {
   127 	VectorPacket acc, colour;
   127 	VectorPacket acc, colour;
   128 	union { __m128 ambient; float ambient_f[4]; };
   128 	union { mfloat4 ambient; float ambient_f[4]; };
   129 	union { __m128 diffuse; float diffuse_f[4]; };
   129 	union { mfloat4 diffuse; float diffuse_f[4]; };
   130 	union { __m128 specular; float specular_f[4]; };
   130 	union { mfloat4 specular; float specular_f[4]; };
   131 	union { __m128 shininess; float shininess_f[4]; };
   131 	union { mfloat4 shininess; float shininess_f[4]; };
   132 
   132 
   133 	for (int i = 0; i < 4; i++)
   133 	for (int i = 0; i < 4; i++)
   134 		if (shapes[i] == NULL)
   134 		if (shapes[i] == NULL)
   135 		{
   135 		{
   136 			ambient_f[i] = 0;
   136 			ambient_f[i] = 0;
   152 		}
   152 		}
   153 
   153 
   154 	// ambient
   154 	// ambient
   155 	acc = colour * ambient;
   155 	acc = colour * ambient;
   156 
   156 
   157 	Shape **shadow_shapes;
   157 	Shape *shadow_shapes[4];
   158 	vector<Light*>::iterator light;
   158 	vector<Light*>::iterator light;
   159 	for (light = lights.begin(); light != lights.end(); light++)
   159 	for (light = lights.begin(); light != lights.end(); light++)
   160 	{
   160 	{
   161 		 // direction vector to light
   161 		 // direction vector to light
   162 		VectorPacket L = VectorPacket((*light)->pos) - P;
   162 		VectorPacket L = VectorPacket((*light)->pos) - P;
   163 		L.normalize();
   163 		L.normalize();
   164 		const __m128 L_dot_N = dot(L, N);
   164 		const mfloat4 L_dot_N = dot(L, N);
   165 		__m128 valid = _mm_cmpgt_ps(L_dot_N, mZero);
   165 		mfloat4 valid = mcmpgt(L_dot_N, mZero);
   166 
   166 
   167 		// test if this light is occluded (sharp shadows)
   167 		// test if this light is occluded (sharp shadows)
   168 		if ((*light)->cast_shadows)
   168 		if ((*light)->cast_shadows)
   169 		{
   169 		{
   170 			const RayPacket shadow_rays = RayPacket(P, L);
   170 			const RayPacket shadow_rays = RayPacket(P, L);
   171 			union { __m128 dists; float dists_f[4]; };
   171 			union { mfloat4 dists; float dists_f[4]; };
   172 			dists = mInf;
   172 			dists = mInf;
   173 			top->packet_intersection(shapes, shadow_rays,
   173 			top->packet_intersection(shapes, shadow_rays,
   174 				dists_f, shadow_shapes);
   174 				dists_f, shadow_shapes);
   175 			valid = _mm_and_ps(valid, _mm_cmpeq_ps(dists, mInf));
   175 			valid = mand(valid, mcmpeq(dists, mInf));
   176 		}
   176 		}
   177 
   177 
   178 		const VectorPacket R = L - N * _mm_mul_ps(mTwo, L_dot_N);
   178 		const VectorPacket R = L - N * mmul(mTwo, L_dot_N);
   179 		const __m128 R_dot_V = dot(R, V);
   179 		const mfloat4 R_dot_V = dot(R, V);
   180 
   180 
   181 		// diffuse
   181 		// diffuse
   182 		acc.selectiveAdd(valid,
   182 		acc.selectiveAdd(valid,
   183 			colour * VectorPacket((*light)->colour) * _mm_mul_ps(diffuse, L_dot_N));
   183 			colour * VectorPacket((*light)->colour) * mmul(diffuse, L_dot_N));
   184 
   184 
   185 		// specular
   185 		// specular
   186 		valid = _mm_and_ps(valid, _mm_cmpgt_ps(R_dot_V, mZero));
   186 		valid = mand(valid, mcmpgt(R_dot_V, mZero));
   187 		__m128 spec = _mm_mul_ps(_mm_mul_ps(specular, _mm_set_ps1((*light)->colour.r)),
   187 		mfloat4 spec = mmul(mmul(specular, mset1((*light)->colour.r)),
   188 			_mm_fastpow(R_dot_V, shininess));
   188 			mfastpow(R_dot_V, shininess));
   189 		acc.selectiveAdd(valid, spec);
   189 		acc.selectiveAdd(valid, spec);
   190 	}
   190 	}
   191 	return acc;
   191 	return acc;
   192 }
   192 }
   193 #endif
   193 #endif
   295 		lightScatter(ray, nearest_shape, depth, P, normal, from_inside, col);
   295 		lightScatter(ray, nearest_shape, depth, P, normal, from_inside, col);
   296 		return col;
   296 		return col;
   297 	}
   297 	}
   298 }
   298 }
   299 
   299 
   300 #ifndef NO_SSE
   300 #ifndef NO_SIMD
   301 void Raytracer::raytracePacket(RayPacket &rays, Colour *results)
   301 void Raytracer::raytracePacket(RayPacket &rays, Colour *results)
   302 {
   302 {
   303 	union {
   303 	union {
   304 		float nearest_distances[4];
   304 		float nearest_distances[4];
   305 		__m128 m_nearest_distances;
   305 		mfloat4 m_nearest_distances;
   306 	};
   306 	};
   307 	__m128 mask;
   307 	mfloat4 mask;
   308 	Shape *nearest_shapes[4];
   308 	Shape *nearest_shapes[4];
   309 	static const Shape *origin_shapes[4] = {NULL, NULL, NULL, NULL};
   309 	static const Shape *origin_shapes[4] = {NULL, NULL, NULL, NULL};
   310 	m_nearest_distances = mInf;
   310 	m_nearest_distances = mInf;
   311 	mask = mAllSet;
       
   312 
   311 
   313 	top->packet_intersection(origin_shapes, rays, nearest_distances, nearest_shapes);
   312 	top->packet_intersection(origin_shapes, rays, nearest_distances, nearest_shapes);
   314 
   313 
   315 	mask = _mm_cmpneq_ps(m_nearest_distances, mInf);
   314 	mask = mcmpneq(m_nearest_distances, mInf);
   316 	if (!_mm_movemask_ps(mask))
   315 	if (!mmovemask(mask))
   317 	{
   316 	{
   318 		for (int i = 0; i < 4; i++)
   317 		for (int i = 0; i < 4; i++)
   319 			results[i] = bg_colour;
   318 			results[i] = bg_colour;
   320 		return;
   319 		return;
   321 	}
   320 	}
   322 
   321 
   323 	const VectorPacket P = rays.o + rays.dir * m_nearest_distances; // point of intersection
   322 	const VectorPacket P = rays.o + rays.dir * m_nearest_distances; // point of intersection
   324 
       
   325 	VectorPacket normal;
   323 	VectorPacket normal;
   326 	for (int i = 0; i < 4; i++)
   324 	for (int i = 0; i < 4; i++)
   327 		if (nearest_shapes[i] != NULL)
   325 		if (nearest_shapes[i] != NULL)
   328 			normal.setVector(i, nearest_shapes[i]->normal(P.getVector(i)));
   326 			normal.setVector(i, nearest_shapes[i]->normal(P.getVector(i)));
   329 
   327 
   330 	// make shapes double sided
   328 	// make shapes double sided
   331 	__m128 from_inside = _mm_cmpgt_ps(dot(normal, rays.dir), mZero);
   329 	mfloat4 from_inside = mcmpgt(dot(normal, rays.dir), mZero);
   332 	normal.mx = _mm_or_ps(_mm_and_ps(from_inside, _mm_sub_ps(mZero, normal.mx)),
   330 	normal.mx = mselect(from_inside, msub(mZero, normal.mx), normal.mx);
   333 		_mm_andnot_ps(from_inside, normal.mx));
   331 	normal.my = mselect(from_inside, msub(mZero, normal.my), normal.my);
   334 	normal.my = _mm_or_ps(_mm_and_ps(from_inside, _mm_sub_ps(mZero, normal.my)),
   332 	normal.mz = mselect(from_inside, msub(mZero, normal.mz), normal.mz);
   335 		_mm_andnot_ps(from_inside, normal.my));
       
   336 	normal.mz = _mm_or_ps(_mm_and_ps(from_inside, _mm_sub_ps(mZero, normal.mz)),
       
   337 		_mm_andnot_ps(from_inside, normal.mz));
       
   338 
   333 
   339 	// shading function
   334 	// shading function
   340 	VectorPacket pres =
   335 	VectorPacket pres = PhongShader_packet(nearest_shapes, P, normal, rays.dir);
   341 		PhongShader_packet(const_cast<const Shape**>(nearest_shapes), P, normal, rays.dir);
   336 	//pres.mx = mselect(mask, pres.mx, mset1(bg_colour.r));
   342 	//pres.mx = _mm_or_ps(_mm_and_ps(mask, pres.mx), _mm_andnot_ps(mask, _mm_set_ps1(bg_colour.r)));
   337 	//pres.my = mselect(mask, pres.my, mset1(bg_colour.g));
   343 	//pres.my = _mm_or_ps(_mm_and_ps(mask, pres.my), _mm_andnot_ps(mask, _mm_set_ps1(bg_colour.g)));
   338 	//pres.mz = mselect(mask, pres.mz, mset1(bg_colour.b));
   344 	//pres.mz = _mm_or_ps(_mm_and_ps(mask, pres.mz), _mm_andnot_ps(mask, _mm_set_ps1(bg_colour.b)));
       
   345 
   339 
   346 	for (int i = 0; i < 4; i++)
   340 	for (int i = 0; i < 4; i++)
   347 		if (nearest_shapes[i] != NULL)
   341 		if (nearest_shapes[i] != NULL)
   348 		{
   342 		{
   349 			results[i] = pres.getVector(i);
   343 			results[i] = pres.getVector(i);
   350 			lightScatter(rays[i], nearest_shapes[i], 0,
   344 			lightScatter(rays[i], nearest_shapes[i], 0,
   351 				P.getVector(i), normal.getVector(i), (_mm_movemask_ps(from_inside)>>i)&1,
   345 				P.getVector(i), normal.getVector(i), (mmovemask(from_inside)>>i)&1,
   352 				results[i]);
   346 				results[i]);
   353 		}
   347 		}
   354 		else
   348 		else
   355 			results[i] = bg_colour;
   349 			results[i] = bg_colour;
   356 }
   350 }
   362 	Raytracer *rt = (Raytracer*)d;
   356 	Raytracer *rt = (Raytracer*)d;
   363 	Sample my_queue[my_queue_size];
   357 	Sample my_queue[my_queue_size];
   364 	Colour my_colours[my_queue_size];
   358 	Colour my_colours[my_queue_size];
   365 	int my_count;
   359 	int my_count;
   366 	Ray ray;
   360 	Ray ray;
   367 #ifndef NO_SSE
   361 #ifndef NO_SIMD
   368 	RayPacket rays;
   362 	RayPacket rays;
   369 	const bool can_use_packets = (rt->use_packets && rt->sampler->packetableSamples());
   363 	const bool can_use_packets = (rt->use_packets && rt->sampler->packetableSamples());
   370 #endif
   364 #endif
   371 	for (;;)
   365 	for (;;)
   372 	{
   366 	{
   405 		if (rt->sample_queue_count <= my_queue_size*2)
   399 		if (rt->sample_queue_count <= my_queue_size*2)
   406 			pthread_cond_signal(&rt->worker_ready_cond);
   400 			pthread_cond_signal(&rt->worker_ready_cond);
   407 		pthread_mutex_unlock(&rt->sample_queue_mutex);
   401 		pthread_mutex_unlock(&rt->sample_queue_mutex);
   408 
   402 
   409 		// do the work
   403 		// do the work
   410 #ifndef NO_SSE
   404 #ifndef NO_SIMD
   411 		if (can_use_packets)
   405 		if (can_use_packets)
   412 		{
   406 		{
   413 			// packet ray tracing
   407 			// packet ray tracing
   414 			assert((my_count % 4) == 0);
   408 			assert((my_count % 4) == 0);
   415 			for (int i = 0; i < my_count; i+=4)
   409 			for (int i = 0; i < my_count; i+=4)
   433 		pthread_mutex_lock(&rt->sampler_mutex);
   427 		pthread_mutex_lock(&rt->sampler_mutex);
   434 		for (int i = 0; i < my_count; i++)
   428 		for (int i = 0; i < my_count; i++)
   435 			rt->sampler->saveSample(my_queue[i], my_colours[i]);
   429 			rt->sampler->saveSample(my_queue[i], my_colours[i]);
   436 		pthread_mutex_unlock(&rt->sampler_mutex);
   430 		pthread_mutex_unlock(&rt->sampler_mutex);
   437 	}
   431 	}
       
   432 	return NULL;
   438 }
   433 }
   439 
   434 
   440 void Raytracer::render()
   435 void Raytracer::render()
   441 {
   436 {
   442 	if (!sampler || !camera || !top)
   437 	if (!sampler || !camera || !top)
   453 
   448 
   454 	sampler->init();
   449 	sampler->init();
   455 
   450 
   456 	// create workers
   451 	// create workers
   457 	dbgmsg(1, "* using %d threads\n", num_threads);
   452 	dbgmsg(1, "* using %d threads\n", num_threads);
   458 	pthread_t threads[num_threads];
   453 	pthread_t *threads = new pthread_t[num_threads];
   459 
   454 
   460 	dbgmsg(1, "* raytracing...\n");
   455 	dbgmsg(1, "* raytracing...\n");
   461 
   456 
   462 	while ( (sampnum = sampler->initSampleSet()) > 0 )
   457 	while ( (sampnum = sampler->initSampleSet()) > 0 )
   463 	{
   458 	{
   534 			pthread_join(threads[t], NULL);
   529 			pthread_join(threads[t], NULL);
   535 
   530 
   536 		phase ++;
   531 		phase ++;
   537 	}
   532 	}
   538 
   533 
       
   534 	delete[] threads;
   539 	delete[] sample_queue;
   535 	delete[] sample_queue;
   540 }
   536 }
   541 
   537 
   542 void Raytracer::ambientOcclusion(int samples, Float distance, Float angle)
   538 void Raytracer::ambientOcclusion(int samples, Float distance, Float angle)
   543 {
   539 {