118 } |
118 } |
119 |
119 |
120 return acc; |
120 return acc; |
121 } |
121 } |
122 |
122 |
123 #ifndef NO_SSE |
123 #ifndef NO_SIMD |
124 VectorPacket Raytracer::PhongShader_packet(const Shape **shapes, |
124 VectorPacket Raytracer::PhongShader_packet(const Shape* const* shapes, |
125 const VectorPacket &P, const VectorPacket &N, const VectorPacket &V) |
125 const VectorPacket &P, const VectorPacket &N, const VectorPacket &V) |
126 { |
126 { |
127 VectorPacket acc, colour; |
127 VectorPacket acc, colour; |
128 union { __m128 ambient; float ambient_f[4]; }; |
128 union { mfloat4 ambient; float ambient_f[4]; }; |
129 union { __m128 diffuse; float diffuse_f[4]; }; |
129 union { mfloat4 diffuse; float diffuse_f[4]; }; |
130 union { __m128 specular; float specular_f[4]; }; |
130 union { mfloat4 specular; float specular_f[4]; }; |
131 union { __m128 shininess; float shininess_f[4]; }; |
131 union { mfloat4 shininess; float shininess_f[4]; }; |
132 |
132 |
133 for (int i = 0; i < 4; i++) |
133 for (int i = 0; i < 4; i++) |
134 if (shapes[i] == NULL) |
134 if (shapes[i] == NULL) |
135 { |
135 { |
136 ambient_f[i] = 0; |
136 ambient_f[i] = 0; |
152 } |
152 } |
153 |
153 |
154 // ambient |
154 // ambient |
155 acc = colour * ambient; |
155 acc = colour * ambient; |
156 |
156 |
157 Shape **shadow_shapes; |
157 Shape *shadow_shapes[4]; |
158 vector<Light*>::iterator light; |
158 vector<Light*>::iterator light; |
159 for (light = lights.begin(); light != lights.end(); light++) |
159 for (light = lights.begin(); light != lights.end(); light++) |
160 { |
160 { |
161 // direction vector to light |
161 // direction vector to light |
162 VectorPacket L = VectorPacket((*light)->pos) - P; |
162 VectorPacket L = VectorPacket((*light)->pos) - P; |
163 L.normalize(); |
163 L.normalize(); |
164 const __m128 L_dot_N = dot(L, N); |
164 const mfloat4 L_dot_N = dot(L, N); |
165 __m128 valid = _mm_cmpgt_ps(L_dot_N, mZero); |
165 mfloat4 valid = mcmpgt(L_dot_N, mZero); |
166 |
166 |
167 // test if this light is occluded (sharp shadows) |
167 // test if this light is occluded (sharp shadows) |
168 if ((*light)->cast_shadows) |
168 if ((*light)->cast_shadows) |
169 { |
169 { |
170 const RayPacket shadow_rays = RayPacket(P, L); |
170 const RayPacket shadow_rays = RayPacket(P, L); |
171 union { __m128 dists; float dists_f[4]; }; |
171 union { mfloat4 dists; float dists_f[4]; }; |
172 dists = mInf; |
172 dists = mInf; |
173 top->packet_intersection(shapes, shadow_rays, |
173 top->packet_intersection(shapes, shadow_rays, |
174 dists_f, shadow_shapes); |
174 dists_f, shadow_shapes); |
175 valid = _mm_and_ps(valid, _mm_cmpeq_ps(dists, mInf)); |
175 valid = mand(valid, mcmpeq(dists, mInf)); |
176 } |
176 } |
177 |
177 |
178 const VectorPacket R = L - N * _mm_mul_ps(mTwo, L_dot_N); |
178 const VectorPacket R = L - N * mmul(mTwo, L_dot_N); |
179 const __m128 R_dot_V = dot(R, V); |
179 const mfloat4 R_dot_V = dot(R, V); |
180 |
180 |
181 // diffuse |
181 // diffuse |
182 acc.selectiveAdd(valid, |
182 acc.selectiveAdd(valid, |
183 colour * VectorPacket((*light)->colour) * _mm_mul_ps(diffuse, L_dot_N)); |
183 colour * VectorPacket((*light)->colour) * mmul(diffuse, L_dot_N)); |
184 |
184 |
185 // specular |
185 // specular |
186 valid = _mm_and_ps(valid, _mm_cmpgt_ps(R_dot_V, mZero)); |
186 valid = mand(valid, mcmpgt(R_dot_V, mZero)); |
187 __m128 spec = _mm_mul_ps(_mm_mul_ps(specular, _mm_set_ps1((*light)->colour.r)), |
187 mfloat4 spec = mmul(mmul(specular, mset1((*light)->colour.r)), |
188 _mm_fastpow(R_dot_V, shininess)); |
188 mfastpow(R_dot_V, shininess)); |
189 acc.selectiveAdd(valid, spec); |
189 acc.selectiveAdd(valid, spec); |
190 } |
190 } |
191 return acc; |
191 return acc; |
192 } |
192 } |
193 #endif |
193 #endif |
295 lightScatter(ray, nearest_shape, depth, P, normal, from_inside, col); |
295 lightScatter(ray, nearest_shape, depth, P, normal, from_inside, col); |
296 return col; |
296 return col; |
297 } |
297 } |
298 } |
298 } |
299 |
299 |
300 #ifndef NO_SSE |
300 #ifndef NO_SIMD |
301 void Raytracer::raytracePacket(RayPacket &rays, Colour *results) |
301 void Raytracer::raytracePacket(RayPacket &rays, Colour *results) |
302 { |
302 { |
303 union { |
303 union { |
304 float nearest_distances[4]; |
304 float nearest_distances[4]; |
305 __m128 m_nearest_distances; |
305 mfloat4 m_nearest_distances; |
306 }; |
306 }; |
307 __m128 mask; |
307 mfloat4 mask; |
308 Shape *nearest_shapes[4]; |
308 Shape *nearest_shapes[4]; |
309 static const Shape *origin_shapes[4] = {NULL, NULL, NULL, NULL}; |
309 static const Shape *origin_shapes[4] = {NULL, NULL, NULL, NULL}; |
310 m_nearest_distances = mInf; |
310 m_nearest_distances = mInf; |
311 mask = mAllSet; |
|
312 |
311 |
313 top->packet_intersection(origin_shapes, rays, nearest_distances, nearest_shapes); |
312 top->packet_intersection(origin_shapes, rays, nearest_distances, nearest_shapes); |
314 |
313 |
315 mask = _mm_cmpneq_ps(m_nearest_distances, mInf); |
314 mask = mcmpneq(m_nearest_distances, mInf); |
316 if (!_mm_movemask_ps(mask)) |
315 if (!mmovemask(mask)) |
317 { |
316 { |
318 for (int i = 0; i < 4; i++) |
317 for (int i = 0; i < 4; i++) |
319 results[i] = bg_colour; |
318 results[i] = bg_colour; |
320 return; |
319 return; |
321 } |
320 } |
322 |
321 |
323 const VectorPacket P = rays.o + rays.dir * m_nearest_distances; // point of intersection |
322 const VectorPacket P = rays.o + rays.dir * m_nearest_distances; // point of intersection |
324 |
|
325 VectorPacket normal; |
323 VectorPacket normal; |
326 for (int i = 0; i < 4; i++) |
324 for (int i = 0; i < 4; i++) |
327 if (nearest_shapes[i] != NULL) |
325 if (nearest_shapes[i] != NULL) |
328 normal.setVector(i, nearest_shapes[i]->normal(P.getVector(i))); |
326 normal.setVector(i, nearest_shapes[i]->normal(P.getVector(i))); |
329 |
327 |
330 // make shapes double sided |
328 // make shapes double sided |
331 __m128 from_inside = _mm_cmpgt_ps(dot(normal, rays.dir), mZero); |
329 mfloat4 from_inside = mcmpgt(dot(normal, rays.dir), mZero); |
332 normal.mx = _mm_or_ps(_mm_and_ps(from_inside, _mm_sub_ps(mZero, normal.mx)), |
330 normal.mx = mselect(from_inside, msub(mZero, normal.mx), normal.mx); |
333 _mm_andnot_ps(from_inside, normal.mx)); |
331 normal.my = mselect(from_inside, msub(mZero, normal.my), normal.my); |
334 normal.my = _mm_or_ps(_mm_and_ps(from_inside, _mm_sub_ps(mZero, normal.my)), |
332 normal.mz = mselect(from_inside, msub(mZero, normal.mz), normal.mz); |
335 _mm_andnot_ps(from_inside, normal.my)); |
|
336 normal.mz = _mm_or_ps(_mm_and_ps(from_inside, _mm_sub_ps(mZero, normal.mz)), |
|
337 _mm_andnot_ps(from_inside, normal.mz)); |
|
338 |
333 |
339 // shading function |
334 // shading function |
340 VectorPacket pres = |
335 VectorPacket pres = PhongShader_packet(nearest_shapes, P, normal, rays.dir); |
341 PhongShader_packet(const_cast<const Shape**>(nearest_shapes), P, normal, rays.dir); |
336 //pres.mx = mselect(mask, pres.mx, mset1(bg_colour.r)); |
342 //pres.mx = _mm_or_ps(_mm_and_ps(mask, pres.mx), _mm_andnot_ps(mask, _mm_set_ps1(bg_colour.r))); |
337 //pres.my = mselect(mask, pres.my, mset1(bg_colour.g)); |
343 //pres.my = _mm_or_ps(_mm_and_ps(mask, pres.my), _mm_andnot_ps(mask, _mm_set_ps1(bg_colour.g))); |
338 //pres.mz = mselect(mask, pres.mz, mset1(bg_colour.b)); |
344 //pres.mz = _mm_or_ps(_mm_and_ps(mask, pres.mz), _mm_andnot_ps(mask, _mm_set_ps1(bg_colour.b))); |
|
345 |
339 |
346 for (int i = 0; i < 4; i++) |
340 for (int i = 0; i < 4; i++) |
347 if (nearest_shapes[i] != NULL) |
341 if (nearest_shapes[i] != NULL) |
348 { |
342 { |
349 results[i] = pres.getVector(i); |
343 results[i] = pres.getVector(i); |
350 lightScatter(rays[i], nearest_shapes[i], 0, |
344 lightScatter(rays[i], nearest_shapes[i], 0, |
351 P.getVector(i), normal.getVector(i), (_mm_movemask_ps(from_inside)>>i)&1, |
345 P.getVector(i), normal.getVector(i), (mmovemask(from_inside)>>i)&1, |
352 results[i]); |
346 results[i]); |
353 } |
347 } |
354 else |
348 else |
355 results[i] = bg_colour; |
349 results[i] = bg_colour; |
356 } |
350 } |