76 Vector3 dir = p - (u*samp.x + v*samp.y)*F; |
93 Vector3 dir = p - (u*samp.x + v*samp.y)*F; |
77 dir.normalize(); |
94 dir.normalize(); |
78 return Ray(eye, dir); |
95 return Ray(eye, dir); |
79 }; |
96 }; |
80 |
97 |
81 void makeRayPacket(Sample *samples, Ray *rays) |
98 void makeRayPacket(Sample *samples, RayPacket &rays) |
82 { |
99 { |
83 __m128 m1x,m1y,m1z; |
100 __m128 m1x,m1y,m1z; |
84 __m128 m2x,m2y,m2z; |
101 __m128 m2x,m2y,m2z; |
85 __m128 m; |
102 __m128 m; |
86 |
103 |
87 // m1(xyz) = u * samples[i].x |
104 // m1(xyz) = u * samples[i].x |
88 m1x = _mm_set1_ps(u.x); |
105 m1x = _mm_set_ps1(u.x); |
89 m1y = _mm_set1_ps(u.y); |
106 m1y = _mm_set_ps1(u.y); |
90 m1z = _mm_set1_ps(u.z); |
107 m1z = _mm_set_ps1(u.z); |
91 m = _mm_set_ps(samples[0].x, samples[1].x, samples[2].x, samples[3].x); |
108 m = _mm_set_ps(samples[3].x, samples[2].x, samples[1].x, samples[0].x); |
92 m1x = _mm_mul_ps(m1x, m); |
109 m1x = _mm_mul_ps(m1x, m); |
93 m1y = _mm_mul_ps(m1y, m); |
110 m1y = _mm_mul_ps(m1y, m); |
94 m1z = _mm_mul_ps(m1z, m); |
111 m1z = _mm_mul_ps(m1z, m); |
95 |
112 |
96 // m2(xyz) = v * samples[i].y |
113 // m2(xyz) = v * samples[i].y |
97 m2x = _mm_set1_ps(v.x); |
114 m2x = _mm_set_ps1(v.x); |
98 m2y = _mm_set1_ps(v.y); |
115 m2y = _mm_set_ps1(v.y); |
99 m2z = _mm_set1_ps(v.z); |
116 m2z = _mm_set_ps1(v.z); |
100 m = _mm_set_ps(samples[0].y, samples[1].y, samples[2].y, samples[3].y); |
117 m = _mm_set_ps(samples[3].y, samples[2].y, samples[1].y, samples[0].y); |
101 m2x = _mm_mul_ps(m2x, m); |
118 m2x = _mm_mul_ps(m2x, m); |
102 m2y = _mm_mul_ps(m2y, m); |
119 m2y = _mm_mul_ps(m2y, m); |
103 m2z = _mm_mul_ps(m2z, m); |
120 m2z = _mm_mul_ps(m2z, m); |
104 |
121 |
105 // m1(xyz) = (m1 + m2) = (u*samples[i].x + v*samples[i].y) |
122 // m1(xyz) = (m1 + m2) = (u*samples[i].x + v*samples[i].y) |
106 m1x = _mm_add_ps(m1x, m2x); |
123 m1x = _mm_add_ps(m1x, m2x); |
107 m1y = _mm_add_ps(m1y, m2y); |
124 m1y = _mm_add_ps(m1y, m2y); |
108 m1z = _mm_add_ps(m1z, m2z); |
125 m1z = _mm_add_ps(m1z, m2z); |
109 |
126 |
110 // m1(xyz) = m1*F = (u*samples[i].x + v*samples[i].y)*F |
127 // m1(xyz) = m1*F = (u*samples[i].x + v*samples[i].y)*F |
111 m = _mm_set_ps(F,F,F,F); |
128 m = _mm_set_ps1(F); |
112 m1x = _mm_mul_ps(m1x, m); |
129 m1x = _mm_mul_ps(m1x, m); |
113 m1y = _mm_mul_ps(m1y, m); |
130 m1y = _mm_mul_ps(m1y, m); |
114 m1z = _mm_mul_ps(m1z, m); |
131 m1z = _mm_mul_ps(m1z, m); |
115 |
132 |
116 // m1(xyz) = p - m1 = p - (u*samples[i].x + v*samples[i].y)*F = dir |
133 // m1(xyz) = p - m1 = p - (u*samples[i].x + v*samples[i].y)*F = dir |
117 m2x = _mm_set1_ps(p.x); |
134 m2x = _mm_set_ps1(p.x); |
118 m2y = _mm_set1_ps(p.y); |
135 m2y = _mm_set_ps1(p.y); |
119 m2z = _mm_set1_ps(p.z); |
136 m2z = _mm_set_ps1(p.z); |
120 m2x = _mm_sub_ps(m2x, m1x); |
137 rays.dir.mx = _mm_sub_ps(m2x, m1x); |
121 m2y = _mm_sub_ps(m2y, m1y); |
138 rays.dir.my = _mm_sub_ps(m2y, m1y); |
122 m2z = _mm_sub_ps(m2z, m1z); |
139 rays.dir.mz = _mm_sub_ps(m2z, m1z); |
123 |
140 |
124 // normalize dir |
141 // copy origin |
125 m1x = _mm_mul_ps(m2x, m2x); // x*x |
142 rays.o.mx = _mm_set_ps1(eye.x); |
126 m1y = _mm_mul_ps(m2y, m2y); // y*y |
143 rays.o.my = _mm_set_ps1(eye.y); |
127 m1z = _mm_mul_ps(m2z, m2z); // z*z |
144 rays.o.mz = _mm_set_ps1(eye.z); |
128 m = _mm_add_ps(m1x, m1y); // x*x + y*y |
145 |
129 m = _mm_add_ps(m, m1z); // m = x*x + y*y + z*z |
146 rays.dir.normalize(); |
130 m = _mm_sqrt_ps(m); // m = sqrt(m) |
|
131 m2x = _mm_div_ps(m2x, m); // dir(xyz) /= m |
|
132 m2y = _mm_div_ps(m2y, m); |
|
133 m2z = _mm_div_ps(m2z, m); |
|
134 |
|
135 for (int i = 0; i < 4; i++) |
|
136 { |
|
137 Vector3 dir(((float*)&m2x)[3-i], ((float*)&m2y)[3-i], ((float*)&m2z)[3-i]); |
|
138 rays[i] = Ray(eye, dir); |
|
139 } |
|
140 }; |
147 }; |
141 }; |
148 }; |
142 |
149 |
143 /** |
150 /** |
144 * light object |
151 * light object |
169 BBox(const Vector3 aL, const Vector3 aH): L(aL), H(aH) {}; |
176 BBox(const Vector3 aL, const Vector3 aH): L(aL), H(aH) {}; |
170 Float w() { return H.x-L.x; }; |
177 Float w() { return H.x-L.x; }; |
171 Float h() { return H.y-L.y; }; |
178 Float h() { return H.y-L.y; }; |
172 Float d() { return H.z-L.z; }; |
179 Float d() { return H.z-L.z; }; |
173 bool intersect(const Ray &ray, Float &a, Float &b); |
180 bool intersect(const Ray &ray, Float &a, Float &b); |
|
181 bool intersect_packet(const RayPacket &rays, __m128 &a, __m128 &b) |
|
182 { |
|
183 return intersect(rays[0], ((float*)&a)[0], ((float*)&b)[0]) |
|
184 || intersect(rays[1], ((float*)&a)[1], ((float*)&b)[1]) |
|
185 || intersect(rays[2], ((float*)&a)[2], ((float*)&b)[2]) |
|
186 || intersect(rays[3], ((float*)&a)[3], ((float*)&b)[3]); |
|
187 }; |
174 }; |
188 }; |
175 |
189 |
176 #endif |
190 #endif |