35 using namespace std; |
35 using namespace std; |
36 |
36 |
37 /** |
37 /** |
38 * three cell vector |
38 * three cell vector |
39 */ |
39 */ |
40 class Vector3 |
40 class Vector |
41 { |
41 { |
42 public: |
42 public: |
43 // data |
43 // data |
44 union { |
44 union { |
45 struct { |
45 #ifndef NO_SSE |
46 Float x, y, z; |
46 __m128 mps; |
47 }; |
47 #endif |
48 struct { |
48 Float cell[4]; |
49 Float r, g, b; |
49 struct { Float x, y, z, w; }; |
50 }; |
50 struct { Float r, g, b, a; }; |
51 Float cell[3]; |
|
52 }; |
51 }; |
53 |
52 |
54 // constructors |
53 // constructors |
// Constructors of the two interleaved revisions (Vector3 and Vector).
// Default constructor: x, y, z are zeroed; the Vector revision also sets w to 1.0.
// Three-argument constructor: copies ax, ay, az into x, y, z; the Vector
// revision again sets w to 1.0.
// Vector(__m128) exists only when NO_SSE is not defined and stores the packed
// register directly in mps, which shares the union with the components.
55 Vector3(): x(0.0f), y(0.0f), z(0.0f) {}; |
54 #ifndef NO_SSE |
56 Vector3(Float ax, Float ay, Float az): x(ax), y(ay), z(az) {}; |
55 Vector(__m128 m): mps(m) {}; |

56 #endif |

57 Vector(): x(0.0f), y(0.0f), z(0.0f), w(1.0) {}; |

58 Vector(Float ax, Float ay, Float az): x(ax), y(ay), z(az), w(1.0) {}; |
57 |
59 |
58 // index operator |
60 // index operator |
59 const Float &operator[](int index) const { return cell[index]; }; |
61 const Float &operator[](int index) const { return cell[index]; }; |
60 |
62 |
61 bool operator==(Vector3 &v) const { return x==v.x && y==v.y && z==v.z; }; |
63 bool operator==(Vector &v) const { return x==v.x && y==v.y && z==v.z; }; |
62 |
64 |
63 // normalize |
65 // normalize |
// In-place normalization (Vector3 and Vector revisions interleaved).
// Computes f = 1 / mag() and scales this vector by it; the Vector revision
// does the scaling through operator*= and returns a copy of the result.
// There is no guard for a zero magnitude.
64 Vector3 normalize() |
66 Vector normalize() |
65 { |
67 { |
66 Float f = 1.0f / mag(); |
68 const Float f = 1.0f / mag(); |

69 *this *= f; |

70 return *this; |

71 }; |
|
72 |
|
73 // get normalized copy |
|
74 friend Vector normalize(const Vector &v) |
|
75 { |
|
76 const Float f = 1.0f / v.mag(); |
|
77 return v * f; |
|
78 }; |
|
79 |
|
80 // square magnitude, magnitude |
|
81 Float mag2() const { return dot(*this, *this); }; |
|
82 Float mag() const { return sqrtf(mag2()); }; |
|
83 |
|
84 // negative |
|
85 Vector operator-() const { return Vector(-x, -y, -z); }; |
|
86 |
|
87 // accumulate |
|
88 Vector operator+=(const Vector &v) |
|
89 { |
|
90 #ifdef NO_SSE |
|
91 x += v.x; |
|
92 y += v.y; |
|
93 z += v.z; |
|
94 #else |
|
95 mps = _mm_add_ps(mps, v.mps); |
|
96 #endif |
|
97 return *this; |
|
98 }; |
|
99 |
|
100 // multiply |
|
// In-place scaling by f: x, y and z are multiplied, nothing else is touched,
// and a copy of this vector is returned.
// The lines numbered 67-71 are the tail of the other revision's normalize()
// body, which applies the same three multiplications.
101 Vector operator*=(const Float &f) |

102 { |
67 x *= f; |
103 x *= f; |
68 y *= f; |
104 y *= f; |
69 z *= f; |
105 z *= f; |
70 return *this; |
106 return *this; |
71 }; |
107 }; |
72 |
108 |
73 // get normalized copy |
|
74 friend Vector3 normalize(Vector3 &v) |
|
75 { |
|
76 const Float f = 1.0f / v.mag(); |
|
77 return v * f; |
|
78 }; |
|
79 |
|
80 // square magnitude, magnitude |
|
81 Float mag2() const { return x * x + y * y + z * z; }; |
|
82 Float mag() const { return sqrtf(mag2()); }; |
|
83 |
|
84 // negative |
|
85 Vector3 operator-() const { return Vector3(-x, -y, -z); }; |
|
86 |
|
87 // accumulate |
|
88 Vector3 operator+=(const Vector3 &v) |
|
89 { |
|
90 x += v.x; |
|
91 y += v.y; |
|
92 z += v.z; |
|
93 return *this; |
|
94 }; |
|
95 |
109 |
96 // cut |
110 // cut |
// In-place division of x, y and z by the scalar f; returns a copy of this vector.
// The Vector3 revision divides each component by f. The Vector revision
// computes the reciprocal once (1./f, a double-precision literal) and
// multiplies the three components by it. f == 0 is not checked.
97 Vector3 operator/=(const Float &f) |
111 Vector operator/=(const Float &f) |
98 { |
112 { |
99 x /= f; |
113 Float finv = 1./f; |
100 y /= f; |
114 x *= finv; |
101 z /= f; |
115 y *= finv; |

116 z *= finv; |
102 return *this; |
117 return *this; |
103 }; |
118 }; |
104 |
119 |
105 // sum |
120 // sum |
// Vector sum a + b as a new vector.
// Scalar form: adds x, y and z and goes through the three-argument constructor.
// SSE form (Vector revision, NO_SSE not defined): one packed add of a.mps and
// b.mps, so all four lanes are added.
106 friend Vector3 operator+(const Vector3 &a, const Vector3 &b) |
121 friend Vector operator+(const Vector &a, const Vector &b) |
107 { |
122 { |
108 return Vector3(a.x + b.x, a.y + b.y, a.z + b.z); |
123 #ifdef NO_SSE |

124 return Vector(a.x + b.x, a.y + b.y, a.z + b.z); |

125 #else |

126 return Vector(_mm_add_ps(a.mps, b.mps)); |

127 #endif |
109 }; |
128 }; |
110 |
129 |
111 // difference |
130 // difference |
// Vector difference a - b as a new vector.
// Scalar form: subtracts x, y and z and goes through the three-argument constructor.
// SSE form (Vector revision, NO_SSE not defined): one packed subtract of
// a.mps and b.mps, so all four lanes are subtracted.
112 friend Vector3 operator-(const Vector3 &a, const Vector3 &b) |
131 friend Vector operator-(const Vector &a, const Vector &b) |
113 { |
132 { |
114 return Vector3(a.x - b.x, a.y - b.y, a.z - b.z); |
133 #ifdef NO_SSE |

134 return Vector(a.x - b.x, a.y - b.y, a.z - b.z); |

135 #else |

136 return Vector(_mm_sub_ps(a.mps, b.mps)); |

137 #endif |
115 }; |
138 }; |
116 |
139 |
117 // dot product |
140 // dot product |
// Dot product of a and b over x, y and z only; both revisions use the same
// scalar expression (no SSE path, no fourth component).
118 friend Float dot(const Vector3 &a, const Vector3 &b) |
141 friend Float dot(const Vector &a, const Vector &b) |
119 { |
142 { |
120 return a.x * b.x + a.y * b.y + a.z * b.z; |
143 return a.x * b.x + a.y * b.y + a.z * b.z; |
121 }; |
144 }; |
122 |
145 |
123 // cross product |
146 // cross product |
// Cross product a x b as a new vector, computed from the x, y, z components
// and built with the three-argument constructor. Operand order matters:
// swapping a and b negates every term.
124 friend Vector3 cross(const Vector3 &a, const Vector3 &b) |
147 friend Vector cross(const Vector &a, const Vector &b) |
125 { |
148 { |
126 return Vector3(a.y * b.z - a.z * b.y, |
149 return Vector(a.y * b.z - a.z * b.y, |
127 a.z * b.x - a.x * b.z, |
150 a.z * b.x - a.x * b.z, |
128 a.x * b.y - a.y * b.x); |
151 a.x * b.y - a.y * b.x); |
129 }; |
152 }; |
130 |
153 |
131 // product of vector and scalar |
154 // product of vector and scalar |
// Vector times scalar: a new vector with x, y and z each multiplied by f.
132 friend Vector3 operator*(const Vector3 &v, const Float &f) |
155 friend Vector operator*(const Vector &v, const Float &f) |
133 { |
156 { |
134 return Vector3(f * v.x, f * v.y, f * v.z); |
157 return Vector(f * v.x, f * v.y, f * v.z); |
135 }; |
158 }; |
136 |
159 |
// Scalar times vector: forwards to the (vector, scalar) overload so that the
// operands can be written in either order.
137 friend Vector3 operator*(const Float &f, const Vector3 &v) |
160 friend Vector operator*(const Float &f, const Vector &v) |
138 { |
161 { |
139 return v * f; |
162 return v * f; |
140 }; |
163 }; |
141 |
164 |
142 // scalar division |
165 // scalar division |
// Two division overloads, each shown in both revisions (lines interleaved).
// operator/(v, f): each of x, y, z divided by the scalar f; the Vector
//   revision computes 1./f once and multiplies by it instead.
// operator/(f, v): the scalar f divided by each of x, y, z; the Vector
//   revision's SSE path broadcasts f and divides by all four lanes of v.mps.
// Neither overload checks for a zero divisor.
143 friend Vector3 operator/(const Vector3 &v, const Float &f) |
166 friend Vector operator/(const Vector &v, const Float &f) |
144 { |
167 { |
145 return Vector3(v.x / f, v.y / f, v.z / f); |
168 const Float finv = 1./f; |
146 }; |
169 return Vector(v.x * finv, v.y * finv, v.z * finv); |
147 |
170 }; |
148 friend Vector3 operator/(const Float &f, const Vector3 &v) |
171 |
149 { |
172 friend Vector operator/(const Float &f, const Vector &v) |
150 return Vector3(f / v.x, f / v.y, f / v.z); |
173 { |

174 #ifdef NO_SSE |

175 return Vector(f / v.x, f / v.y, f / v.z); |

176 #else |

177 return Vector(_mm_div_ps(_mm_set_ps1(f), v.mps)); |

178 #endif |
151 }; |
179 }; |
152 |
180 |
153 // vector plus scalar |
181 // vector plus scalar |
// Vector plus scalar: a new vector with f added to each of x, y and z.
154 friend Vector3 operator+(const Vector3 &v, const Float &f) |
182 friend Vector operator+(const Vector &v, const Float &f) |
155 { |
183 { |
156 return Vector3(v.x + f, v.y + f, v.z + f); |
184 return Vector(v.x + f, v.y + f, v.z + f); |
157 }; |
185 }; |
158 |
186 |
159 // vector minus scalar |
187 // vector minus scalar |
// Vector minus scalar: a new vector with f subtracted from each of x, y and z.
160 friend Vector3 operator-(const Vector3 &v, const Float &f) |
188 friend Vector operator-(const Vector &v, const Float &f) |
161 { |
189 { |
162 return Vector3(v.x - f, v.y - f, v.z - f); |
190 return Vector(v.x - f, v.y - f, v.z - f); |
163 }; |
191 }; |
164 |
192 |
165 // cell by cell product (only usable for colours) |
193 // cell by cell product (only usable for colours) |
// Component-wise product of a and b (not a dot or cross product).
// Scalar form: multiplies x, y and z pairwise.
// SSE form (Vector revision, NO_SSE not defined): one packed multiply of
// a.mps and b.mps, so all four lanes are multiplied.
166 friend Vector3 operator*(const Vector3 &a, const Vector3 &b) |
194 friend Vector operator*(const Vector &a, const Vector &b) |
167 { |
195 { |
168 return Vector3(a.x * b.x, a.y * b.y, a.z * b.z); |
196 #ifdef NO_SSE |

197 return Vector(a.x * b.x, a.y * b.y, a.z * b.z); |

198 #else |

199 return Vector(_mm_mul_ps(a.mps, b.mps)); |

200 #endif |
169 }; |
201 }; |
170 |
202 |
171 // write |
203 // write |
// Stream output in the form "(x,y,z)": parentheses, comma separators, no
// spaces; only x, y and z are written. Returns the stream for chaining.
172 friend ostream & operator<<(ostream &st, const Vector3 &v) |
204 friend ostream & operator<<(ostream &st, const Vector &v) |
173 { |
205 { |
174 return st << "(" << v.x << "," << v.y << "," << v.z << ")"; |
206 return st << "(" << v.x << "," << v.y << "," << v.z << ")"; |
175 }; |
207 }; |
176 |
208 |
177 // read |
209 // read |
178 friend istream & operator>>(istream &st, Vector3 &v) |
210 friend istream & operator>>(istream &st, Vector &v) |
179 { |
211 { |
180 char s[10]; |
212 char s[10]; |
181 st.getline(s, 10, '('); |
213 st.getline(s, 10, '('); |
182 st >> v.x; |
214 st >> v.x; |
183 st.getline(s, 10, ','); |
215 st.getline(s, 10, ','); |
232 mx = _mm_mul_ps(mx, m); |
270 mx = _mm_mul_ps(mx, m); |
233 my = _mm_mul_ps(my, m); |
271 my = _mm_mul_ps(my, m); |
234 mz = _mm_mul_ps(mz, m); |
272 mz = _mm_mul_ps(mz, m); |
235 }; |
273 }; |
236 |
274 |
|
275 // accumulate |
|
276 VectorPacket operator+=(const VectorPacket &v) |
|
277 { |
|
278 mx = _mm_add_ps(mx, v.mx); |
|
279 my = _mm_add_ps(my, v.my); |
|
280 mz = _mm_add_ps(mz, v.mz); |
|
281 return *this; |
|
282 }; |
|
283 |
|
284 // add to the components selected by the mask (lanes whose mask bits are set); other lanes keep their value |
|
285 VectorPacket selectiveAdd(__m128 mask, const VectorPacket &v) |
|
286 { |
|
287 mx = _mm_or_ps(_mm_and_ps(mask, _mm_add_ps(mx, v.mx)), |
|
288 _mm_andnot_ps(mask, mx)); |
|
289 my = _mm_or_ps(_mm_and_ps(mask, _mm_add_ps(my, v.my)), |
|
290 _mm_andnot_ps(mask, my)); |
|
291 mz = _mm_or_ps(_mm_and_ps(mask, _mm_add_ps(mz, v.mz)), |
|
292 _mm_andnot_ps(mask, mz)); |
|
293 return *this; |
|
294 }; |
|
295 |
|
296 // add a scalar to the components selected by the mask (lanes whose mask bits are set); other lanes keep their value |
|
297 VectorPacket selectiveAdd(__m128 mask, const __m128 m) |
|
298 { |
|
299 mx = _mm_or_ps(_mm_and_ps(mask, _mm_add_ps(mx, m)), |
|
300 _mm_andnot_ps(mask, mx)); |
|
301 my = _mm_or_ps(_mm_and_ps(mask, _mm_add_ps(my, m)), |
|
302 _mm_andnot_ps(mask, my)); |
|
303 mz = _mm_or_ps(_mm_and_ps(mask, _mm_add_ps(mz, m)), |
|
304 _mm_andnot_ps(mask, mz)); |
|
305 return *this; |
|
306 }; |
|
307 |
237 // dot product |
308 // dot product |
238 friend __m128 dot(const VectorPacket &a, const VectorPacket &b) |
309 friend __m128 dot(const VectorPacket &a, const VectorPacket &b) |
239 { |
310 { |
240 return _mm_add_ps(_mm_add_ps( |
311 return _mm_add_ps(_mm_add_ps( |
241 _mm_mul_ps(a.mx, b.mx), |
312 _mm_mul_ps(a.mx, b.mx), |