packetize Phong shader
new scons config options:
simd=(yes|no) - allow/suppress explicit SSE
force_flags=(yes|no) - force use of specified flags instead of autodetected
profile=(yes|no) - enable gcc's profiling (-pg option)
check for pthread.h header, don't try to build without it
add fourth Vector3 component for better memory alignment
rename Vector3 to Vector
partially SSE-ize Vector class (only fully vertical operations)
build static lib and python module in distinct directories
to avoid collision of library file names on some platforms
/* * vector.h: Vector class with Colour alias * * This file is part of Pyrit Ray Tracer. * * Copyright 2006, 2007 Radek Brich * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. 
*/#ifndef VECTOR_H#define VECTOR_H#include <math.h>#include <iostream>#include "common.h"using namespace std;/** * three cell vector */class Vector{public: // data union {#ifndef NO_SSE __m128 mps;#endif Float cell[4]; struct { Float x, y, z, w; }; struct { Float r, g, b, a; }; }; // constructors#ifndef NO_SSE Vector(__m128 m): mps(m) {};#endif Vector(): x(0.0f), y(0.0f), z(0.0f), w(1.0) {}; Vector(Float ax, Float ay, Float az): x(ax), y(ay), z(az), w(1.0) {}; // index operator const Float &operator[](int index) const { return cell[index]; }; bool operator==(Vector &v) const { return x==v.x && y==v.y && z==v.z; }; // normalize Vector normalize() { const Float f = 1.0f / mag(); *this *= f; return *this; }; // get normalized copy friend Vector normalize(const Vector &v) { const Float f = 1.0f / v.mag(); return v * f; }; // square magnitude, magnitude Float mag2() const { return dot(*this, *this); }; Float mag() const { return sqrtf(mag2()); }; // negative Vector operator-() const { return Vector(-x, -y, -z); }; // accumulate Vector operator+=(const Vector &v) {#ifdef NO_SSE x += v.x; y += v.y; z += v.z;#else mps = _mm_add_ps(mps, v.mps);#endif return *this; }; // multiply Vector operator*=(const Float &f) { x *= f; y *= f; z *= f; return *this; }; // cut Vector operator/=(const Float &f) { Float finv = 1./f; x *= finv; y *= finv; z *= finv; return *this; }; // sum friend Vector operator+(const Vector &a, const Vector &b) {#ifdef NO_SSE return Vector(a.x + b.x, a.y + b.y, a.z + b.z);#else return Vector(_mm_add_ps(a.mps, b.mps));#endif }; // difference friend Vector operator-(const Vector &a, const Vector &b) {#ifdef NO_SSE return Vector(a.x - b.x, a.y - b.y, a.z - b.z);#else return Vector(_mm_sub_ps(a.mps, b.mps));#endif }; // dot product friend Float dot(const Vector &a, const Vector &b) { return a.x * b.x + a.y * b.y + a.z * b.z; }; // cross product friend Vector cross(const Vector &a, const Vector &b) { return Vector(a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, 
a.x * b.y - a.y * b.x); }; // product of vector and scalar friend Vector operator*(const Vector &v, const Float &f) { return Vector(f * v.x, f * v.y, f * v.z); }; friend Vector operator*(const Float &f, const Vector &v) { return v * f; }; // scalar division friend Vector operator/(const Vector &v, const Float &f) { const Float finv = 1./f; return Vector(v.x * finv, v.y * finv, v.z * finv); }; friend Vector operator/(const Float &f, const Vector &v) {#ifdef NO_SSE return Vector(f / v.x, f / v.y, f / v.z);#else return Vector(_mm_div_ps(_mm_set_ps1(f), v.mps));#endif }; // vector plus scalar friend Vector operator+(const Vector &v, const Float &f) { return Vector(v.x + f, v.y + f, v.z + f); }; // vector minus scalar friend Vector operator-(const Vector &v, const Float &f) { return Vector(v.x - f, v.y - f, v.z - f); }; // cell by cell product (only usable for colours) friend Vector operator*(const Vector &a, const Vector &b) {#ifdef NO_SSE return Vector(a.x * b.x, a.y * b.y, a.z * b.z);#else return Vector(_mm_mul_ps(a.mps, b.mps));#endif }; // write friend ostream & operator<<(ostream &st, const Vector &v) { return st << "(" << v.x << "," << v.y << "," << v.z << ")"; }; // read friend istream & operator>>(istream &st, Vector &v) { char s[10]; st.getline(s, 10, '('); st >> v.x; st.getline(s, 10, ','); st >> v.y; st.getline(s, 10, ','); st >> v.z; st.getline(s, 10, ')'); return st; };};typedef Vector Colour;#ifndef NO_SSEclass VectorPacket{public: union { __m128 ma[3]; struct { __m128 mx; __m128 my; __m128 mz; }; struct { float x[4]; float y[4]; float z[4]; }; }; VectorPacket() {}; VectorPacket(__m128 ax, __m128 ay, __m128 az): mx(ax), my(ay), mz(az) {}; VectorPacket(const Vector &v): mx(_mm_set_ps1(v.x)), my(_mm_set_ps1(v.y)), mz(_mm_set_ps1(v.z)) {}; Vector getVector(int i) const { return Vector(x[i], y[i], z[i]); }; void setVector(int i, const Vector &v) { x[i] = v.x; y[i] = v.y; z[i] = v.z; }; void normalize() { __m128 m,x,y,z; x = _mm_mul_ps(mx, mx); // x*x y = 
_mm_mul_ps(my, my); // y*y z = _mm_mul_ps(mz, mz); // z*z m = _mm_add_ps(x, y); m = _mm_add_ps(m, z); // x*x + y*y + z*z m = _mm_sqrt_ps(m); m = _mm_div_ps(mOne, m); // m = 1/sqrt(m) mx = _mm_mul_ps(mx, m); my = _mm_mul_ps(my, m); mz = _mm_mul_ps(mz, m); }; // accumulate VectorPacket operator+=(const VectorPacket &v) { mx = _mm_add_ps(mx, v.mx); my = _mm_add_ps(my, v.my); mz = _mm_add_ps(mz, v.mz); return *this; }; // add to non-masked components VectorPacket selectiveAdd(__m128 mask, const VectorPacket &v) { mx = _mm_or_ps(_mm_and_ps(mask, _mm_add_ps(mx, v.mx)), _mm_andnot_ps(mask, mx)); my = _mm_or_ps(_mm_and_ps(mask, _mm_add_ps(my, v.my)), _mm_andnot_ps(mask, my)); mz = _mm_or_ps(_mm_and_ps(mask, _mm_add_ps(mz, v.mz)), _mm_andnot_ps(mask, mz)); return *this; }; // add scalar to non-masked components VectorPacket selectiveAdd(__m128 mask, const __m128 m) { mx = _mm_or_ps(_mm_and_ps(mask, _mm_add_ps(mx, m)), _mm_andnot_ps(mask, mx)); my = _mm_or_ps(_mm_and_ps(mask, _mm_add_ps(my, m)), _mm_andnot_ps(mask, my)); mz = _mm_or_ps(_mm_and_ps(mask, _mm_add_ps(mz, m)), _mm_andnot_ps(mask, mz)); return *this; }; // dot product friend __m128 dot(const VectorPacket &a, const VectorPacket &b) { return _mm_add_ps(_mm_add_ps( _mm_mul_ps(a.mx, b.mx), _mm_mul_ps(a.my, b.my)), _mm_mul_ps(a.mz, b.mz)); }; friend VectorPacket operator+(const VectorPacket &a, const VectorPacket &b) { return VectorPacket( _mm_add_ps(a.mx, b.mx), _mm_add_ps(a.my, b.my), _mm_add_ps(a.mz, b.mz)); }; friend VectorPacket operator-(const VectorPacket &a, const VectorPacket &b) { return VectorPacket( _mm_sub_ps(a.mx, b.mx), _mm_sub_ps(a.my, b.my), _mm_sub_ps(a.mz, b.mz)); }; friend VectorPacket operator*(const VectorPacket &v, const __m128 &m) { return VectorPacket( _mm_mul_ps(v.mx, m), _mm_mul_ps(v.my, m), _mm_mul_ps(v.mz, m)); }; friend VectorPacket operator/(const __m128 &m, const VectorPacket &v) { return VectorPacket( _mm_div_ps(m, v.mx), _mm_div_ps(m, v.my), _mm_div_ps(m, v.mz)); }; // cell by cell 
product (only usable for colours) friend VectorPacket operator*(const VectorPacket &a, const VectorPacket &b) { return VectorPacket( _mm_mul_ps(a.mx, b.mx), _mm_mul_ps(a.my, b.my), _mm_mul_ps(a.mz, b.mz)); }; // write to character stream friend ostream & operator<<(ostream &st, const VectorPacket &v) { return st << "[" << v.getVector(0) << "," << v.getVector(1) << "," << v.getVector(2) << "," << v.getVector(3) << ")"; };};#endif#endif