packetize Phong shader
new scons config options:
simd=(yes|no) - allow/suppress explicit SSE
force_flags=(yes|no) - force use of specified flags instead of autodetected
profile=(yes|no) - enable gcc's profiling (-pg option)
check for pthread.h header, don't try to build without it
add fourth Vector3 component for better memory aligning
rename Vector3 to Vector
partialy SSE-ize Vector class (only fully vertical operations)
build static lib and python module in distinctive directories
to avoid collision of library file names on some platforms
/* * shapes.cc: shape classes * * This file is part of Pyrit Ray Tracer. * * Copyright 2006, 2007, 2008 Radek Brich * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */#include "shapes.h"#include "serialize.h"bool Sphere::intersect(const Ray &ray, Float &dist) const{ Vector V = ray.o - center; register Float d = -dot(V, ray.dir); register Float Det = d * d - (dot(V,V) - sqr_radius); register Float t1,t2; if (Det > 0) { Det = sqrtf(Det); t1 = d - Det; t2 = d + Det; if (t1 > 0) { if (t1 < dist) { dist = t1; return true; } } else if (t2 > 0 && t2 < dist) { dist = t2; return true; } } return false;}#ifndef NO_SSE__m128 Sphere::intersect_packet(const RayPacket &rays, __m128 &dists){ VectorPacket V = rays.o - VectorPacket(center); register __m128 d = _mm_sub_ps(mZero, dot(V, rays.dir)); register __m128 Det = _mm_sub_ps(_mm_mul_ps(d, d), _mm_sub_ps(dot(V,V), _mm_set_ps1(sqr_radius))); register __m128 t1, t2, mask; mask = _mm_cmpgt_ps(Det, mZero); if (!_mm_movemask_ps(mask)) return mask; Det = _mm_sqrt_ps(Det); t1 = _mm_sub_ps(d, Det); t2 = _mm_add_ps(d, Det); mask = _mm_and_ps(mask, _mm_cmpgt_ps(t2, mZero)); const __m128 cond1 = _mm_and_ps(_mm_cmpgt_ps(t1, mZero), _mm_cmplt_ps(t1, dists)); const __m128 cond2 = _mm_and_ps(_mm_cmple_ps(t1, mZero), _mm_cmplt_ps(t2, dists)); const __m128 newdists = _mm_or_ps(_mm_and_ps(cond1, t1), _mm_and_ps(cond2, t2)); mask = _mm_and_ps(mask, _mm_or_ps(cond1, cond2)); dists = _mm_or_ps(_mm_and_ps(mask, newdists), _mm_andnot_ps(mask, dists)); return mask;}#endif/* if there should be CSG sometimes, this may be needed... */bool Sphere::intersect_all(const Ray &ray, Float dist, vector<Float> &allts) const{ //allts = new vector<Float>(); Vector V = ((Ray)ray).o - center; Float Vd = - dot(V, ray.dir); Float Det = Vd * Vd - (dot(V,V) - sqr_radius); if (Det > 0) { Det = sqrtf(Det); Float t1 = Vd - Det; Float t2 = Vd + Det; if (t1 < 0) { if (t2 > 0) { // ray from inside of the sphere allts.push_back(0.0); allts.push_back(t2); return true; } else return false; } else { allts.push_back(t1); allts.push_back(t2); return true; } } return false;}bool Sphere::intersect_bbox(const BBox &bbox) const{ register float dmin = 0; for (int i = 0; i < 3; i++) { if (center[i] < bbox.L[i]) dmin += (center[i] - bbox.L[i])*(center[i] - bbox.L[i]); else if (center[i] > bbox.H[i]) dmin += (center[i] - bbox.H[i])*(center[i] - bbox.H[i]); } if (dmin <= sqr_radius) return true; return false;};BBox Sphere::get_bbox() const{ return BBox(center - radius, center + radius);}bool Box::intersect(const Ray &ray, Float &dist) const{ register Float tnear = -Inf; register Float tfar = Inf; register Float t1, t2; for (int i = 0; i < 3; i++) { if (ray.dir[i] == 0) { /* ray is parallel to these planes */ if (ray.o[i] < L[i] || ray.o[i] > H[i]) return false; } else { /* compute the intersection distance of the planes */ t1 = (L[i] - ray.o[i]) / ray.dir[i]; t2 = (H[i] - ray.o[i]) / ray.dir[i]; if (t1 > t2) swap(t1, t2); if (t1 > tnear) tnear = t1; /* want largest Tnear */ if (t2 < tfar) tfar = t2; /* want smallest Tfar */ if (tnear > tfar || tfar < 0) return false; /* box missed; box is behind ray */ } } if (tnear < dist) { dist = tnear; return true; } return false;}#ifndef NO_SSE__m128 Box::intersect_packet(const RayPacket &rays, __m128 &dists){ register __m128 tnear = mZero; register __m128 tfar = mInf; register __m128 t1, t2; register __m128 mask = mAllSet; for (int i = 0; i < 3; i++) { const __m128 mL = _mm_set_ps1(L[i]); const __m128 mH = _mm_set_ps1(H[i]); mask = _mm_and_ps(mask, _mm_or_ps( _mm_or_ps(_mm_cmplt_ps(rays.dir.ma[i], mMEps), _mm_cmpgt_ps(rays.dir.ma[i], mEps)), _mm_and_ps(_mm_cmpge_ps(rays.o.ma[i], mL), _mm_cmple_ps(rays.o.ma[i], mH)) )); if (!_mm_movemask_ps(mask)) return mask; /* compute the intersection distance of the planes */ t1 = _mm_div_ps(_mm_sub_ps(mL, rays.o.ma[i]), rays.dir.ma[i]); t2 = _mm_div_ps(_mm_sub_ps(mH, rays.o.ma[i]), rays.dir.ma[i]); __m128 t = _mm_min_ps(t1, t2); t2 = _mm_max_ps(t1, t2); t1 = t; tnear = _mm_max_ps(tnear, t1); /* want largest Tnear */ tfar = _mm_min_ps(tfar, t2); /* want smallest Tfar */ mask = _mm_and_ps(mask, _mm_and_ps(_mm_cmple_ps(tnear, tfar), _mm_cmpge_ps(tfar, mZero))); if (!_mm_movemask_ps(mask)) return mask; } mask = _mm_and_ps(mask, _mm_cmplt_ps(tnear, dists)); dists = _mm_or_ps(_mm_and_ps(mask, tnear), _mm_andnot_ps(mask, dists)); return mask;}#endifbool Box::intersect_bbox(const BBox &bbox) const{ return ( H.x > bbox.L.x && L.x < bbox.H.x && H.y > bbox.L.y && L.y < bbox.H.y && H.z > bbox.L.z && L.z < bbox.H.z);}const Vector Box::normal(const Vector &P) const{ register Vector l = P - L; register Vector h = H - P; if (l.x < h.x) h.x = -1; else { l.x = h.x; h.x = +1; } if (l.y < h.y) h.y = -1; else { l.y = h.y; h.y = +1; } if (l.z < h.z) h.z = -1; else { l.z = h.z; h.z = +1; } if (l.x > l.y) { h.x = 0; if (l.y > l.z) h.y = 0; else h.z = 0; } else { h.y = 0; if (l.x > l.z) h.x = 0; else h.z = 0; } return h;}#ifdef TRI_PLUCKERinline void Plucker(const Vector &p, const Vector &q, Float* pl){ pl[0] = p.x*q.y - q.x*p.y; pl[1] = p.x*q.z - q.x*p.z; pl[2] = p.x - q.x; pl[3] = p.y*q.z - q.y*p.z; pl[4] = p.z - q.z; pl[5] = q.y - p.y;}inline Float Side(const Float* pla, const Float* plb){ return pla[0]*plb[4] + pla[1]*plb[5] + pla[2]*plb[3] + pla[4]*plb[0] + pla[5]*plb[1] + pla[3]*plb[2];}#endifTriangle::Triangle(Vertex *aA, Vertex *aB, Vertex *aC, Material *amaterial) : A(aA), B(aB), C(aC){ material = amaterial; const Vector c = B->P - A->P; const Vector b = C->P - A->P; N = cross(c, b); N.normalize();#ifdef TRI_PLUCKER Plucker(B->P,C->P,pla); Plucker(C->P,A->P,plb); Plucker(A->P,B->P,plc);#endif#if defined(TRI_BARI) || defined(TRI_BARI_PRE) if (fabsf(N.x) > fabsf(N.y)) { if (fabsf(N.x) > fabsf(N.z)) k = 0; else k = 2; } else { if (fabsf(N.y) > fabsf(N.z)) k = 1; else k = 2; }#endif#ifdef TRI_BARI_PRE int u = (k + 1) % 3; int v = (k + 2) % 3; Float krec = 1.0 / N[k]; nu = N[u] * krec; nv = N[v] * krec; nd = dot(N, A->P) * krec; // first line equation Float reci = 1.0f / (b[u] * c[v] - b[v] * c[u]); bnu = b[u] * reci; bnv = -b[v] * reci; // second line equation cnu = -c[u] * reci; cnv = c[v] * reci;#endif}bool Triangle::intersect(const Ray &ray, Float &dist) const{#ifdef TRI_PLUCKER Float plr[6]; Plucker(ray.o, ray.o+ray.dir, plr); const bool side0 = Side(plr, pla) >= 0.0; const bool side1 = Side(plr, plb) >= 0.0; if (side0 != side1) return false; const bool side2 = Side(plr, plc) >= 0.0; if (side0 != side2) return false; const Float t = - dot( (ray.o - A->P), N) / dot(ray.dir,N); if(t <= Eps || t >= dist) return false; dist = t; return true;#endif#if defined(TRI_BARI) || defined(TRI_BARI_PRE) static const int modulo3[5] = {0,1,2,0,1}; const Vector &O = ray.o; const Vector &D = ray.dir; register const int u = modulo3[k+1]; register const int v = modulo3[k+2];#endif#ifdef TRI_BARI_PRE const Float t = (nd - O[k] - nu * O[u] - nv * O[v]) / (D[k] + nu * D[u] + nv * D[v]); if (t >= dist || t < Eps) return false; const Float hu = O[u] + t * D[u] - A->P[u]; const Float hv = O[v] + t * D[v] - A->P[v]; const Float beta = hv * bnu + hu * bnv; if (beta < 0.) return false; const Float gamma = hu * cnv + hv * cnu; if (gamma < 0. || beta + gamma > 1.) return false; dist = t; return true;#endif#ifdef TRI_BARI // original barycentric coordinates based intesection // not optimized, just for reference const Vector c = B - A; const Vector b = C - A; // distance test const Float t = - dot( (O-A), N) / dot(D,N); if (t < Eps || t > dist) return false; // calc hitpoint const Float Hu = O[u] + t * D[u] - A[u]; const Float Hv = O[v] + t * D[v] - A[v]; const Float beta = (b[u] * Hv - b[v] * Hu) / (b[u] * c[v] - b[v] * c[u]); if (beta < 0) return false; const Float gamma = (c[v] * Hu - c[u] * Hv) / (b[u] * c[v] - b[v] * c[u]); if (gamma < 0) return false; if (beta+gamma > 1) return false; dist = t; return true;#endif}#if not defined(NO_SSE) and defined(TRI_BARI_PRE)__m128 Triangle::intersect_packet(const RayPacket &rays, __m128 &dists){ static const int modulo3[5] = {0,1,2,0,1}; register const int u = modulo3[k+1]; register const int v = modulo3[k+2]; __m128 mask; const __m128 t = _mm_div_ps( _mm_sub_ps(_mm_sub_ps( _mm_sub_ps(_mm_set_ps1(nd), rays.o.ma[k]), _mm_mul_ps(_mm_set_ps1(nu), rays.o.ma[u]) ), _mm_mul_ps(_mm_set_ps1(nv), rays.o.ma[v])), _mm_add_ps(rays.dir.ma[k], _mm_add_ps(_mm_mul_ps(_mm_set_ps1(nu), rays.dir.ma[u]), _mm_mul_ps(_mm_set_ps1(nv), rays.dir.ma[v]))) ); mask = _mm_and_ps(_mm_cmplt_ps(t, dists), _mm_cmpge_ps(t, mEps)); if (!_mm_movemask_ps(mask)) return mask; const __m128 hu = _mm_sub_ps(_mm_add_ps(rays.o.ma[u], _mm_mul_ps(t, rays.dir.ma[u])), _mm_set_ps1(A->P[u])); const __m128 hv = _mm_sub_ps(_mm_add_ps(rays.o.ma[v], _mm_mul_ps(t, rays.dir.ma[v])), _mm_set_ps1(A->P[v])); const __m128 beta = _mm_add_ps(_mm_mul_ps(hv, _mm_set_ps1(bnu)), _mm_mul_ps(hu, _mm_set_ps1(bnv))); mask = _mm_and_ps(mask, _mm_cmpge_ps(beta, mZero)); if (!_mm_movemask_ps(mask)) return mask; const __m128 gamma = _mm_add_ps(_mm_mul_ps(hu, _mm_set_ps1(cnv)), _mm_mul_ps(hv, _mm_set_ps1(cnu))); mask = _mm_and_ps(mask, _mm_and_ps(_mm_cmpge_ps(gamma, mZero), _mm_cmple_ps(_mm_add_ps(beta, gamma), mOne))); if (!_mm_movemask_ps(mask)) return mask; dists = _mm_or_ps(_mm_andnot_ps(mask, dists), _mm_and_ps(mask, t)); return mask;}#endifbool Triangle::intersect_bbox(const BBox &bbox) const{ const Vector boxcenter = (bbox.L+bbox.H)*0.5; const Vector boxhalfsize = (bbox.H-bbox.L)*0.5; const Vector v0 = A->P - boxcenter; const Vector v1 = B->P - boxcenter; const Vector v2 = C->P - boxcenter; const Vector e0 = v1-v0; const Vector e1 = v2-v1; const Vector e2 = v0-v2; Float fex = fabsf(e0.x); Float fey = fabsf(e0.y); Float fez = fabsf(e0.z); Float p0,p1,p2,min,max,rad; p0 = e0.z*v0.y - e0.y*v0.z; p2 = e0.z*v2.y - e0.y*v2.z; if(p0<p2) {min=p0; max=p2;} else {min=p2; max=p0;} rad = fez * boxhalfsize.y + fey * boxhalfsize.z; if(min>rad || max<-rad) return false; p0 = -e0.z*v0.x + e0.x*v0.z; p2 = -e0.z*v2.x + e0.x*v2.z; if(p0<p2) {min=p0; max=p2;} else {min=p2; max=p0;} rad = fez * boxhalfsize.x + fex * boxhalfsize.z; if(min>rad || max<-rad) return false; p1 = e0.y*v1.x - e0.x*v1.y; p2 = e0.y*v2.x - e0.x*v2.y; if(p2<p1) {min=p2; max=p1;} else {min=p1; max=p2;} rad = fey * boxhalfsize.x + fex * boxhalfsize.y; if(min>rad || max<-rad) return false; fex = fabsf(e1.x); fey = fabsf(e1.y); fez = fabsf(e1.z); p0 = e1.z*v0.y - e1.y*v0.z; p2 = e1.z*v2.y - e1.y*v2.z; if(p0<p2) {min=p0; max=p2;} else {min=p2; max=p0;} rad = fez * boxhalfsize.y + fey * boxhalfsize.z; if(min>rad || max<-rad) return false; p0 = -e1.z*v0.x + e1.x*v0.z; p2 = -e1.z*v2.x + e1.x*v2.z; if(p0<p2) {min=p0; max=p2;} else {min=p2; max=p0;} rad = fez * boxhalfsize.x + fex * boxhalfsize.z; if(min>rad || max<-rad) return false; p0 = e1.y*v0.x - e1.x*v0.y; p1 = e1.y*v1.x - e1.x*v1.y; if(p0<p1) {min=p0; max=p1;} else {min=p1; max=p0;} rad = fey * boxhalfsize.x + fex * boxhalfsize.y; if(min>rad || max<-rad) return false; fex = fabsf(e2.x); fey = fabsf(e2.y); fez = fabsf(e2.z); p0 = e2.z*v0.y - e2.y*v0.z; p1 = e2.z*v1.y - e2.y*v1.z; if(p0<p1) {min=p0; max=p1;} else {min=p1; max=p0;} rad = fez * boxhalfsize.y + fey * boxhalfsize.z; if(min>rad || max<-rad) return false; p0 = -e2.z*v0.x + e2.x*v0.z; p1 = -e2.z*v1.x + e2.x*v1.z; if(p0<p1) {min=p0; max=p1;} else {min=p1; max=p0;} rad = fez * boxhalfsize.x + fex * boxhalfsize.z; if(min>rad || max<-rad) return false; p1 = e2.y*v1.x - e2.x*v1.y; p2 = e2.y*v2.x - e2.x*v2.y; if(p2<p1) {min=p2; max=p1;} else {min=p1; max=p2;} rad = fey * boxhalfsize.x + fex * boxhalfsize.y; if(min>rad || max<-rad) return false; /* test overlap in the {x,y,z}-directions */ /* test in X-direction */ min = v0.x; if (v1.x < min) min = v1.x; if (v2.x < min) min = v2.x; max = v0.x; if (v1.x > max) max = v1.x; if (v2.x > max) max = v2.x; if(min>boxhalfsize.x || max<-boxhalfsize.x) return false; /* test in Y-direction */ min = v0.y; if (v1.y < min) min = v1.y; if (v2.y < min) min = v2.y; max = v0.y; if (v1.y > max) max = v1.y; if (v2.y > max) max = v2.y; if(min>boxhalfsize.y || max<-boxhalfsize.y) return false; /* test in Z-direction */ min = v0.z; if (v1.z < min) min = v1.z; if (v2.z < min) min = v2.z; max = v0.z; if (v1.z > max) max = v1.z; if (v2.z > max) max = v2.z; if(min>boxhalfsize.z || max<-boxhalfsize.z) return false; /* test if the box intersects the plane of the triangle */ Vector vmin,vmax; Float v; for(int q=0;q<3;q++) { v=v0[q]; if(N[q]>0.0f) { vmin.cell[q]=-boxhalfsize[q] - v; vmax.cell[q]= boxhalfsize[q] - v; } else { vmin.cell[q]= boxhalfsize[q] - v; vmax.cell[q]=-boxhalfsize[q] - v; } } if(dot(N,vmin)>0.0f) return false; if(dot(N,vmax)>=0.0f) return true; return false;}BBox Triangle::get_bbox() const{ BBox bbox = BBox(); bbox.L = A->P; if (B->P.x < bbox.L.x) bbox.L.x = B->P.x; if (C->P.x < bbox.L.x) bbox.L.x = C->P.x; if (B->P.y < bbox.L.y) bbox.L.y = B->P.y; if (C->P.y < bbox.L.y) bbox.L.y = C->P.y; if (B->P.z < bbox.L.z) bbox.L.z = B->P.z; if (C->P.z < bbox.L.z) bbox.L.z = C->P.z; bbox.H = A->P; if (B->P.x > bbox.H.x) bbox.H.x = B->P.x; if (C->P.x > bbox.H.x) bbox.H.x = C->P.x; if (B->P.y > bbox.H.y) bbox.H.y = B->P.y; if (C->P.y > bbox.H.y) bbox.H.y = C->P.y; if (B->P.z > bbox.H.z) bbox.H.z = B->P.z; if (C->P.z > bbox.H.z) bbox.H.z = C->P.z; return bbox;}ostream & Sphere::dump(ostream &st) const{ return st << "(sphere," << center << "," << radius << ")";}ostream & Box::dump(ostream &st) const{ return st << "(box," << L << "," << H << ")";}ostream & Vertex::dump(ostream &st) const{ return st << "(v," << P << ")";}ostream & NormalVertex::dump(ostream &st) const{ return st << "(vn," << P << "," << N << ")";}ostream & Triangle::dump(ostream &st) const{ int idxA, idxB, idxC; if (!vertex_index.get(A, idxA)) st << *A << "," << endl; if (!vertex_index.get(B, idxB)) st << *B << "," << endl; if (!vertex_index.get(C, idxC)) st << *C << "," << endl; return st << "(t," << idxA << "," << idxB << "," << idxC << ")";}