build script fixes, add ldflags build option pyrit
author Radek Brich <radek.brich@devl.cz>
Sat, 10 May 2008 14:29:37 +0200
branch pyrit
changeset 95 ca7d4c665531
parent 94 4c8abb8977dc
child 96 9eb71e76c7fd
build script fixes, add ldflags build option
update and enhance demos
fix bug in 4x grid oversampling
warn if writePNG called while compiled without libpng
make shapes in ShapeList const and add many other const needed due to snowball effect
slightly optimize Camera::makeRayPacket using _mm_shuffle_ps
make Vector SIMD vectorization disabled by default (causes problems)
fix bug in implicit reflection of transmissive surfaces, when surface's reflection parameter is set to zero
SConstruct
ccdemos/SConscript
demos/boxes.py
demos/bunny.py
demos/render_nff.py
demos/spheres_ao.py
demos/spheres_glass.py
demos/spheres_shadow.py
demos/triangles_monkey.py
demos/triangles_sphere.py
include/common.h
include/container.h
include/kdtree.h
include/octree.h
include/pixmap.h
include/raytracer.h
include/scene.h
include/serialize.h
include/shapes.h
include/simd.h
include/vector.h
src/container.cc
src/kdtree.cc
src/octree.cc
src/pixmap.cc
src/raytracer.cc
src/sampler.cc
src/serialize.cc
--- a/SConstruct	Thu May 08 09:21:25 2008 +0200
+++ b/SConstruct	Sat May 10 14:29:37 2008 +0200
@@ -50,6 +50,7 @@
 	('precision', 'floating point number precision (single/double)', "single"),
 	('flags', 'add additional compiler flags', ""),
 	BoolOption('force_flags', "use only flags specified by 'flags' option (do not autodetect arch/sse flags)", False),
+	('ldflags', 'add additional linker flags', ""),
 	BoolOption('profile', "enable gcc's profiling support (-pg)", False),
 )
 if env['PLATFORM'] == 'win32':
@@ -149,9 +150,6 @@
 	conf.CheckIntelC()
 	msvc=False
 
-if intelc or gcc:
-	conf.CheckCPUFlags()
-
 if intelc and (not gcc or conf.env['intelc']):
 	Tool('intelc').generate(conf.env)
 	cc = 'intelc'
@@ -164,6 +162,9 @@
 else:
 	cc = 'none'
 
+if cc == 'intelc' or cc == 'gcc':
+	conf.CheckCPUFlags()
+
 if platform == 'win32' and cc == 'gcc':
 	conf.env.Append(LIBPATH=["C:/mingw/lib", "C:/msys/mingw/lib"])
 	conf.env.Append(CPPPATH=["C:/mingw/include", "C:/msys/mingw/include"])
@@ -208,6 +209,9 @@
 if conf.env['profile'] and cc == 'gcc':
 	conf.env.Append(CCFLAGS="-pg", LINKFLAGS="-pg")
 
+if conf.env['ldflags']:
+	conf.env.Append(LINKFLAGS=conf.env['ldflags'])
+
 
 # configure pthread
 pthread = True
@@ -227,9 +231,7 @@
 
 
 # configure libpng
-if conf.CheckLibWithHeader('png', 'png.h', 'C'):
-	conf.Define('HAVE_PNG')
-elif conf.CheckLib('libpng'):
+if conf.CheckLibWithHeader('libpng', 'png.h', 'C'):
 	conf.Define('HAVE_PNG')
 
 env = conf.Finish()
@@ -291,16 +293,13 @@
 
 ### build targets
 
-Export('env pyenv sdlenv cc')
+Export('env pyenv sdlenv have_sdl cc')
 lib = SConscript('src/SConscript', build_dir='build/lib', duplicate=0,
 	exports={'buildmodule':False})
 pymodule = SConscript('src/SConscript', build_dir='build/pymodule', duplicate=0,
 	exports={'buildmodule':True})
 
-if have_sdl:
-	SConscript('ccdemos/SConscript', build_dir='build/ccdemos', duplicate=0,
-		exports='lib')
-
+SConscript('ccdemos/SConscript', build_dir='build/ccdemos', duplicate=0, exports='lib')
 SConscript('demos/SConscript', exports='pymodule')
 SConscript('tests/SConscript', build_dir='build/tests', duplicate=0, exports='lib')
 SConscript('models/SConscript')
--- a/ccdemos/SConscript	Thu May 08 09:21:25 2008 +0200
+++ b/ccdemos/SConscript	Sat May 10 14:29:37 2008 +0200
@@ -1,13 +1,14 @@
-Import('env sdlenv lib')
+Import('env sdlenv have_sdl lib')
 myenv = sdlenv.Clone()
 myenv.Append(LIBPATH=['#build/lib'])
 myenv.Prepend(LIBS=['pyrit'])
 
 l = []
-l.append( myenv.Program(['realtime.cc']) )
-l.append( myenv.Program(['realtime_bunny.cc']) )
-l.append( myenv.Program(['realtime_dragon.cc']) )
-l.append( myenv.Program(['spheres_shadow.cc']) )
-l.append( myenv.Program(['textures.cc']) )
+if have_sdl:
+	l.append( myenv.Program(['realtime.cc']) )
+	l.append( myenv.Program(['realtime_bunny.cc']) )
+	l.append( myenv.Program(['realtime_dragon.cc']) )
+	l.append( myenv.Program(['spheres_shadow.cc']) )
+	l.append( myenv.Program(['textures.cc']) )
 
 env.Alias('cc-demos', l)
--- a/demos/boxes.py	Thu May 08 09:21:25 2008 +0200
+++ b/demos/boxes.py	Sat May 10 14:29:37 2008 +0200
@@ -3,27 +3,37 @@
 from pyrit import *
 
 rt = Raytracer()
+rt.setBgColour((1.0,1.0,1.0))
 top = Octree()
 rt.setTop(top)
-rt.setCamera(Camera())
 
-light1 = Light(position=(0.0, 5.0, -5.0), colour=(0.7, 0.3, 0.6))
+light1 = Light(position=(0.0, 5.0, -5.0), colour=(0.1, 0.3, 0.6))
+light1.castShadows(False)
 rt.addLight(light1)
 
 light2 = Light(position=(-2.0, 10.0, -2.0), colour=(0.4, 0.6, 0.3))
+light2.castShadows(False)
 rt.addLight(light2)
 
-mat0 = Material(colour=(0.7, 0.7, 0.7))
-mat0.setReflectivity(0.0)
+mat = Material(colour=(0.7, 0.7, 0.7))
+mat.setReflectivity(0.3)
+mat.setTransmissivity(0.5, 1.5)
 for x in range(8):
 	for y in range(8):
 		for z in range(8):
-			box = Box(L=(-4.3+x, -4.6+y, -8.6+z), H=(-3.7+x, -4.0+y, -8.0+z), material=mat0)
+			box = Box(L=(-3.8+x, -3.8+y, -3.8+z), H=(-3.2+x, -3.2+y, -3.2+z), material=mat)
 			rt.addShape(box)
 
 top.optimize()
 
 sampler = DefaultSampler(800, 600)
+sampler.setOversample(1)
 rt.setSampler(sampler)
+
+rt.setCamera(Camera(eye=(-6,6,-10),lookat=(0,0,0)))
 rt.render()
-sampler.getPixmap().writePNG('boxes.png')
+sampler.getPixmap().writePNG('boxes-1.png')
+
+rt.setCamera(Camera(eye=(-4,3,-5),lookat=(0,0,0)))
+rt.render()
+sampler.getPixmap().writePNG('boxes-2.png')
--- a/demos/bunny.py	Thu May 08 09:21:25 2008 +0200
+++ b/demos/bunny.py	Sat May 10 14:29:37 2008 +0200
@@ -9,36 +9,35 @@
 rt = Raytracer()
 top = KdTree()
 rt.setTop(top)
-rt.setCamera(Camera())
+rt.setCamera(Camera(eye=(4,2,3.5),lookat=(0.5,0.5,-3)))
+rt.setBgColour((0.5, 0.5, 0.2))
 
-#rt.ambientocclusion(samples=100, distance=16.0, angle=0.5)
-
-mat = Material(colour=(0.9, 0.9, 0.9))
-mat.setTransmissivity(0.8)
+mat = Material(colour=(0.4, 0.5, 0.9))
+mat.setTransmissivity(0.95, 1.5)
+mat.setReflectivity(0)
 mat.setSmooth(True)
 LoadStanfordPlyFile(rt, "../models/ply/bunny/bun_zipper.ply",
 	mat, scale=(-29.0, 29.0, 29.0), trans=(-1,-2.5,-3))
 
-mat0 = Material(colour=(0.1, 0.2, 0.6))
-box1 = Box(L=(-20.0, -1.7, -20.0), H=(20.0, -1.5, 20.0), material=mat0)
-rt.addShape(box1)
+mat0 = Material(colour=(0.3, 0.5, 1.0))
+floor = Box(L=(-20.0, -1.7, -20.0), H=(20.0, -1.5, 20.0), material=mat0)
+rt.addShape(floor)
 
 mat1 = Material(colour=(0.5, 0.5, 0.2))
 mat1.setReflectivity(0.0)
-box2 = Box(L=(-20.0, -20.0, -10.0), H=(20.0, 20.0, -12.0), material=mat1)
-rt.addShape(box2)
+wall = Box(L=(-20.0, -20.0, -10.0), H=(20.0, 20.0, -12.0), material=mat1)
+rt.addShape(wall)
 
 light = Light(position=(-5.0, 3.0, 10.0), colour=(0.8, 0.5, 0.6))
-#light.castshadows(0)
 rt.addLight(light)
 
 light2 = Light(position=(4.0, 1.0, 10.0), colour=(0.5, 0.55, 0.7))
-#light2.castshadows(0)
 rt.addLight(light2)
 
 top.optimize()
 
 sampler = DefaultSampler(800, 600)
+sampler.setOversample(1)
 rt.setSampler(sampler)
 rt.render()
 sampler.getPixmap().writePNG('bunny.png')
--- a/demos/render_nff.py	Thu May 08 09:21:25 2008 +0200
+++ b/demos/render_nff.py	Sat May 10 14:29:37 2008 +0200
@@ -18,10 +18,12 @@
 mat = Material(colour=(1.0, 1.0, 1.0))
 
 f = sys.stdin
-fbase = "render_nff"
+fout = "render_nff.png"
 if len(sys.argv) > 1:
 	f = open(sys.argv[1])
-	fbase = sys.argv[1].rsplit('.',1)[0]
+	fout = sys.argv[1].rsplit('.',1)[0] + '.png'
+if len(sys.argv) > 2:
+	fout = sys.argv[2]
 
 while True:
 	line = f.readline()
@@ -104,4 +106,4 @@
 sampler = DefaultSampler(imagesize)
 rt.setSampler(sampler)
 rt.render()
-sampler.getPixmap().writePNG(fbase+'.png')
+sampler.getPixmap().writePNG(fout)
--- a/demos/spheres_ao.py	Thu May 08 09:21:25 2008 +0200
+++ b/demos/spheres_ao.py	Sat May 10 14:29:37 2008 +0200
@@ -5,7 +5,7 @@
 rt = Raytracer()
 top = KdTree()
 rt.setTop(top)
-rt.setCamera(Camera())
+rt.setCamera(Camera(eye=(0,1,6)))
 rt.ambientOcclusion(samples=100, distance=16.0, angle=0.5)
 
 light1 = Light(position=(0.0, 5.0, -5.0), colour=(0.7, 0.3, 0.6))
@@ -17,7 +17,6 @@
 rt.addLight(light2)
 
 mat0 = Material(colour=(0.7, 0.7, 0.7))
-
 box = Box(L=(-20.0, -1.2, -20.0), H=(20.0, -1.0, 20.0), material=mat0)
 rt.addShape(box)
 
--- a/demos/spheres_glass.py	Thu May 08 09:21:25 2008 +0200
+++ b/demos/spheres_glass.py	Sat May 10 14:29:37 2008 +0200
@@ -5,30 +5,30 @@
 rt = Raytracer()
 top = KdTree()
 rt.setTop(top)
-rt.setCamera(Camera())
+rt.setCamera(Camera(eye=(10,4,-6),lookat=(10,3.5,0)))
 
-light1 = Light(position=(0.0, 4.0, -3.0), colour=(0.9, 0.3, 0.6))
+light1 = Light(position=(10.0, 7.0, 3.0), colour=(0.9, 0.3, 0.6))
 rt.addLight(light1)
 
-#light2 = SphericalLight(position=(-2.0, 5.0, 1.0), radius=3.0, colour=(0.7, 1.0, 0.3))
-light2 = Light(position=(-2.0, -4.0, -1.0), colour=(0.7, 1.0, 0.3))
+light2 = Light(position=(8.0, 5.0, 1.0), colour=(0.7, 1.0, 0.3))
 rt.addLight(light2)
 
-light2 = Light(position=(2.0, 5.0, 1.0), colour=(0.8, 0.9, 1.0))
-rt.addLight(light2)
+light3 = Light(position=(12.0, 8.0, -1.0), colour=(0.8, 0.9, 1.0))
+rt.addLight(light3)
 
 mat0 = Material(colour=(0.1, 0.2, 0.9))
-box = Box(L=(-20.0, -2.2, -20.0), H=(20.0, -2.0, 20.0), material=mat0)
+mat0.setReflectivity(0.0)
+box = Box(L=(-10.0, 0.0, 50.0), H=(30.0, 1.0, -1.0), material=mat0)
 rt.addShape(box)
 
 mat1 = Material(colour=(1.0, 0.2, 0.1))
 mat1.setReflectivity(0.7)
-bigsphere = Sphere(centre=(2.0, 1.0, -5.0), radius=2.5, material=mat1)
+bigsphere = Sphere(centre=(12.0, 4.0, 6.0), radius=2.5, material=mat1)
 rt.addShape(bigsphere)
 
 mat2 = Material(colour=(0.1, 0.4, 0.2))
 mat2.setReflectivity(0.6)
-smallsphere = Sphere(centre=(-4.5, 0.5, -8.0), radius=2.0, material=mat2)
+smallsphere = Sphere(centre=(6.5, 3.5, 8.0), radius=2.0, material=mat2)
 rt.addShape(smallsphere)
 
 mat3 = Material(colour=(0.9, 0.9, 1.0))
@@ -36,7 +36,7 @@
 mat3.setTransmissivity(0.88)
 mat3.setReflectivity(0.1)
 for i in range(10):
-	sph = Sphere(centre=(-5.0+i, -1.5, -4.0), radius=0.5, material=mat3)
+	sph = Sphere(centre=(5.0+i, 1.5, 4.0), radius=0.5, material=mat3)
 	rt.addShape(sph)
 
 top.optimize()
--- a/demos/spheres_shadow.py	Thu May 08 09:21:25 2008 +0200
+++ b/demos/spheres_shadow.py	Sat May 10 14:29:37 2008 +0200
@@ -5,7 +5,7 @@
 rt = Raytracer()
 top = KdTree()
 rt.setTop(top)
-rt.setCamera(Camera())
+rt.setCamera(Camera(eye=(0,1,6)))
 
 light1 = Light(position=(0.0, 5.0, -5.0), colour=(0.7, 0.3, 0.6))
 rt.addLight(light1)
@@ -33,6 +33,7 @@
 top.optimize()
 
 sampler = DefaultSampler(800, 600)
+sampler.setOversample(1)
 rt.setSampler(sampler)
 rt.render()
 sampler.getPixmap().writePNG('spheres_shadow.png')
--- a/demos/triangles_monkey.py	Thu May 08 09:21:25 2008 +0200
+++ b/demos/triangles_monkey.py	Sat May 10 14:29:37 2008 +0200
@@ -6,7 +6,7 @@
 rt = Raytracer()
 top = KdTree()
 rt.setTop(top)
-rt.setCamera(Camera())
+rt.setCamera(Camera(eye=(0,0,6)))
 
 light = Light(position=(-5.0, 2.0, 8.0), colour=(0.9, 0.3, 0.6))
 rt.addLight(light)
--- a/demos/triangles_sphere.py	Thu May 08 09:21:25 2008 +0200
+++ b/demos/triangles_sphere.py	Sat May 10 14:29:37 2008 +0200
@@ -6,7 +6,7 @@
 rt = Raytracer()
 top = KdTree()
 rt.setTop(top)
-rt.setCamera(Camera())
+rt.setCamera(Camera(eye=(0,0,6)))
 
 light1 = Light(position=(0.0, 2.0, 6.0), colour=(0.9, 0.3, 0.6))
 light1.castShadows(False);
--- a/include/common.h	Thu May 08 09:21:25 2008 +0200
+++ b/include/common.h	Sat May 10 14:29:37 2008 +0200
@@ -45,7 +45,7 @@
 # define PI M_PI
 #else
 # define Float float
-# define Eps 1e-6f
+# define Eps 2e-6f
 # define Inf FLT_MAX
 # define PI (float)M_PI
 #endif
@@ -53,8 +53,14 @@
 // enable M_* constants in MSVC
 #define _USE_MATH_DEFINES
 
+#ifdef MSVC
+#define NORETURN __declspec(noreturn)
+#else
+#define NORETURN __attribute__((noreturn))
+#endif
+
 /* verbosity level:
-0: only errors (E)
+0: only errors and warnings (E, W)
 1: major status messages (*)
 2: minor status, progress (-)
 3: debug messages (D)
--- a/include/container.h	Thu May 08 09:21:25 2008 +0200
+++ b/include/container.h	Sat May 10 14:29:37 2008 +0200
@@ -47,20 +47,20 @@
 	Container(): bbox(), shapes() {};
 	virtual ~Container() {};
 
-	virtual void addShape(Shape* aShape);
-	//void addShapeNoExtend(Shape* aShape) { shapes.push_back(aShape); };
-	virtual Shape *nearest_intersection(const Shape *origin_shape, const Ray &ray,
+	virtual void addShape(const Shape* aShape);
+	//void addShapeNoExtend(const Shape* aShape) { shapes.push_back(aShape); };
+	virtual const Shape *nearest_intersection(const Shape *origin_shape, const Ray &ray,
 		Float &nearest_distance);
 
 	virtual void optimize() {};
 
 	ShapeList & getShapes() { return shapes; };
 
-	virtual ostream & dump(ostream &st);
+	virtual ostream & dump(ostream &st) const;
 
 #ifndef NO_SIMD
 	virtual void packet_intersection(const Shape* const* origin_shapes, const RayPacket &rays,
-		Float *nearest_distances, Shape** nearest_shapes);
+		Float *nearest_distances, const Shape** nearest_shapes);
 #endif
 };
 
--- a/include/kdtree.h	Thu May 08 09:21:25 2008 +0200
+++ b/include/kdtree.h	Sat May 10 14:29:37 2008 +0200
@@ -69,7 +69,7 @@
 	KdNode* getRightChild() const { return (KdNode*)(((size_t)children & ~3) + 16); };
 
 	ShapeList* getShapes() const { return (ShapeList*)((size_t)shapes & ~3); };
-	void addShape(Shape* aShape) { getShapes()->push_back(aShape); };
+	void addShape(const Shape* aShape) { getShapes()->push_back(aShape); };
 };
 
 /**
@@ -88,19 +88,19 @@
 	KdTree(): Container(), mempool(64), root(NULL), max_depth(32), built(false) {};
 	KdTree(int maxdepth): Container(), mempool(64), root(NULL), max_depth(maxdepth), built(false) {};
 	~KdTree() { if (root) delete root; };
-	void addShape(Shape* aShape) { Container::addShape(aShape); built = false; };
-	Shape *nearest_intersection(const Shape *origin_shape, const Ray &ray,
+	void addShape(const Shape* aShape) { Container::addShape(aShape); built = false; };
+	const Shape *nearest_intersection(const Shape *origin_shape, const Ray &ray,
 		Float &nearest_distance);
 #ifndef NO_SIMD
 	void packet_intersection(const Shape* const* origin_shapes, const RayPacket &rays,
-		Float *nearest_distances, Shape **nearest_shapes);
+		Float *nearest_distances, const Shape **nearest_shapes);
 #endif
 	void optimize() { build(); };
 	void build();
 	bool isBuilt() const { return built; };
 	KdNode *getRootNode() const { return root; };
 
-	ostream & dump(ostream &st);
+	ostream & dump(ostream &st) const;
 	istream & load(istream &st, Material *mat);
 };
 
--- a/include/octree.h	Thu May 08 09:21:25 2008 +0200
+++ b/include/octree.h	Sat May 10 14:29:37 2008 +0200
@@ -61,7 +61,7 @@
 	void makeChildren() { children = new OctreeNode[8]; assert(!isLeaf()); }; // this also cleans leaf bit
 	OctreeNode *getChild(const int num) { assert(!isLeaf()); return children + num; };
 
-	void addShape(Shape* aShape) { getShapes()->push_back(aShape); };
+	void addShape(const Shape* aShape) { getShapes()->push_back(aShape); };
 	ShapeList *getShapes() { return (ShapeList*)((size_t)shapes & ~(size_t)1); };
 	void setShapes(ShapeList *const ashapes) { shapes = ashapes; assert(!isLeaf()); setLeaf(); };
 
@@ -80,8 +80,8 @@
 	Octree() : Container(), root(NULL), max_depth(10), built(false) {};
 	Octree(int maxdepth) : Container(), root(NULL), max_depth(maxdepth), built(false) {};
 	~Octree() { if (root) delete root; };
-	void addShape(Shape* aShape) { Container::addShape(aShape); built = false; };
-	Shape *nearest_intersection(const Shape *origin_shape, const Ray &ray,
+	void addShape(const Shape* aShape) { Container::addShape(aShape); built = false; };
+	const Shape *nearest_intersection(const Shape *origin_shape, const Ray &ray,
 		Float &nearest_distance);
 	void optimize() { build(); };
 	void build();
--- a/include/pixmap.h	Thu May 08 09:21:25 2008 +0200
+++ b/include/pixmap.h	Sat May 10 14:29:37 2008 +0200
@@ -48,8 +48,22 @@
 	Pixmap(Float *afdata, int aw, int ah):
 		fdata(afdata), w(aw), h(ah), refdata(true) {};
 	Pixmap(int aw, int ah):
-		data(new Colour[aw*ah]), w(aw), h(ah), refdata(false) {};
-	virtual ~Pixmap() { if (!refdata) delete[] data; };
+		w(aw), h(ah), refdata(false)
+	{
+#ifdef NO_SIMD
+		data = new Colour[aw*ah];
+#else
+		data = (Colour*)_mm_malloc(aw*ah*sizeof(Colour), 16);
+#endif
+	};
+	virtual ~Pixmap()
+	{
+#ifdef NO_SIMD
+		if (!refdata) delete[] data;
+#else
+		if (!refdata) _mm_free(data);
+#endif
+	};
 
 	void setData(Float *afdata, int aw, int ah)
 		{ fdata = afdata; w = aw; h = ah; };
--- a/include/raytracer.h	Thu May 08 09:21:25 2008 +0200
+++ b/include/raytracer.h	Sat May 10 14:29:37 2008 +0200
@@ -64,12 +64,15 @@
 		const Vector &P, const Vector &N, const Vector &V);
 	void lightScatter(const Ray &ray, const Shape *shape, int depth,
 		const Vector &P, const Vector &normal, bool from_inside, Colour &col);
+
 #ifndef NO_SIMD
 	VectorPacket PhongShader_packet(const Shape* const* shapes,
 		const VectorPacket &P, const VectorPacket &N, const VectorPacket &V);
 	void raytracePacket(RayPacket &rays, Colour *results);
 #endif
-	static void *raytrace_worker(void *d);
+
+	NORETURN static void *raytrace_worker(void *d);
+
 public:
 	Raytracer(): top(NULL), camera(NULL), lights(), bg_colour(0., 0., 0.),
 		ao_samples(0), num_threads(4), max_depth(3), use_packets(true)
--- a/include/scene.h	Thu May 08 09:21:25 2008 +0200
+++ b/include/scene.h	Sat May 10 14:29:37 2008 +0200
@@ -94,6 +94,11 @@
 		v = cross(p, u);
 	};
 
+#ifndef NO_SIMD
+	void *operator new(size_t size) { return _mm_malloc(size, 16); };
+	void operator delete(void *p) { _mm_free(p); };
+#endif
+
 	const Vector &getEye() const { return eye; };
 	const Vector &getp() const { return p; };
 	const Vector &getu() const { return u; };
@@ -122,18 +127,18 @@
 		mfloat4 m;
 
 		// m1(xyz) = u * samples[i].x
-		m1x = mset1(u.x);
-		m1y = mset1(u.y);
-		m1z = mset1(u.z);
+		m1x = mshuffle(u.mf4, u.mf4, mShuffle0); // u.x
+		m1y = mshuffle(u.mf4, u.mf4, mShuffle1); // u.y
+		m1z = mshuffle(u.mf4, u.mf4, mShuffle2); // u.z
 		m = mset(samples[3].x, samples[2].x, samples[1].x, samples[0].x);
 		m1x = mmul(m1x, m);
 		m1y = mmul(m1y, m);
 		m1z = mmul(m1z, m);
 
 		// m2(xyz) = v * samples[i].y
-		m2x = mset1(v.x);
-		m2y = mset1(v.y);
-		m2z = mset1(v.z);
+		m2x = mshuffle(v.mf4, v.mf4, mShuffle0); // v.x
+		m2y = mshuffle(v.mf4, v.mf4, mShuffle1); // v.y
+		m2z = mshuffle(v.mf4, v.mf4, mShuffle2); // v.z
 		m = mset(samples[3].y, samples[2].y, samples[1].y, samples[0].y);
 		m2x = mmul(m2x, m);
 		m2y = mmul(m2y, m);
@@ -151,20 +156,20 @@
 		m1z = mmul(m1z, m);
 
 		// m1(xyz) = p - m1 = p - (u*samples[i].x + v*samples[i].y)*F = dir
-		m2x = mset1(p.x);
-		m2y = mset1(p.y);
-		m2z = mset1(p.z);
+		m2x = mshuffle(p.mf4, p.mf4, mShuffle0); // p.x
+		m2y = mshuffle(p.mf4, p.mf4, mShuffle1); // p.y
+		m2z = mshuffle(p.mf4, p.mf4, mShuffle2); // p.z
 		rays.dir.mx = msub(m2x, m1x);
 		rays.dir.my = msub(m2y, m1y);
 		rays.dir.mz = msub(m2z, m1z);
 
 		// copy origin
-		rays.o.mx = mset1(eye.x);
-		rays.o.my = mset1(eye.y);
-		rays.o.mz = mset1(eye.z);
+		rays.o.mx = mshuffle(eye.mf4, eye.mf4, mShuffle0); // eye.x
+		rays.o.my = mshuffle(eye.mf4, eye.mf4, mShuffle1); // eye.y
+		rays.o.mz = mshuffle(eye.mf4, eye.mf4, mShuffle2); // eye.z
 
 		rays.dir.normalize();
-		rays.invdir = mOne/rays.dir;
+		rays.invdir = mOne / rays.dir;
 	};
 #endif
 };
--- a/include/serialize.h	Thu May 08 09:21:25 2008 +0200
+++ b/include/serialize.h	Sat May 10 14:29:37 2008 +0200
@@ -42,7 +42,7 @@
  */
 class Indexer
 {
-	map <void *, int> indexmap;
+	map <const void *, int> indexmap;
 	int index;
 public:
 	Indexer(): indexmap(), index(0) {};
@@ -55,9 +55,9 @@
 	 * @retval true        if object was found
 	 * @retval false       if new index was made for the object
 	 */
-	bool get(void *o, int &retidx);
+	bool get(const void *o, int &retidx);
 
-	const int &operator[](void *o) { return indexmap[o]; };
+	const int &operator[](const void *o) { return indexmap[o]; };
 };
 
 extern Indexer vertex_index, shape_index;
@@ -65,9 +65,8 @@
 void resetSerializer();
 Shape *loadShape(istream &st, Material *mat);
 
-ostream & operator<<(ostream &st, Shape &o);
-ostream & operator<<(ostream &st, Vertex &o);
-ostream & operator<<(ostream &st, Container &o);
-istream & operator>>(istream &st, Vector &v);
+ostream & operator<<(ostream &st, const Shape &o);
+ostream & operator<<(ostream &st, const Vertex &o);
+ostream & operator<<(ostream &st, const Container &o);
 
 #endif
--- a/include/shapes.h	Thu May 08 09:21:25 2008 +0200
+++ b/include/shapes.h	Sat May 10 14:29:37 2008 +0200
@@ -86,7 +86,7 @@
 /**
  * list of shapes
  */
-typedef vector<Shape*> ShapeList;
+typedef vector<const Shape*> ShapeList;
 
 /**
  * sphere shape
--- a/include/simd.h	Thu May 08 09:21:25 2008 +0200
+++ b/include/simd.h	Sat May 10 14:29:37 2008 +0200
@@ -34,9 +34,9 @@
 
 #include <xmmintrin.h>
 
-typedef __m128 mfloat4;
+#define mfloat4 __m128
 
-#define mZero  _mm_set_ps1(0.0f)
+#define mZero  _mm_setzero_ps()
 #define mOne   _mm_set_ps1(1.0f)
 #define mTwo   _mm_set_ps1(2.0f)
 #define mEps   _mm_set_ps1(Eps)
@@ -66,6 +66,12 @@
 #define mmax _mm_max_ps
 #define mmovemask _mm_movemask_ps
 
+#define mShuffle0 _MM_SHUFFLE(0,0,0,0)
+#define mShuffle1 _MM_SHUFFLE(1,1,1,1)
+#define mShuffle2 _MM_SHUFFLE(2,2,2,2)
+#define mShuffle3 _MM_SHUFFLE(3,3,3,3)
+#define mshuffle _mm_shuffle_ps
+
 inline const mfloat4 mselect(const mfloat4& mask, const mfloat4& a, const mfloat4& b)
 {
 	return _mm_or_ps(_mm_and_ps(mask, a), _mm_andnot_ps(mask, b));
--- a/include/vector.h	Thu May 08 09:21:25 2008 +0200
+++ b/include/vector.h	Sat May 10 14:29:37 2008 +0200
@@ -36,8 +36,10 @@
 
 using namespace std;
 
+#define NO_SIMD_VECTOR
+
 /**
- * three cell vector
+ * three (four) cell vector
  */
 class Vector
 {
@@ -92,7 +94,7 @@
 	/** Accumulate. Useful for colors. */
 	Vector operator+=(const Vector &v)
 	{
-#ifdef NO_SIMD
+#ifdef NO_SIMD_VECTOR
 		x += v.x;
 		y += v.y;
 		z += v.z;
@@ -124,7 +126,7 @@
 	/** Sum of two vectors */
 	friend Vector operator+(const Vector &a, const Vector &b)
 	{
-#ifdef NO_SIMD
+#ifdef NO_SIMD_VECTOR
 		return Vector(a.x + b.x, a.y + b.y, a.z + b.z);
 #else
 		return Vector(madd(a.mf4, b.mf4));
@@ -134,10 +136,9 @@
 	/** Difference of two vectors */
 	friend Vector operator-(const Vector &a, const Vector &b)
 	{
-#if defined(NO_SIMD) || defined(MSVC)
+#ifdef NO_SIMD_VECTOR
 		return Vector(a.x - b.x, a.y - b.y, a.z - b.z);
 #else
-		// this faults in MSVC, for unknown reason
 		return Vector(msub(a.mf4, b.mf4));
 #endif
 	};
@@ -178,7 +179,7 @@
 	/** Get f/v, i.e. inverted vector multiplied by scalar */
 	friend Vector operator/(const Float &f, const Vector &v)
 	{
-#ifdef NO_SIMD
+#ifdef NO_SIMD_VECTOR
 		return Vector(f / v.x, f / v.y, f / v.z);
 #else
 		return Vector(mdiv(mset1(f), v.mf4));
@@ -200,7 +201,7 @@
 	/** Cell by cell product (only useful for colors) */
 	friend Vector operator*(const Vector &a, const Vector &b)
 	{
-#ifdef NO_SIMD
+#ifdef NO_SIMD_VECTOR
 		return Vector(a.x * b.x, a.y * b.y, a.z * b.z);
 #else
 		return Vector(mmul(a.mf4, b.mf4));
--- a/src/container.cc	Thu May 08 09:21:25 2008 +0200
+++ b/src/container.cc	Sat May 10 14:29:37 2008 +0200
@@ -28,7 +28,7 @@
 #include "container.h"
 #include "serialize.h"
 
-void Container::addShape(Shape* aShape)
+void Container::addShape(const Shape* aShape)
 {
 	const Float e = Eps;
 	if (shapes.size() == 0) {
@@ -49,10 +49,10 @@
 	shapes.push_back(aShape);
 };
 
-Shape *Container::nearest_intersection(const Shape *origin_shape, const Ray &ray,
+const Shape *Container::nearest_intersection(const Shape *origin_shape, const Ray &ray,
         Float &nearest_distance)
 {
-	Shape *nearest_shape = NULL;
+	const Shape *nearest_shape = NULL;
 	ShapeList::iterator shape;
 	for (shape = shapes.begin(); shape != shapes.end(); shape++)
 		if (*shape != origin_shape && (*shape)->intersect(ray, nearest_distance))
@@ -62,7 +62,7 @@
 
 #ifndef NO_SIMD
 void Container::packet_intersection(const Shape* const* origin_shapes, const RayPacket &rays,
-	Float *nearest_distances, Shape **nearest_shapes)
+	Float *nearest_distances, const Shape **nearest_shapes)
 {
 	for (int i = 0; i < 4; i++)
 		nearest_shapes[i] = nearest_intersection(origin_shapes[i], rays[i],
@@ -70,10 +70,10 @@
 }
 #endif
 
-ostream & Container::dump(ostream &st)
+ostream & Container::dump(ostream &st) const
 {
 	st << "(container," << shapes.size();
-	ShapeList::iterator shape;
+	ShapeList::const_iterator shape;
 	for (shape = shapes.begin(); shape != shapes.end(); shape++)
 	{
 		int idx;
--- a/src/kdtree.cc	Thu May 08 09:21:25 2008 +0200
+++ b/src/kdtree.cc	Sat May 10 14:29:37 2008 +0200
@@ -35,10 +35,10 @@
 class ShapeBound
 {
 public:
-	Shape *shape;
+	const Shape *shape;
 	Float pos;
 	bool end;
-	ShapeBound(Shape *ashape, const Float apos, const bool aend):
+	ShapeBound(const Shape *ashape, const Float apos, const bool aend):
 		shape(ashape), pos(apos), end(aend) {};
 	friend bool operator<(const ShapeBound& a, const ShapeBound& b)
 	{
@@ -204,7 +204,7 @@
 }
 
 /* algorithm by Vlastimil Havran, Heuristic Ray Shooting Algorithms, appendix C */
-Shape *KdTree::nearest_intersection(const Shape *origin_shape, const Ray &ray,
+const Shape *KdTree::nearest_intersection(const Shape *origin_shape, const Ray &ray,
 	Float &nearest_distance)
 {
 	Float a, b; /* entry/exit signed distance */
@@ -309,7 +309,7 @@
 		}
 
 		/* current node is the leaf . . . empty or full */
-		Shape *nearest_shape = NULL;
+		const Shape *nearest_shape = NULL;
 		Float dist = stack[exit].t;
 		ShapeList::iterator shape;
 		for (shape = node->getShapes()->begin(); shape != node->getShapes()->end(); shape++)
@@ -346,7 +346,7 @@
 };
 
 void KdTree::packet_intersection(const Shape* const* origin_shapes, const RayPacket &rays,
-		Float *nearest_distances, Shape **nearest_shapes)
+		Float *nearest_distances, const Shape **nearest_shapes)
 {
 	mfloat4 a, b; /* entry/exit signed distance */
 	mfloat4 t;    /* signed distance to the splitting plane */
@@ -545,13 +545,13 @@
 	return st;
 }
 
-ostream & KdTree::dump(ostream &st)
+ostream & KdTree::dump(ostream &st) const
 {
 	if (!built)
 		return Container::dump(st);
 
 	st << "(kdtree," << shapes.size();
-	ShapeList::iterator shape;
+	ShapeList::const_iterator shape;
 	for (shape = shapes.begin(); shape != shapes.end(); shape++)
 	{
 		int idx;
--- a/src/octree.cc	Thu May 08 09:21:25 2008 +0200
+++ b/src/octree.cc	Sat May 10 14:29:37 2008 +0200
@@ -148,7 +148,7 @@
 	}
 }
 
-Shape * Octree::nearest_intersection(const Shape *origin_shape, const Ray &ray,
+const Shape * Octree::nearest_intersection(const Shape *origin_shape, const Ray &ray,
 		Float &nearest_distance)
 {
 	/* if we have no tree, fall back to naive test */
@@ -217,7 +217,7 @@
 	node = root;
 	st_cur->next = -1;
 
-	Shape *nearest_shape = NULL;
+	const Shape *nearest_shape = NULL;
 	for (;;)
 	{
 		if (st_cur->next == -1)
--- a/src/pixmap.cc	Thu May 08 09:21:25 2008 +0200
+++ b/src/pixmap.cc	Sat May 10 14:29:37 2008 +0200
@@ -47,13 +47,22 @@
 #else
 	__m128 cmax = _mm_set_ps1(255.0);
 	__m64 m;
-	for (unsigned char *cd = cdata; cd < cdata + w*h*3; cd += 4, fd += 4)
-	{
-		m = _mm_cvtps_pi16(_mm_mul_ps(cmax,
-			_mm_min_ps(mOne, _mm_load_ps(fd))));
-		for (int i = 0; i < 4; i++)
-			cd[i] = ((unsigned char *)&m)[i<<1];
-	}
+	if (!refdata)
+		for (unsigned char *cd = cdata; cd < cdata + w*h*3; cd += 4, fd += 4)
+		{
+			m = _mm_cvtps_pi16(_mm_mul_ps(cmax,
+				_mm_min_ps(mOne, _mm_load_ps(fd))));
+			for (int i = 0; i < 4; i++)
+				cd[i] = ((unsigned char *)&m)[i<<1];
+		}
+	else // must use _mm_loadu_ instead of _mm_load_ as we do not know the alignment
+		for (unsigned char *cd = cdata; cd < cdata + w*h*3; cd += 4, fd += 4)
+		{
+			m = _mm_cvtps_pi16(_mm_mul_ps(cmax,
+				_mm_min_ps(mOne, _mm_loadu_ps(fd))));
+			for (int i = 0; i < 4; i++)
+				cd[i] = ((unsigned char *)&m)[i<<1];
+		}
 #endif
 
 	return cdata;
@@ -62,6 +71,7 @@
 int Pixmap::writePNG(const char *fname) const
 {
 #ifndef HAVE_PNG
+	dbgmsg(0, "W %s not written: PNG support is disabled\n", fname);
 	return -3;
 #else
 	int y;
--- a/src/raytracer.cc	Thu May 08 09:21:25 2008 +0200
+++ b/src/raytracer.cc	Sat May 10 14:29:37 2008 +0200
@@ -152,7 +152,7 @@
 	// ambient
 	acc = colour * ambient;
 
-	Shape *shadow_shapes[4];
+	const Shape *shadow_shapes[4];
 	vector<Light*>::iterator light;
 	for (light = lights.begin(); light != lights.end(); light++)
 	{
@@ -200,14 +200,6 @@
 		Float refl = shape->material->reflectivity;
 		const Float cos_i = - dot(normal, ray.dir);
 
-		// reflection
-		if (refl > 0.01)
-		{
-			Vector newdir = ray.dir + 2.0f * cos_i * normal;
-			Ray newray = Ray(P, newdir);
-			refl_col = raytrace(newray, depth + 1, shape);
-		}
-
 		// refraction
 		if (trans > 0.01)
 		{
@@ -245,6 +237,15 @@
 				trans_col = raytrace(newray, depth + 1, NULL);
 			}
 		}
+
+		// reflection
+		if (refl > 0.01)
+		{
+			Vector newdir = ray.dir + 2.0f * cos_i * normal;
+			Ray newray = Ray(P, newdir);
+			refl_col = raytrace(newray, depth + 1, shape);
+		}
+
 		col = (1-refl-trans)*col + refl*refl_col + trans*trans_col;
 	}
 
@@ -257,7 +258,7 @@
 			Vector dir = SphereDistribute(i, ao_samples, ao_angle, normal);
 			Ray ao_ray = Ray(P, dir);
 			Float dist = ao_distance;
-			Shape *shape_in_way = top->nearest_intersection(shape, ao_ray, dist);
+			const Shape *shape_in_way = top->nearest_intersection(shape, ao_ray, dist);
 			if (shape_in_way == NULL)
 				miss += 1.0;
 			else
@@ -271,7 +272,7 @@
 Colour Raytracer::raytrace(Ray &ray, int depth, const Shape *origin_shape)
 {
 	Float nearest_distance = Inf;
-	Shape *nearest_shape = top->nearest_intersection(origin_shape, ray, nearest_distance);
+	const Shape *nearest_shape = top->nearest_intersection(origin_shape, ray, nearest_distance);
 
 	if (nearest_shape == NULL)
 		return bg_colour;
@@ -303,7 +304,7 @@
 		mfloat4 m_nearest_distances;
 	};
 	mfloat4 mask;
-	Shape *nearest_shapes[4];
+	const Shape *nearest_shapes[4];
 	static const Shape *origin_shapes[4] = {NULL, NULL, NULL, NULL};
 	m_nearest_distances = mInf;
 
@@ -351,12 +352,7 @@
 }
 #endif
 
-#ifdef MSVC
-__declspec(noreturn)
-#else
-__attribute__((noreturn))
-#endif
-void *Raytracer::raytrace_worker(void *d)
+NORETURN void *Raytracer::raytrace_worker(void *d)
 {
 	static const int my_queue_size = 256;
 	Raytracer *rt = (Raytracer*)d;
--- a/src/sampler.cc	Thu May 08 09:21:25 2008 +0200
+++ b/src/sampler.cc	Sat May 10 14:29:37 2008 +0200
@@ -29,6 +29,40 @@
 #include "common.h"
 #include "scene.h"
 
+/* grid oversampling look up tables */
+static const int gridsamples[] = {1,4,9,16};
+static const Float osa4x[] = { -0.25f, +0.25f, +0.25f, -0.25f };
+static const Float osa4y[] = { -0.25f, -0.25f, +0.25f, +0.25f };
+static const Float osa9x[] =
+{
+	-0.34f,  0.00f, +0.34f,
+	-0.34f,  0.00f, +0.34f,
+	-0.34f,  0.00f, +0.34f
+};
+static const Float osa9y[] =
+{
+	-0.34f, -0.34f, -0.34f,
+	 0.00f,  0.00f,  0.00f,
+	+0.34f, +0.34f, +0.34f
+};
+static const Float osa16x[] =
+{
+	-0.375f, -0.125f, +0.125f, +0.375f,
+	-0.375f, -0.125f, +0.125f, +0.375f,
+	-0.375f, -0.125f, +0.125f, +0.375f,
+	-0.375f, -0.125f, +0.125f, +0.375f
+};
+static const Float osa16y[] =
+{
+	-0.375f, -0.375f, -0.375f, -0.375f,
+	-0.125f, -0.125f, -0.125f, -0.125f,
+	+0.125f, +0.125f, +0.125f, +0.125f,
+	+0.375f, +0.375f, +0.375f, +0.375f
+};
+static const Float *osaSx[] = {NULL, osa4x, osa9x, osa16x};
+static const Float *osaSy[] = {NULL, osa4y, osa9y, osa16y};
+
+
 void DefaultSampler::init()
 {
 	phase = 0;
@@ -37,7 +71,6 @@
 
 int DefaultSampler::initSampleSet()
 {
-	static const int gridsamples[] = {1,5,9,16};
 	const int samples = gridsamples[oversample];
 	const int &w = pixmap.getWidth(), &h = pixmap.getHeight();
 	Float *&buffer = pixmap.getFloatData();
@@ -184,21 +217,6 @@
 	else if (phase == 2)
 	{
 		/* grid oversampling */
-		static const int gridsamples[] = {1,4,9,16};
-		static const Float osa4x[] = {-0.25f, +0.25f, +0.25f, -0.25f};
-		static const Float osa4y[] = {-0.25f, -0.25f, +0.25f, +0.25f};
-		static const Float osa9x[] = {-0.34f,  0.00f, +0.34f,
-			-0.34f,  0.00f, +0.34f, -0.34f,  0.00f, +0.34f};
-		static const Float osa9y[] = {-0.34f, -0.34f, -0.34f,
-				0.00f,  0.00f,  0.00f, +0.34f, +0.34f, +0.34f};
-		static const Float osa16x[] = {-0.375f, -0.125f, +0.125f, +0.375f,
-			-0.375f, -0.125f, +0.125f, +0.375f, -0.375f, -0.125f, +0.125f, +0.375f,
-			-0.375f, -0.125f, +0.125f, +0.375f};
-		static const Float osa16y[] = {-0.375f, -0.375f, -0.375f, -0.375f,
-			-0.125f, -0.125f, -0.125f, -0.125f, +0.125f, +0.125f, +0.125f, +0.125f,
-			+0.375f, +0.375f, +0.375f, +0.375f};
-		static const Float *osaSx[] = {NULL, osa4x, osa9x, osa16x};
-		static const Float *osaSy[] = {NULL, osa4y, osa9y, osa16y};
 		const int samples = gridsamples[oversample];
 		const Float *osax = osaSx[oversample];
 		const Float *osay = osaSy[oversample];
--- a/src/serialize.cc	Thu May 08 09:21:25 2008 +0200
+++ b/src/serialize.cc	Sat May 10 14:29:37 2008 +0200
@@ -39,9 +39,9 @@
 	vertices.clear();
 }
 
-bool Indexer::get(void *o, int &retidx)
+bool Indexer::get(const void *o, int &retidx)
 {
-	map <void *, int>::iterator i;
+	map <const void *, int>::iterator i;
 	i = indexmap.find(o);
 	if (i == indexmap.end())
 	{
@@ -130,17 +130,17 @@
 	}
 }
 
-ostream & operator<<(ostream &st, Shape &o)
+ostream & operator<<(ostream &st, const Shape &o)
 {
 	return o.dump(st);
 }
 
-ostream & operator<<(ostream &st, Vertex &o)
+ostream & operator<<(ostream &st, const Vertex &o)
 {
 	return o.dump(st);
 }
 
-ostream & operator<<(ostream &st, Container &o)
+ostream & operator<<(ostream &st, const Container &o)
 {
 	return o.dump(st);
 }