/*
 * vector.h: Vector class with Colour alias
 *
 * This file is part of Pyrit Ray Tracer.
 *
 * Copyright 2006, 2007  Radek Brich
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#ifndef VECTOR_H
#define VECTOR_H

#include <math.h>
#include <iostream>

#include "common.h"

using namespace std;

/**
 * three cell vector
 */
class Vector
{
public:
	// data
	union {
#ifndef NO_SSE
		__m128 mps;
#endif
		Float cell[4];
		struct { Float x, y, z, w; };
		struct { Float r, g, b, a; };
	};

	// constructors
#ifndef NO_SSE
	Vector(__m128 m): mps(m) {};
#endif
	Vector(): x(0.0f), y(0.0f), z(0.0f), w(1.0) {};
	Vector(Float ax, Float ay, Float az): x(ax), y(ay), z(az), w(1.0) {};

	// index operator
	const Float &operator[](int index) const { return cell[index]; };

	bool operator==(Vector &v) const { return x==v.x && y==v.y && z==v.z; };

	// normalize
	Vector normalize()
	{
		const Float f = 1.0f / mag();
		*this *= f;
		return *this;
	};

	// get normalized copy
	friend Vector normalize(const Vector &v)
	{
		const Float f = 1.0f / v.mag();
		return v * f;
	};

	// square magnitude, magnitude
	Float mag2() const	{ return dot(*this, *this); };
	Float mag() const	{ return sqrtf(mag2()); };

	// negative
	Vector operator-() const { return Vector(-x, -y, -z); };

	// accumulate
	Vector operator+=(const Vector &v)
	{
#ifdef NO_SSE
		x += v.x;
		y += v.y;
		z += v.z;
#else
		mps = _mm_add_ps(mps, v.mps);
#endif
		return *this;
	};

	// multiply
	Vector operator*=(const Float &f)
	{
		x *= f;
		y *= f;
		z *= f;
		return *this;
	};


	// cut
	Vector operator/=(const Float &f)
	{
		Float finv = 1./f;
		x *= finv;
		y *= finv;
		z *= finv;
		return *this;
	};

	// sum
	friend Vector operator+(const Vector &a, const Vector &b)
	{
#ifdef NO_SSE
		return Vector(a.x + b.x, a.y + b.y, a.z + b.z);
#else
		return Vector(_mm_add_ps(a.mps, b.mps));
#endif
	};

	// difference
	friend Vector operator-(const Vector &a, const Vector &b)
	{
#ifdef NO_SSE
		return Vector(a.x - b.x, a.y - b.y, a.z - b.z);
#else
		return Vector(_mm_sub_ps(a.mps, b.mps));
#endif
	};

	// dot product
	friend Float dot(const Vector &a, const Vector &b)
	{
		return a.x * b.x + a.y * b.y + a.z * b.z;
	};

	// cross product
	friend Vector cross(const Vector &a, const Vector &b)
	{
		return Vector(a.y * b.z - a.z * b.y,
			a.z * b.x - a.x * b.z,
			a.x * b.y - a.y * b.x);
	};

	// product of vector and scalar
	friend Vector operator*(const Vector &v, const Float &f)
	{
		return Vector(f * v.x, f * v.y, f * v.z);
	};

	friend Vector operator*(const Float &f, const Vector &v)
	{
		return v * f;
	};

	// scalar division
	friend Vector operator/(const Vector &v, const Float &f)
	{
		const Float finv = 1./f;
		return Vector(v.x * finv, v.y * finv, v.z * finv);
	};

	friend Vector operator/(const Float &f, const Vector &v)
	{
#ifdef NO_SSE
		return Vector(f / v.x, f / v.y, f / v.z);
#else
		return Vector(_mm_div_ps(_mm_set_ps1(f), v.mps));
#endif
	};

	// vector plus scalar
	friend Vector operator+(const Vector &v, const Float &f)
	{
		return Vector(v.x + f, v.y + f, v.z + f);
	};

	// vector minus scalar
	friend Vector operator-(const Vector &v, const Float &f)
	{
		return Vector(v.x - f, v.y - f, v.z - f);
	};

	// cell by cell product (only usable for colours)
	friend Vector operator*(const Vector &a,  const Vector &b)
	{
#ifdef NO_SSE
		return Vector(a.x * b.x, a.y * b.y, a.z * b.z);
#else
		return Vector(_mm_mul_ps(a.mps, b.mps));
#endif
	};

	// write
	friend ostream & operator<<(ostream &st, const Vector &v)
	{
		return st << "(" << v.x << "," << v.y  << "," << v.z << ")";
	};

	// read
	friend istream & operator>>(istream &st, Vector &v)
	{
		char s[10];
		st.getline(s, 10, '(');
		st >> v.x;
		st.getline(s, 10, ',');
		st >> v.y;
		st.getline(s, 10, ',');
		st >> v.z;
		st.getline(s, 10, ')');
		return st;
	};
};

// Colour is the same type as Vector; its components can be read as r, g, b, a.
typedef Vector Colour;

#ifndef NO_SSE
/**
 * Packet of four vectors processed together with SSE.
 *
 * The x components of the four vectors share one register (mx), the y
 * components another (my) and the z components a third (mz). The same
 * storage is also reachable as the register array ma[0..2] and as the float
 * arrays x[4], y[4], z[4], where index i selects the i-th vector.
 */
class VectorPacket
{
public:
	union {
		__m128 ma[3];
		struct {
			__m128 mx;
			__m128 my;
			__m128 mz;
		};
		struct {
			float x[4];
			float y[4];
			float z[4];
		};
	};

	/** Leaves the contents uninitialised. */
	VectorPacket() {}
	/** Packet built from three ready-made registers. */
	VectorPacket(__m128 ax, __m128 ay, __m128 az):
		mx(ax), my(ay), mz(az) {}
	/** Packet holding four copies of v. */
	VectorPacket(const Vector &v):
		mx(_mm_set_ps1(v.x)), my(_mm_set_ps1(v.y)), mz(_mm_set_ps1(v.z)) {}

	/** Extract the i-th vector (i in 0..3, not range-checked). */
	Vector getVector(int i) const
	{
		return Vector(x[i], y[i], z[i]);
	}

	/** Overwrite the i-th vector with the x, y, z of v. */
	void setVector(int i, const Vector &v)
	{
		x[i] = v.x; y[i] = v.y; z[i] = v.z;
	}

	/**
	 * Normalize all four vectors in place.
	 * mOne is declared outside this file - presumably a register holding
	 * 1.0f in every lane; confirm against common.h.
	 */
	void normalize()
	{
		// locals are named so that they do not shadow the x/y/z member arrays
		const __m128 xx = _mm_mul_ps(mx, mx); // x*x
		const __m128 yy = _mm_mul_ps(my, my); // y*y
		const __m128 zz = _mm_mul_ps(mz, mz); // z*z
		__m128 m = _mm_add_ps(_mm_add_ps(xx, yy), zz); // x*x + y*y + z*z
		m = _mm_sqrt_ps(m);                   // magnitude
		m = _mm_div_ps(mOne, m);              // mOne / magnitude
		mx = _mm_mul_ps(mx, m);
		my = _mm_mul_ps(my, m);
		mz = _mm_mul_ps(mz, m);
	}

	/** Add v to this packet and return a copy of the result. */
	VectorPacket operator+=(const VectorPacket &v)
	{
		mx = _mm_add_ps(mx, v.mx);
		my = _mm_add_ps(my, v.my);
		mz = _mm_add_ps(mz, v.mz);
		return *this;
	}

	/**
	 * Add v only in the lanes selected by mask.
	 * Where the mask bits are set the lane receives (this + v); where they
	 * are clear the lane keeps its previous value. Returns a copy of the
	 * result.
	 */
	VectorPacket selectiveAdd(__m128 mask, const VectorPacket &v)
	{
		mx = _mm_or_ps(_mm_and_ps(mask, _mm_add_ps(mx, v.mx)),
			_mm_andnot_ps(mask, mx));
		my = _mm_or_ps(_mm_and_ps(mask, _mm_add_ps(my, v.my)),
			_mm_andnot_ps(mask, my));
		mz = _mm_or_ps(_mm_and_ps(mask, _mm_add_ps(mz, v.mz)),
			_mm_andnot_ps(mask, mz));
		return *this;
	}

	/**
	 * Add the per-lane scalars m to x, y and z, only in the lanes selected
	 * by mask; lanes whose mask bits are clear keep their previous value.
	 * Returns a copy of the result.
	 */
	VectorPacket selectiveAdd(__m128 mask, const __m128 m)
	{
		mx = _mm_or_ps(_mm_and_ps(mask, _mm_add_ps(mx, m)),
			_mm_andnot_ps(mask, mx));
		my = _mm_or_ps(_mm_and_ps(mask, _mm_add_ps(my, m)),
			_mm_andnot_ps(mask, my));
		mz = _mm_or_ps(_mm_and_ps(mask, _mm_add_ps(mz, m)),
			_mm_andnot_ps(mask, mz));
		return *this;
	}

	/** Four dot products at once, one per lane. */
	friend __m128 dot(const VectorPacket &a, const VectorPacket &b)
	{
		return _mm_add_ps(_mm_add_ps(
			_mm_mul_ps(a.mx, b.mx),
			_mm_mul_ps(a.my, b.my)),
			_mm_mul_ps(a.mz, b.mz));
	}

	/** Lane-wise sum of two packets. */
	friend VectorPacket operator+(const VectorPacket &a, const VectorPacket &b)
	{
		return VectorPacket(
			_mm_add_ps(a.mx, b.mx),
			_mm_add_ps(a.my, b.my),
			_mm_add_ps(a.mz, b.mz));
	}

	/** Lane-wise difference of two packets. */
	friend VectorPacket operator-(const VectorPacket &a, const VectorPacket &b)
	{
		return VectorPacket(
			_mm_sub_ps(a.mx, b.mx),
			_mm_sub_ps(a.my, b.my),
			_mm_sub_ps(a.mz, b.mz));
	}

	/** Scale each vector by the scalar in its lane of m. */
	friend VectorPacket operator*(const VectorPacket &v,  const __m128 &m)
	{
		return VectorPacket(
			_mm_mul_ps(v.mx, m),
			_mm_mul_ps(v.my, m),
			_mm_mul_ps(v.mz, m));
	}

	/** Per-lane scalar m divided by each component of v. */
	friend VectorPacket operator/(const __m128 &m, const VectorPacket &v)
	{
		return VectorPacket(
			_mm_div_ps(m, v.mx),
			_mm_div_ps(m, v.my),
			_mm_div_ps(m, v.mz));
	}

	/** Cell by cell product (only usable for colours). */
	friend VectorPacket operator*(const VectorPacket &a,  const VectorPacket &b)
	{
		return VectorPacket(
			_mm_mul_ps(a.mx, b.mx),
			_mm_mul_ps(a.my, b.my),
			_mm_mul_ps(a.mz, b.mz));
	}

	/** Write the four vectors as "[v0,v1,v2,v3]". */
	friend std::ostream & operator<<(std::ostream &st, const VectorPacket &v)
	{
		// the closing bracket now matches the opening "["
		return st << "[" << v.getVector(0) << "," << v.getVector(1)
			<< "," << v.getVector(2) << "," << v.getVector(3) << "]";
	}

};
#endif

#endif
