//	Roast+ License v0.1

//	SIMD

#ifndef __SFJP_OPENMGL_roast_simd_int_xmm_register_HPP__
#define __SFJP_OPENMGL_roast_simd_int_xmm_register_HPP__

#define _MAKE_INTRIN_(X)	_mm_ ## X ## _epi32
#define _MAKE_INTRIN(X)		_MAKE_INTRIN_(X)


namespace roast
{
	namespace simd 
	{

		/** Template specialization for int ********************************************/

		template <int _PERMIT_SIMD>
		class xmm_register<int,_PERMIT_SIMD> : public ::roast::simd::simd_types
		{
		protected:
			int _SIMD_TYPE;
			typedef xmm_register<float,_PERMIT_SIMD> FLOAT_xmm_register;
			typedef xmm_register<int,_PERMIT_SIMD> INT_xmm_register;
			typedef xmm_register<int,_PERMIT_SIMD> THIS_xmm_register;
			typedef __m128i ___m128;

			/*int IS_ON_SSE4(){
				return ( _PERMIT_SIMD >= ROAST_SIMD_TYPE_SSE4 && _PERMIT_SIMD <= ROAST_SIMD_TYPE_SSE_END );
			}
			int IS_ON_SSE2(){
				return ( _PERMIT_SIMD >= ROAST_SIMD_TYPE_SSE2 && _PERMIT_SIMD <= ROAST_SIMD_TYPE_SSE_END );
			}*/
			int IS_ON_SSE4(){
				return ( _ROAST_IS_ON_SSE4 );
			}
			int IS_ON_SSE2(){
				return ( _ROAST_IS_ON_SSE2 );
			}

		protected:
			___m128 m_m128;
		public:
			//	Constructors / destructor
			//	Default constructor: records the permitted SIMD level only.
			//	The register contents (m_m128) are left uninitialized.
			xmm_register()
				: _SIMD_TYPE(_PERMIT_SIMD)
			{
			}
			xmm_register( const INT_xmm_register& reg ) : _SIMD_TYPE(_PERMIT_SIMD) {
				//m_m128 = reg.m_m128;
				set(reg);
			}
			xmm_register( const FLOAT_xmm_register& reg ) : _SIMD_TYPE(_PERMIT_SIMD) {
				//m_m128 = reg.m_m128;
				set(reg);
			}
			xmm_register( int n ) : _SIMD_TYPE(_PERMIT_SIMD) {
				//m_m128 = reg.m_m128;
				set(n);
			}
			//	Wraps a raw __m128i value.
			xmm_register( ___m128 m )
				: _SIMD_TYPE(_PERMIT_SIMD)
				, m_m128(m)
			{
			}
			//	Virtual destructor; there is nothing to release.
			virtual ~xmm_register()
			{
			}

			///////////////////////////////////////////

			//	Setters and getters

			//	Returns a float register built from this register's lanes,
			//	converted with m128_int4_to_float4 (defined elsewhere).
			FLOAT_xmm_register to_floats () const {
				FLOAT_xmm_register as_floats( m128_int4_to_float4(m_m128) );
				return as_floats;
			}

			void set_from_froats ( const FLOAT_xmm_register &param_reg ){
				m_m128 = m128_float4_to_int4( param_reg.get_m128() );
			}

			void zero(){
				m_m128 = _mm_setzero_si128();
			}

			//	Stores a raw __m128i value.
			void set (___m128 m){
				this->m_m128 = m;
			}

			void set (int n){
				m_m128 = _mm_set1_epi32(n);
			}

			void set ( const INT_xmm_register& param_reg ){
				m_m128 = param_reg.get_m128();
			}

			void set ( const FLOAT_xmm_register &param_reg ){	//	from FLOAT_xmm_register
				m_m128 = m128_float4_to_int4( param_reg.get_m128() );
			}

			//	Assigns a raw __m128i value.
			xmm_register& operator = (___m128 m){
				m_m128 = m;
				return *this;
			}

			//	Copy assignment: takes over the value held by param_reg.
			xmm_register& operator = (const INT_xmm_register &param_reg){
				m_m128 = param_reg.get_m128();
				return *this;
			}

			//	Assignment from a float register; the lanes are converted with
			//	m128_float4_to_int4 (defined elsewhere).
			xmm_register& operator = (const FLOAT_xmm_register &param_reg){
				m_m128 = m128_float4_to_int4( param_reg.get_m128() );
				return *this;
			}

			//	Returns the wrapped __m128i value.
			___m128 get_m128 () const {
				return this->m_m128;
			}
			//	Returns the wrapped __m128i value (same result as get_m128).
			___m128 get_m128i () const {
				return this->m_m128;
			}

			//	Implicit conversion to the raw __m128i value.
			operator ___m128 () const {
				return this->m_m128;
			}

			//	Implicit conversion to the float register type
			//	(same result as to_floats()).
			operator FLOAT_xmm_register () const {
				return FLOAT_xmm_register( m128_int4_to_float4(m_m128) );
			}

			//	[] operator

			//	Returns a mutable reference to 32-bit lane i.
			//	No range check is performed on i.
			//	NOTE(review): m128i_i32 looks like the MSVC-specific member of __m128i -
			//	confirm before building with another compiler.
			int& operator[] (int i)
			{
				int* const lanes = m_m128.m128i_i32;
				return lanes[ i ];
			}

			////////////////////////////////////////////////////////

			//	Arithmetic operations

			//	Lane-wise 32-bit addition. Returns a new register; *this is unchanged.
			//	Const-qualified so it can be used on const operands.
			_ROAST_FORCE_INLINE xmm_register operator + (const xmm_register &param_reg) const {
				return xmm_register( _MAKE_INTRIN(add) (m_m128, param_reg.m_m128) );
			}

			//	Lane-wise 32-bit addition in place.
			_ROAST_FORCE_INLINE xmm_register& operator += (const xmm_register &param_reg){
				const ___m128 sum = _MAKE_INTRIN(add) (m_m128, param_reg.m_m128);
				m_m128 = sum;
				return *this;
			}

			//	Lane-wise 32-bit subtraction (this - param_reg). Returns a new register;
			//	*this is unchanged. Const-qualified so it can be used on const operands.
			_ROAST_FORCE_INLINE xmm_register operator - (const xmm_register &param_reg) const {
				return xmm_register( _MAKE_INTRIN(sub) (m_m128, param_reg.m_m128) );
			}

			//	Lane-wise 32-bit subtraction in place (this -= param_reg).
			_ROAST_FORCE_INLINE xmm_register& operator -= (const xmm_register &param_reg){
				const ___m128 difference = _MAKE_INTRIN(sub) (m_m128, param_reg.m_m128);
				m_m128 = difference;
				return *this;
			}

			//	Lane-wise 32-bit multiplication. Returns a new register; *this is unchanged.
			//	Delegates to operator *= on a copy, so both share one implementation.
			//	Const-qualified so it can be used on const operands.
			_ROAST_FORCE_INLINE xmm_register operator * (const xmm_register &param_reg) const {
				xmm_register r(m_m128);
				r *= param_reg;
				return r;
			}

			//	Lane-wise 32-bit multiplication in place, keeping the low 32 bits of
			//	each product.
			//	With SSE4 enabled a single _mm_mullo_epi32 is used. Otherwise the result
			//	is assembled from SSE2 instructions, replacing the former per-lane scalar
			//	multiply that relied on the m128i_i32 member and on signed overflow.
			_ROAST_FORCE_INLINE xmm_register& operator *= (const xmm_register &param_reg)
			{
				if ( IS_ON_SSE4() )
					m_m128 = _mm_mullo_epi32 (m_m128, param_reg.m_m128);
				else{
					//	Copy first so that "x *= x" keeps reading the original operand.
					const ___m128 rhs = param_reg.m_m128;

					//	_mm_mul_epu32 multiplies lanes 0 and 2 into two 64-bit products;
					//	shifting both operands right by one lane does the same for 1 and 3.
					//	The low 32 bits of a product are identical for signed and unsigned.
					const ___m128 prod02 = _mm_mul_epu32( m_m128, rhs );
					const ___m128 prod13 = _mm_mul_epu32( _mm_srli_si128( m_m128, 4 ),
					                                      _mm_srli_si128( rhs, 4 ) );

					//	Gather the low halves (32-bit lanes 0 and 2 of each product pair)
					//	and interleave them back into lane order 0,1,2,3.
					m_m128 = _mm_unpacklo_epi32(
						_mm_shuffle_epi32( prod02, _MM_SHUFFLE(0,0,2,0) ),
						_mm_shuffle_epi32( prod13, _MM_SHUFFLE(0,0,2,0) ) );
				}
				return *this;
			}

			/*
			xmm_register mul64_12 (const xmm_register &param_reg){
				___m128 m = _mm_shuffle_epi32( m_m128, imm8 );
				return xmm_register ( _mm_mul_epu32( m, param_reg.m_m128 ));
			}

			xmm_register mul64_34 (const xmm_register &param_reg){
				___m128 m = _mm_shuffle_epi32( m_m128, imm8 );
				return xmm_register ( _mm_mul_epu32( m, param_reg.m_m128 ));
			}

			xmm_register mul64_13 (const xmm_register &param_reg){
				___m128 m = _mm_shuffle_epi32( m_m128, imm8 );
				return xmm_register ( _mm_mul_epu32( m, param_reg.m_m128 ));
			}*/

			//	Widening multiply via _mm_mul_epu32: the unsigned 32-bit values in
			//	lanes 0 and 2 of both operands are multiplied into two 64-bit products.
			//	Returns a new register; *this is unchanged.
			//	Const-qualified so it can be used on const operands.
			_ROAST_FORCE_INLINE xmm_register mul64_24 (const xmm_register &param_reg) const {
				return xmm_register ( _mm_mul_epu32( m_m128, param_reg.m_m128 ));
			}

			//	Lane-wise division (this / param_reg). Returns a new register; *this is
			//	unchanged. Delegates to operator /= on a copy.
			//	Const-qualified so it can be used on const operands.
			_ROAST_FORCE_INLINE xmm_register operator / (const xmm_register &param_reg) const {
				xmm_register r(m_m128);
				r /= param_reg;
				return r;
			}

			//	Lane-wise division in place (this /= param_reg).
			//	When _ROAST_IS_ON_SSE is set, both operands are converted to float
			//	registers, divided there, and converted back with to_ints().
			//	Otherwise each 32-bit lane is divided with the scalar integer operator.
			//	NOTE(review): the float round trip may not match integer division for
			//	large magnitudes or in its rounding - depends on to_ints(); confirm.
			_ROAST_FORCE_INLINE xmm_register& operator /= (const xmm_register &param_reg){
				if ( !(_ROAST_IS_ON_SSE) )
				{
					//	Scalar path: divide the four lanes one after another.
					for ( int lane = 0; lane < 4; ++lane )
						m_m128.m128i_i32[lane] /= param_reg.m_m128.m128i_i32[lane];
					return *this;
				}

				//	SSE path: divide in the float domain.
				FLOAT_xmm_register dividend(*this);
				FLOAT_xmm_register divisor(param_reg);
				dividend /= divisor;
				set( dividend.to_ints().m_m128 );

				return *this;
			}

			////////////////////////////////////////////////////////

			//	Comparison operations

			//	Whole-register equality: true only when all four 32-bit lanes are equal.
			//	Const-qualified so it can be used on const operands.
			_ROAST_FORCE_INLINE bool operator == (const xmm_register &param_reg) const
			{
				//	cmpeq sets every bit of a matching lane, so the 16-bit byte mask
				//	is 0xffff exactly when all four lanes match.
				const __m128i lane_equal = _MAKE_INTRIN(cmpeq) (m_m128, param_reg.m_m128);
				return _mm_movemask_epi8(lane_equal) == 0xffff;
			}

			//	Whole-register inequality: true when at least one 32-bit lane differs.
			//	Computed directly so that it does not depend on operator ==, and
			//	const-qualified so it can be used on const operands.
			//	(An earlier note here recorded that a "cmpneq" integer intrinsic
			//	apparently does not exist, hence the cmpeq-based test.)
			_ROAST_FORCE_INLINE bool operator != (const xmm_register &param_reg) const
			{
				const __m128i lane_equal = _MAKE_INTRIN(cmpeq) (m_m128, param_reg.m_m128);
				return _mm_movemask_epi8(lane_equal) != 0xffff;
			}

			////

			//	<
			//	Lane-wise signed "this < param_reg". Each result lane is all ones when
			//	the comparison holds and zero otherwise.
			//	Const-qualified so it can be used on const operands.
			_ROAST_FORCE_INLINE xmm_register<bool,_PERMIT_SIMD>
				operator < (const xmm_register &param_reg) const
			{
				xmm_register r;
				r.m_m128 = _MAKE_INTRIN(cmplt) (m_m128, param_reg.m_m128);
				return r;
			}

			//	<=
			//	Lane-wise signed "this <= param_reg". Each result lane is all ones when
			//	the comparison holds and zero otherwise.
			//	SSE2 only provides eq/lt/gt for 32-bit integers (there is no
			//	_mm_cmple_epi32), so the mask is built as (lt | eq).
			_ROAST_FORCE_INLINE xmm_register<bool,_PERMIT_SIMD>
				operator <= (const xmm_register &param_reg) const
			{
				const ___m128 less  = _MAKE_INTRIN(cmplt) (m_m128, param_reg.m_m128);
				const ___m128 equal = _MAKE_INTRIN(cmpeq) (m_m128, param_reg.m_m128);
				xmm_register r;
				r.m_m128 = _mm_or_si128 (less, equal);
				return r;
			}

			//	>
			//	Lane-wise signed "this > param_reg". Each result lane is all ones when
			//	the comparison holds and zero otherwise.
			//	Const-qualified so it can be used on const operands.
			_ROAST_FORCE_INLINE xmm_register<bool,_PERMIT_SIMD>
				operator > (const xmm_register &param_reg) const
			{
				xmm_register r;
				r.m_m128 = _MAKE_INTRIN(cmpgt) (m_m128, param_reg.m_m128);
				return r;
			}

			//	>=
			//	Lane-wise signed "this >= param_reg". Each result lane is all ones when
			//	the comparison holds and zero otherwise.
			//	SSE2 only provides eq/lt/gt for 32-bit integers (there is no
			//	_mm_cmpge_epi32), so the mask is built as (gt | eq).
			_ROAST_FORCE_INLINE xmm_register<bool,_PERMIT_SIMD>
				operator >= (const xmm_register &param_reg) const
			{
				const ___m128 greater = _MAKE_INTRIN(cmpgt) (m_m128, param_reg.m_m128);
				const ___m128 equal   = _MAKE_INTRIN(cmpeq) (m_m128, param_reg.m_m128);
				xmm_register r;
				r.m_m128 = _mm_or_si128 (greater, equal);
				return r;
			}

			////

			//	! (this < param_reg)
			//	Lane-wise ! (this < param_reg).
			//	There is no _mm_cmpnlt_epi32; for integers "not less than" is the same
			//	as "greater than or equal", so the mask is built as (gt | eq).
			_ROAST_FORCE_INLINE xmm_register<bool,_PERMIT_SIMD>
				not_lt (const xmm_register &param_reg) const
			{
				const ___m128 greater = _MAKE_INTRIN(cmpgt) (m_m128, param_reg.m_m128);
				const ___m128 equal   = _MAKE_INTRIN(cmpeq) (m_m128, param_reg.m_m128);
				THIS_xmm_register r;
				r.m_m128 = _mm_or_si128 (greater, equal);
				return r;
			}

			//	! (this <= param_reg)
			//	Lane-wise ! (this <= param_reg).
			//	There is no _mm_cmpnle_epi32; for integers "not less-or-equal" is the
			//	same as "greater than", so cmpgt is used directly.
			_ROAST_FORCE_INLINE xmm_register<bool,_PERMIT_SIMD>
				not_lteq (const xmm_register &param_reg) const
			{
				xmm_register r;
				r.m_m128 = _MAKE_INTRIN(cmpgt) (m_m128, param_reg.m_m128);
				return r;
			}

			//	! (this > param_reg)
			//	Lane-wise ! (this > param_reg).
			//	There is no _mm_cmpngt_epi32; for integers "not greater than" is the
			//	same as "less than or equal", so the mask is built as (lt | eq).
			_ROAST_FORCE_INLINE xmm_register<bool,_PERMIT_SIMD>
				not_gt (const xmm_register &param_reg) const
			{
				const ___m128 less  = _MAKE_INTRIN(cmplt) (m_m128, param_reg.m_m128);
				const ___m128 equal = _MAKE_INTRIN(cmpeq) (m_m128, param_reg.m_m128);
				xmm_register r;
				r.m_m128 = _mm_or_si128 (less, equal);
				return r;
			}

			//	! (this >= param_reg)
			//	Lane-wise ! (this >= param_reg).
			//	There is no _mm_cmpnge_epi32; for integers "not greater-or-equal" is
			//	the same as "less than", so cmplt is used directly.
			_ROAST_FORCE_INLINE xmm_register<bool,_PERMIT_SIMD>
				not_gteq (const xmm_register &param_reg) const
			{
				xmm_register r;
				r.m_m128 = _MAKE_INTRIN(cmplt) (m_m128, param_reg.m_m128);
				return r;
			}

			/////////////////////////////////////////////////////////

			//	Bitwise operations
			//	Bitwise AND of the full 128 bits. Returns a new register; *this is
			//	unchanged. Const-qualified so it can be used on const operands.
			_ROAST_FORCE_INLINE xmm_register operator & (const xmm_register &param_reg) const {
				return xmm_register( _mm_and_si128 (m_m128, param_reg.m_m128) );
			}

			//	Bitwise AND in place.
			_ROAST_FORCE_INLINE xmm_register& operator &= (const xmm_register &param_reg){
				const ___m128 masked = _mm_and_si128 (m_m128, param_reg.m_m128);
				m_m128 = masked;
				return *this;
			}

			//	Bitwise OR of the full 128 bits. Returns a new register; *this is
			//	unchanged. Const-qualified so it can be used on const operands.
			_ROAST_FORCE_INLINE xmm_register operator | (const xmm_register &param_reg) const {
				return xmm_register( _mm_or_si128 (m_m128, param_reg.m_m128) );
			}

			//	Bitwise OR in place.
			_ROAST_FORCE_INLINE xmm_register& operator |= (const xmm_register &param_reg){
				const ___m128 merged = _mm_or_si128 (m_m128, param_reg.m_m128);
				m_m128 = merged;
				return *this;
			}

			//	XOR
			//	Bitwise XOR of the full 128 bits. Returns a new register; *this is
			//	unchanged. Const-qualified so it can be used on const operands.
			_ROAST_FORCE_INLINE xmm_register operator ^ (const xmm_register &param_reg) const {
				return xmm_register( _mm_xor_si128 (m_m128, param_reg.m_m128) );
			}

			//	Bitwise XOR in place.
			_ROAST_FORCE_INLINE xmm_register& operator ^= (const xmm_register &param_reg){
				const ___m128 toggled = _mm_xor_si128 (m_m128, param_reg.m_m128);
				m_m128 = toggled;
				return *this;
			}

			//	AndNot
			//	Returns (~this) & param_reg over the full 128 bits, following the
			//	operand order of _mm_andnot_si128 (the first operand is the inverted one).
			//	*this is unchanged. Const-qualified so it can be used on const operands.
			_ROAST_FORCE_INLINE xmm_register andnot (const xmm_register &param_reg) const {
				return xmm_register( _mm_andnot_si128 (m_m128, param_reg.m_m128) );
			}

			////////////////////////////////////////////////////////////////////

			//	Special operations

#undef min
#undef max
			//	For each of the four values, takes the smaller one
			//	Lane-wise signed minimum of *this and param_reg.
			//	The former call to "_mm_min_dp" named an intrinsic that does not exist.
			//	The result is built with SSE2 only: a cmplt mask selects this register's
			//	lane where it is smaller and param_reg's lane otherwise.
			_ROAST_FORCE_INLINE xmm_register min(const xmm_register &param_reg) const {
				const ___m128 this_is_less = _MAKE_INTRIN(cmplt) (m_m128, param_reg.m_m128);
				xmm_register r;
				r.m_m128 = _mm_or_si128(
					_mm_and_si128   ( this_is_less, m_m128 ),
					_mm_andnot_si128( this_is_less, param_reg.m_m128 ) );
				return r;
			}

			//	For each of the four values, takes the larger one
			//	Lane-wise signed maximum of *this and param_reg.
			//	The former call to "_mm_max_dp" named an intrinsic that does not exist.
			//	The result is built with SSE2 only: a cmpgt mask selects this register's
			//	lane where it is larger and param_reg's lane otherwise.
			_ROAST_FORCE_INLINE xmm_register max(const xmm_register &param_reg) const {
				const ___m128 this_is_greater = _MAKE_INTRIN(cmpgt) (m_m128, param_reg.m_m128);
				xmm_register r;
				r.m_m128 = _mm_or_si128(
					_mm_and_si128   ( this_is_greater, m_m128 ),
					_mm_andnot_si128( this_is_greater, param_reg.m_m128 ) );
				return r;
			}

			//	Computes the square root of each of the four values
			//	Lane-wise integer square root of *this, truncated toward zero.
			//	The former call to "_mm_sqrt_dp" named an intrinsic that does not exist.
			//	There is no integer square-root intrinsic, so each pair of lanes is
			//	widened to double (exact for every 32-bit integer), rooted, and
			//	truncated back.
			//	param_reg is kept for interface compatibility and is not used.
			//	NOTE(review): assumes the root of *this (not of param_reg) is intended -
			//	confirm against callers.
			//	Negative lanes yield the conversion's out-of-range value (0x80000000).
			_ROAST_FORCE_INLINE xmm_register sqrt(const xmm_register &param_reg) const {
				(void)param_reg;

				//	Lanes 0,1 directly; lanes 2,3 after moving them down by 8 bytes.
				const __m128d root01 = _mm_sqrt_pd( _mm_cvtepi32_pd( m_m128 ) );
				const __m128d root23 = _mm_sqrt_pd( _mm_cvtepi32_pd( _mm_srli_si128( m_m128, 8 ) ) );

				//	Each truncating conversion fills the low 64 bits with two ints;
				//	join the two halves back into lane order 0,1,2,3.
				xmm_register r;
				r.m_m128 = _mm_unpacklo_epi64( _mm_cvttpd_epi32( root01 ),
				                               _mm_cvttpd_epi32( root23 ) );
				return r;
			}

			//	Computes the reciprocal square root of each of the four values
			/*_ROAST_FORCE_INLINE xmm_register rsqrt(const xmm_register &param_reg){
				xmm_register r;
				r.m_m128 = _mm_rsqrt_ps(m_m128, param_reg.m_m128);
				return r;
			}*/

			//	Computes the reciprocal of each of the four values
			/*_ROAST_FORCE_INLINE xmm_register reciprocal(const xmm_register &param_reg){
				xmm_register r;
				r.m_m128 = _mm_rcp_ps(m_m128, param_reg.m_m128);
				return r;
			}*/

			//	Subtracts for the 1st and 3rd values, adds for the 2nd and 4th values
			//	Alternating subtract/add: lanes 0 and 2 (1st and 3rd values) hold
			//	this - param_reg, lanes 1 and 3 (2nd and 4th values) hold
			//	this + param_reg.
			//	The former call to "_mm_addsub_epi32" named an intrinsic that does not
			//	exist (addsub is only provided for float/double), and the result was
			//	left uninitialized outside the SSE3 range. The SSE2 sequence below
			//	always produces a defined result.
			_ROAST_FORCE_INLINE xmm_register sub13add24(const xmm_register &param_reg) const {
				//	All ones in lanes 0 and 2, zero in lanes 1 and 3.
				const ___m128 negate_mask = _mm_set_epi32( 0, -1, 0, -1 );

				//	(x ^ mask) - mask negates x where mask is all ones (two's complement)
				//	and leaves it untouched where mask is zero.
				const ___m128 signed_rhs =
					_mm_sub_epi32( _mm_xor_si128( param_reg.m_m128, negate_mask ), negate_mask );

				xmm_register r;
				r.m_m128 = _mm_add_epi32( m_m128, signed_rhs );
				return r;
			}

			//////////////////////////////////////////////////////////////////////////////////////////

			//	Shuffles the four 32-bit lanes of this register in place, using bits as
			//	the _mm_shuffle_epi32 control value.
			//	The switch maps the run-time value onto calls with a literal control
			//	argument - presumably because the intrinsic needs a compile-time constant.
			_ROAST_FORCE_INLINE void shuffle (unsigned char bits){
				switch(bits)
				{
				case 0: m_m128 = _mm_shuffle_epi32( m_m128, 0 ); break;

				//	Cases 1-251 are presumably generated by the macro below (defined elsewhere)
				_ROAST_MAKE_SHUFFLES( _mm_shuffle_epi32, m_m128, m_m128 )
					
				case 252: m_m128 = _mm_shuffle_epi32( m_m128, 252 ); break;
				case 253: m_m128 = _mm_shuffle_epi32( m_m128, 253 ); break;
				case 254: m_m128 = _mm_shuffle_epi32( m_m128, 254 ); break;
				case 255: m_m128 = _mm_shuffle_epi32( m_m128, 255 ); break;
				}
			}

			//	Stores into this register the lanes of from, shuffled with bits as the
			//	_mm_shuffle_epi32 control value. from itself is not modified.
			//	The switch maps the run-time value onto calls with a literal control
			//	argument - presumably because the intrinsic needs a compile-time constant.
			_ROAST_FORCE_INLINE void shuffle (const INT_xmm_register& from, unsigned char bits){
				switch(bits)
				{
				case 0: m_m128 = _mm_shuffle_epi32( from.get_m128i(), 0 ); break;

				//	Cases 1-251 are presumably generated by the macro below (defined elsewhere)
				_ROAST_MAKE_SHUFFLES( _mm_shuffle_epi32, m_m128, from.get_m128i() )
					
				case 252: m_m128 = _mm_shuffle_epi32( from.get_m128i(), 252 ); break;
				case 253: m_m128 = _mm_shuffle_epi32( from.get_m128i(), 253 ); break;
				case 254: m_m128 = _mm_shuffle_epi32( from.get_m128i(), 254 ); break;
				case 255: m_m128 = _mm_shuffle_epi32( from.get_m128i(), 255 ); break;
				}
			}

			//	Returns a new register holding this register's lanes shuffled with bits
			//	as the _mm_shuffle_epi32 control value; *this is not modified.
			//	The switch maps the run-time value onto calls with a literal control
			//	argument - presumably because the intrinsic needs a compile-time constant.
			_ROAST_FORCE_INLINE xmm_register get_shuffle (unsigned char bits){
				xmm_register r;
				switch(bits)
				{
				case 0: r.m_m128 = _mm_shuffle_epi32( m_m128, 0 ); break;

				//	Cases 1-251 are presumably generated by the macro below (defined elsewhere)
				_ROAST_MAKE_SHUFFLES( _mm_shuffle_epi32, r.m_m128, m_m128 )
					
				case 252: r.m_m128 = _mm_shuffle_epi32( m_m128, 252 ); break;
				case 253: r.m_m128 = _mm_shuffle_epi32( m_m128, 253 ); break;
				case 254: r.m_m128 = _mm_shuffle_epi32( m_m128, 254 ); break;
				case 255: r.m_m128 = _mm_shuffle_epi32( m_m128, 255 ); break;
				}

				return r;
			}
		};

	}
}

#undef _MAKE_INTRIN
#undef _MAKE_INTRIN_


#endif//__SFJP_OPENMGL_roast_simd_int_xmm_register_HPP__
