//	Roast+ License

//	SIMD

#ifndef __SFJP_OPENMGL_roast__simd__sse2_char_HPP__
#define __SFJP_OPENMGL_roast__simd__sse2_char_HPP__

#include <cstdio>
#include <emmintrin.h>

#include "sse2_ibase.hpp"

namespace roast
{
	namespace simd 
	{

		/*=========================================================*/
		
		/*
			printf format strings for m128i_i8_print(): sixteen signed decimal fields.
			  *_FMT     : plain "%d"
			  *_3dFMT   : width 3, space padded  (*_3DFMT  is the same string)
			  *_03dFMT  : width 3, zero padded   (*_03DFMT is the same string)
		*/
		#define ROAST_SIMD_M128I_I8_PRINT_FMT		"%d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d"
		#define ROAST_SIMD_M128I_I8_PRINT_3dFMT		"%3d, %3d, %3d, %3d, %3d, %3d, %3d, %3d, %3d, %3d, %3d, %3d, %3d, %3d, %3d, %3d"
		#define ROAST_SIMD_M128I_I8_PRINT_3DFMT		"%3d, %3d, %3d, %3d, %3d, %3d, %3d, %3d, %3d, %3d, %3d, %3d, %3d, %3d, %3d, %3d"
		#define ROAST_SIMD_M128I_I8_PRINT_03dFMT	"%03d, %03d, %03d, %03d, %03d, %03d, %03d, %03d, %03d, %03d, %03d, %03d, %03d, %03d, %03d, %03d"
		#define ROAST_SIMD_M128I_I8_PRINT_03DFMT	"%03d, %03d, %03d, %03d, %03d, %03d, %03d, %03d, %03d, %03d, %03d, %03d, %03d, %03d, %03d, %03d"

		/*
			Print the sixteen 8-bit lanes of `m` to stdout as signed integers.

			m   : register whose bytes are printed, lane 0 first.
			fmt : printf format consuming sixteen int arguments
			      (defaults to ROAST_SIMD_M128I_I8_PRINT_FMT). No newline is added.

			The lanes are spilled to a local array with _mm_storeu_si128 instead of
			being read through the `m128i_i8` union member: that member only exists
			in MSVC's __m128i, so the store keeps this helper buildable on GCC/Clang.
		*/
		inline void m128i_i8_print(__m128i m, const char* fmt = ROAST_SIMD_M128I_I8_PRINT_FMT)
		{
			//	Explicitly `signed char` so the values stay signed even where plain char is unsigned.
			signed char lanes[16];
			_mm_storeu_si128(reinterpret_cast<__m128i*>(lanes), m);

			//	Each lane is promoted to int by the variadic call, matching "%d".
			printf(fmt
				, lanes[0],  lanes[1],  lanes[2],  lanes[3]
				, lanes[4],  lanes[5],  lanes[6],  lanes[7]
				, lanes[8],  lanes[9],  lanes[10], lanes[11]
				, lanes[12], lanes[13], lanes[14], lanes[15]
			);
		}
		
		/*
			printf format strings for m128i_i8_printu(): sixteen unsigned fields.
			  *_FMT     : decimal "%u"
			  *_02xFMT  : two-digit lower-case hex
			  *_02XFMT  : two-digit upper-case hex
		*/
		#define ROAST_SIMD_M128I_I8_PRINTU_FMT		"%u, %u, %u, %u, %u, %u, %u, %u, %u, %u, %u, %u, %u, %u, %u, %u"
		#define ROAST_SIMD_M128I_I8_PRINTU_02xFMT	"%02x, %02x, %02x, %02x, %02x, %02x, %02x, %02x, %02x, %02x, %02x, %02x, %02x, %02x, %02x, %02x"
		#define ROAST_SIMD_M128I_I8_PRINTU_02XFMT	"%02X, %02X, %02X, %02X, %02X, %02X, %02X, %02X, %02X, %02X, %02X, %02X, %02X, %02X, %02X, %02X"

		/*
			Print the sixteen 8-bit lanes of `m` to stdout as unsigned values (0..255).

			m   : register whose bytes are printed, lane 0 first.
			fmt : printf format consuming sixteen unsigned int arguments
			      (defaults to ROAST_SIMD_M128I_I8_PRINTU_FMT). No newline is added.

			The lanes are spilled to a local array with _mm_storeu_si128 instead of
			being read through the MSVC-only `m128i_i8` union member, so the helper
			also builds on GCC/Clang.
		*/
		inline void m128i_i8_printu(__m128i m, const char* fmt = ROAST_SIMD_M128I_I8_PRINTU_FMT)
		{
			unsigned char lanes[16];
			_mm_storeu_si128(reinterpret_cast<__m128i*>(lanes), m);

			//	Pass real `unsigned int` values so the argument type matches %u / %x / %X.
			printf(fmt
				, static_cast<unsigned int>(lanes[0])
				, static_cast<unsigned int>(lanes[1])
				, static_cast<unsigned int>(lanes[2])
				, static_cast<unsigned int>(lanes[3])
				, static_cast<unsigned int>(lanes[4])
				, static_cast<unsigned int>(lanes[5])
				, static_cast<unsigned int>(lanes[6])
				, static_cast<unsigned int>(lanes[7])
				, static_cast<unsigned int>(lanes[8])
				, static_cast<unsigned int>(lanes[9])
				, static_cast<unsigned int>(lanes[10])
				, static_cast<unsigned int>(lanes[11])
				, static_cast<unsigned int>(lanes[12])
				, static_cast<unsigned int>(lanes[13])
				, static_cast<unsigned int>(lanes[14])
				, static_cast<unsigned int>(lanes[15])
			);
		}

		/*  SSE2: 8bit integer base  */
		class _sse2_i8 : public _sse2_i
		{
		public:
			//__m128i zerofill(){ return _mm_setzero_si128(); }
			__m128i fill(char c){ return _mm_set1_epi8(c); }
			
			void print(__m128i m, const char* fmt = ROAST_SIMD_M128I_I8_PRINT_FMT){
				m128i_i8_print(m, fmt); }
			void printu(__m128i m, const char* fmt = ROAST_SIMD_M128I_I8_PRINT_FMT){
				m128i_i8_printu(m, fmt); }
			
			//	==
			_REG_RET cmpeq(_REG_PARAM m1, _REG_PARAM m2){ return _cmpeq_epi8(m1,m2); }
			//	<
			_REG_RET cmplt(_REG_PARAM m1, _REG_PARAM m2){ return _mm_cmplt_epi8(m1,m2); }
			//	>
			_REG_RET cmpgt(_REG_PARAM m1, _REG_PARAM m2){ return _mm_cmpgt_epi8(m1,m2); }
		};
		
		/*=========================================================*/

		/*  SSE2: unsigned char
		    Lane-wise 8-bit add/sub without saturation (results wrap modulo 256).
		    NOTE(review): cmplt/cmpgt inherited from _sse2_i8 use the signed
		    epi8 comparisons - confirm that is intended for unsigned data.  */
		class _sse2_uchar : public _sse2_i8
		{
		public:
			//	m1 + m2 per 8-bit lane, wrapping on overflow.
			_REG_T add(_REG_T m1, _REG_T m2)
			{
				return _mm_add_epi8(m1, m2);
			}

			//	m1 - m2 per 8-bit lane, wrapping on underflow.
			_REG_T sub(_REG_T m1, _REG_T m2)
			{
				return _mm_sub_epi8(m1, m2);
			}

			//	mul / div are left disabled, exactly as before:
			/*_REG_T mul(_REG_T m1, _REG_T m2){ return _mm_mul_pi8(m1,m2); }*/
			/*_REG_T div(_REG_T m1, _REG_T m2){ return _mm_div_pi8(m1,m2); }*/
		};
		
		/*=========================================================*/

		/*  SSE2: saturated signed char
		    Lane-wise 8-bit add/sub using the signed saturating intrinsics
		    (results clamp to the signed 8-bit range instead of wrapping).  */
		class _sse2_satu_char : public _sse2_i8
		{
		public:
			//	m1 + m2 per 8-bit lane with signed saturation.
			_REG_T add(_REG_T m1, _REG_T m2)
			{
				return _mm_adds_epi8(m1, m2);
			}

			//	m1 - m2 per 8-bit lane with signed saturation.
			_REG_T sub(_REG_T m1, _REG_T m2)
			{
				return _mm_subs_epi8(m1, m2);
			}

			//	mul / div are left disabled, exactly as before:
			/*_REG_T mul(_REG_T m1, _REG_T m2){ return _mm_mul_pi8(m1,m2); }*/
			/*_REG_T div(_REG_T m1, _REG_T m2){ return _mm_div_pi8(m1,m2); }*/
		};

		/*=========================================================*/

		/*  SSE2: saturated unsigned char
		    Lane-wise 8-bit operations using the unsigned (epu8) intrinsics.
		    NOTE(review): cmplt/cmpgt inherited from _sse2_i8 use the signed
		    epi8 comparisons - confirm that is intended for unsigned data.  */
		class _sse2_satu_uchar : public _sse2_i8
		{
		public:
			//	m1 + m2 per 8-bit lane with unsigned saturation.
			_REG_T add(_REG_T m1, _REG_T m2)
			{
				return _mm_adds_epu8(m1, m2);
			}

			//	m1 - m2 per 8-bit lane with unsigned saturation.
			_REG_T sub(_REG_T m1, _REG_T m2)
			{
				return _mm_subs_epu8(m1, m2);
			}

			//	mul / div are left disabled, exactly as before:
			/*_REG_T mul(_REG_T m1, _REG_T m2){ return _mm_mul_pi8(m1,m2); }*/
			/*_REG_T div(_REG_T m1, _REG_T m2){ return _mm_div_pi8(m1,m2); }*/

			//	Per-lane unsigned average of m1 and m2 (_mm_avg_epu8).
			_REG_T add_div2(_REG_T m1, _REG_T m2)
			{
				return _mm_avg_epu8(m1, m2);
			}

			//	Per-lane unsigned maximum of m1 and m2.
			_REG_T bigger(_REG_T m1, _REG_T m2)
			{
				return _mm_max_epu8(m1, m2);
			}

			//	Per-lane unsigned minimum of m1 and m2.
			_REG_T smaller(_REG_T m1, _REG_T m2)
			{
				return _mm_min_epu8(m1, m2);
			}

			/*========================================================*/
			/*
				SAD (= Sum of Absolute Differences) over unsigned bytes.

				The register is handled as two independent halves
				(bytes 0-7 and bytes 8-15), so one call yields two sums.
				Written with MSVC-style lane names, as pseudo code:

				#define absdiff(A,B)	( A < B ? B - A : A - B )
				ret.m128i_u64[0] = absdiff(m1.m128i_u8[0], m2.m128i_u8[0]) +
				                   absdiff(m1.m128i_u8[1], m2.m128i_u8[1]) +
				                   ...
				                   absdiff(m1.m128i_u8[7], m2.m128i_u8[7]);

				ret.m128i_u64[1] = absdiff(m1.m128i_u8[8], m2.m128i_u8[8]) +
				                   absdiff(m1.m128i_u8[9], m2.m128i_u8[9]) +
				                   ...
				                   absdiff(m1.m128i_u8[15], m2.m128i_u8[15]);
			*/
			_REG_T sad(_REG_T m1, _REG_T m2)
			{
				return _mm_sad_epu8(m1, m2);
			}
		};

		/*=========================================================*/
	}
}

#endif//__SFJP_OPENMGL_roast__simd__sse2_char_HPP__
