#version 430 compatibility

#extension GL_ARB_compute_shader : enable
#extension GL_ARB_shader_storage_buffer_object : enable

/*-------------------- LBM model data -------------------------------------------------------------------------*/
// Grid extent in cells. `width` is the row stride used to flatten (i, j) into
// a linear cell index (idx = i + j*width). `height` is not referenced by the
// code in this shader; presumably it must match the host-side dispatch size.
#define width 640
#define height 320
// Number of distribution values stored per cell in f0 (9 lattice directions).
#define NUM_VECTORS 9
// Relaxation time and its reciprocal. The collision step blends each
// distribution toward equilibrium as (1-omega)*f + omega*feq.
#define tau 1.2 //0.566
#define omega (1.0/tau)
// x-components of the 9 lattice direction vectors, indexed 0..8.
// NOTE(review): the negative values are not parenthesised; this is harmless
// for the uses in this shader but fragile if pasted into other expressions.
#define ex0 0
#define ex1 1
#define ex2 0
#define ex3 -1
#define ex4 0
#define ex5 1
#define ex6 -1
#define ex7 -1
#define ex8 1
// y-components of the 9 lattice direction vectors, indexed 0..8.
#define ey0 0
#define ey1 0
#define ey2 1
#define ey3 0
#define ey4 -1
#define ey5 1
#define ey6 1
#define ey7 -1
#define ey8 -1
// For direction k, inv<k> is the index of the direction whose (ex, ey) is
// the negation of direction k's. Not referenced in this shader; presumably
// used by a boundary/streaming pass elsewhere - TODO confirm.
#define inv0 0
#define inv1 3
#define inv2 4
#define inv3 1
#define inv4 2
#define inv5 7
#define inv6 8
#define inv7 5
#define inv8 6
// Per-direction weights used in the equilibrium distribution:
// 4/9 for the rest direction, 1/9 for the four axis-aligned directions and
// 1/36 for the four diagonals (they sum to 1).
#define w0 (4.0/9.0)
#define w1 (1.0/9.0)
#define w2 (1.0/9.0)
#define w3 (1.0/9.0)
#define w4 (1.0/9.0)
#define w5 (1.0/36.0)
#define w6 (1.0/36.0)
#define w7 (1.0/36.0)
#define w8 (1.0/36.0)
// Cell type flags stored in buffer F. Only cells equal to C_FLD are
// processed by main(); C_BND is not referenced in this shader.
#define C_FLD 1
#define C_BND 0
/*-------------------------------------------------------------------------------------------------------*/



// Per-cell type flag, one int per cell, compared against C_FLD in main().
layout( binding=0 ) buffer dcF { int F[  ]; };
// Per-cell x-velocity written by main() (value before the force shift).
layout( binding=1 ) buffer dcU { float U[  ]; };
// Per-cell y-velocity written by main() (value before the force shift).
layout( binding=2 ) buffer dcV { float V[  ]; };
// Declared but neither read nor written in this shader.
// NOTE(review): presumably a per-cell density buffer used by other passes - confirm.
layout( binding=3 ) buffer dcR { float R[  ]; };
// Distribution values, NUM_VECTORS consecutive floats per cell:
// direction k of cell idx lives at f0[k + idx*NUM_VECTORS].
layout( binding=4 ) buffer df { float f0[  ]; };

// Force terms supplied by the host; main() adds 0.5*devFx / 0.5*devFy to the
// velocity that is fed into the equilibrium distribution.
layout(location = 10) uniform float devFx;
layout(location = 11) uniform float devFy;

// 32x32 invocations per work group; each invocation handles one cell
// addressed by gl_GlobalInvocationID.xy.
layout( local_size_x = 32, local_size_y = 32, local_size_z = 1 ) in;

// Lattice direction components and weights gathered into tables (built from
// the ex, ey and w macros) so the collision step can loop over directions
// instead of repeating the same formula nine times.
const int LATTICE_EX[NUM_VECTORS] = int[NUM_VECTORS]( ex0, ex1, ex2, ex3, ex4, ex5, ex6, ex7, ex8 );
const int LATTICE_EY[NUM_VECTORS] = int[NUM_VECTORS]( ey0, ey1, ey2, ey3, ey4, ey5, ey6, ey7, ey8 );
const float LATTICE_W[NUM_VECTORS] = float[NUM_VECTORS]( w0, w1, w2, w3, w4, w5, w6, w7, w8 );

// Equilibrium distribution for one lattice direction.
//   wk       - weight of the direction
//   ekx, eky - direction vector components
//   rho      - cell density (sum of the distributions)
//   u, v     - velocity used for the equilibrium
float equilibrium( float wk, float ekx, float eky, float rho, float u, float v )
{
	float eu = ekx * u + eky * v;
	return wk * rho * (1.0 - (3.0/2.0) * (u*u + v*v) + 3.0 * eu + (9.0/2.0) * eu * eu);
}

// Collision step for one cell.
// For every cell flagged C_FLD: computes density and velocity from the nine
// distributions in f0, stores the velocity in U/V, shifts the velocity by
// half the host-supplied force (devFx, devFy) and relaxes each distribution
// toward its equilibrium in place: f = (1-omega)*f + omega*feq.
// Cells with any other flag are left untouched.
void main()
{
	uint i = gl_GlobalInvocationID.x;
	uint j = gl_GlobalInvocationID.y;

	// Guard against dispatches that overshoot the grid; without this an
	// out-of-range invocation would index past the end of the buffers.
	if( i >= uint(width) || j >= uint(height) )
		return;

	uint idx = i + j*uint(width);

	if( F[ idx ] != C_FLD )
		return;

	uint base = idx*uint(NUM_VECTORS);

	// Read the nine distributions once and accumulate the moments
	// (same summation order as direction index 0..8).
	float f[NUM_VECTORS];
	float rho = 0.0;
	float u = 0.0;
	float v = 0.0;
	for( int k = 0; k < NUM_VECTORS; ++k )
	{
		f[k] = f0[base + uint(k)];
		rho += f[k];
		u   += f[k] * float(LATTICE_EX[k]);
		v   += f[k] * float(LATTICE_EY[k]);
	}

	// Momentum -> velocity. A zero density would yield NaN/Inf that then
	// gets written back into f0, so fall back to zero velocity in that case.
	if( rho != 0.0 )
	{
		u = u / rho;
		v = v / rho;
	}
	else
	{
		u = 0.0;
		v = 0.0;
	}

	// The stored velocity is the one before the force shift below.
	U[idx] = u;
	V[idx] = v;

	// Velocity fed to the equilibrium is shifted by half the force term.
	u = u + 0.5 * devFx;
	v = v + 0.5 * devFy;

	// Relax every direction toward equilibrium and write it back.
	for( int k = 0; k < NUM_VECTORS; ++k )
	{
		float feq = equilibrium( LATTICE_W[k], float(LATTICE_EX[k]), float(LATTICE_EY[k]), rho, u, v );
		f0[base + uint(k)] = (1-omega) * f[k] + omega*feq;
	}
}

