#pragma OPENCL EXTENSION cl_amd_printf : enable

// Program-scope sampler used for every image read in this file:
// coordinates are normalized, out-of-range coordinates repeat (tile),
// and neighbouring texels are linearly interpolated.
const sampler_t smp = CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_REPEAT | CLK_FILTER_LINEAR;

/*
 * ApplyCurl - accumulate a curl-like force for each particle.
 *
 * One work-item handles one element: it reads positions[i], samples the 2D
 * image `vol` on three coordinate planes, estimates partial derivatives with
 * forward differences and adds the resulting vector to forces[i].
 *
 *   positions  per-particle position; only .xyz is read. It is remapped with
 *              p * 0.5 + 0.5 before sampling (presumably from [-1, 1] to the
 *              normalized [0, 1] image range - confirm against the host code).
 *   forces     per-particle accumulator; updated in place with +=.
 *   vol        image sampled through `smp` (normalized coordinates, repeat
 *              addressing, linear filtering). The .x channel is sampled on
 *              the (y, z) plane, .y on (x, z) and .z on (x, y).
 *   power      per-component multiplier applied to the result; the w
 *              component of the added vector is always 0.
 *
 * NOTE: there is no bounds guard, so the global work size in dimension 0 must
 * not exceed the number of elements in `positions` and `forces`.
 */
__kernel void ApplyCurl(__global float4 *positions,__global float4 *forces, __read_only image2d_t vol, float4 power)
{
	// get_global_id returns size_t; keep the full width instead of narrowing.
	const size_t index = get_global_id(0);

	// Float literals only: mixing a float vector with a double scalar is not
	// portable across OpenCL implementations.
	const float3 p = positions[index].xyz * 0.5f + 0.5f;

	// Finite-difference step in normalized coordinates and its reciprocal.
	// Presumably one texel of a 256-texel-wide image - TODO confirm.
	const float inv_step = 256.0f;
	const float d = 1.0f / inv_step;
	const float2 du = (float2)(d, 0.0f);	// step along the first plane axis
	const float2 dv = (float2)(0.0f, d);	// step along the second plane axis

	// Base samples: one channel per coordinate plane.
	const float fx = read_imagef(vol, smp, p.yz).x;
	const float fy = read_imagef(vol, smp, p.xz).y;
	const float fz = read_imagef(vol, smp, p.xy).z;

	// Forward differences (not yet divided by the step size).
	const float fxdy = read_imagef(vol, smp, p.yz + du).x - fx;
	const float fxdz = read_imagef(vol, smp, p.yz + dv).x - fx;

	const float fydx = read_imagef(vol, smp, p.xz + du).y - fy;
	const float fydz = read_imagef(vol, smp, p.xz + dv).y - fy;

	const float fzdx = read_imagef(vol, smp, p.xy + du).z - fz;
	const float fzdy = read_imagef(vol, smp, p.xy + dv).z - fz;

	// curl F = (dFz/dy - dFy/dz, dFx/dz - dFz/dx, dFy/dx - dFx/dy).
	// Multiplying by inv_step turns the raw differences into derivatives.
	const float4 vel = (float4)(fzdy - fydz, fxdz - fzdx, fydx - fxdy, 0.0f) * inv_step * power;

	forces[index] += vel;
}