
// Work-group sizing constants: their product is block_threads, which is used
// as local_size_x for the compute stage.
// NOTE(review): 32 presumably mirrors the hardware warp width of the target GPU -- confirm.
#define warp_size 32
#define warp_count 8

#ifdef compute

// Shorter aliases for the verbose gl_*InvocationID built-ins, converted to int.
// The built-ins are uvec3; a scalar constructor applied to a vector keeps only
// its first (x) component, which fits the 1D local_size_x layout declared below.
// In the visible code `thread` is only referenced by the disabled draw-buffer path.
#define thread int(gl_LocalInvocationID)
#define gthread int(gl_GlobalInvocationID)

// Number of invocations in one work group.
#define block_threads (warp_size * warp_count)

layout (local_size_x = block_threads) in;

// Per-particle positions; main() reads and rewrites element [gthread].
// xyz is the position. w is decremented by main() every step, and once it
// drops below zero the particle is parked at (10000, 10000, 10000).
buffer position_buffer {
	vec4 position[];
};

// Per-particle velocities; main() reads and rewrites element [gthread].
// Only xyz is modified by main(); w is written back unchanged.
buffer velocity_buffer {
	vec4 velocity[];
};

/*buffer draw_buffer {
	vec4 particles[];
};*/

// shmem optimizations, #ELITE
//shared vec4 output_draw[2*block_threads];

// 3D texture sampled by perlin(); rgb supplies a direction and alpha a rotation rate.
uniform sampler3D noise;
// Not referenced anywhere in the visible code.
// NOTE(review): presumably the particle count -- confirm against the host code.
uniform int N;
// Animation clock: scrolls and rotates the field, and main() switches its
// velocity rule once this reaches 160.0. Units are not visible from here.
uniform float time;

// Samples the noise texture at two frequencies and returns a direction that
// rotates over time.
//
// pos: sample position; multiplied by .006123 and .007317 to form the two
//      texture coordinates.
// Returns the normalized rgb of the summed samples, rotated by the angle
// alpha*(time+1.0) within the plane spanned by that direction and a tangent
// perpendicular to it.
vec3 perlin(vec3 pos) {
	vec4 grad = texture(noise, pos*.006123)+texture(noise, pos*.007317);
	grad.xyz = normalize(grad.xyz);

	// Helper axis for the tangent: take whichever of X/Y is LESS aligned with
	// grad. Choosing the more aligned axis makes the cross product vanish when
	// grad lies along X or Y, and normalize() of a zero vector is NaN/undefined.
	// With this choice the cross product is non-zero for every unit-length grad.
	vec3 helper = abs(grad.x) < abs(grad.y) ? vec3(1.0, .0, .0) : vec3(.0, 1.0, .0);
	vec3 tangent = normalize(cross(grad.xyz, helper));

	// Rotation angle grows linearly with time; alpha sets the per-sample rate.
	float angle = grad.a*(time+1.0);
	return cos(angle)*grad.xyz+sin(angle)*tangent;
}

// Vector potential differentiated by curl(): perlin() evaluated at pos shifted
// by .1*time along every axis, so the sampled field scrolls as time advances.
vec3 potential(vec3 pos) {
	vec3 scroll = .1*vec3(time);
	return perlin(pos+scroll);
}

// Approximates the curl of potential() at pos with central differences.
//
// pos: position at which the field is evaluated.
// Returns (dPz/dy - dPy/dz, dPx/dz - dPz/dx, dPy/dx - dPx/dy), where each
// partial derivative is (potential(pos+h) - potential(pos-h)) / (2h), h = 4.0.
vec3 curl(vec3 pos) {
	// eps.x is 0 and eps.y is the step h, so e.g. eps.yxx is (h, 0, 0).
	vec2 eps = vec2(.0, 4.0);
	float span = 2.0*eps.y;

	// The whole difference must be parenthesized before dividing: written as
	// a - b/span, only the second sample is scaled and the result is not a
	// derivative (so the returned field is not a curl).
	// NOTE(review): this makes the returned magnitudes smaller than before; the
	// gain constants applied to curl() in main() may need retuning.
	vec3 dx = (potential(pos+eps.yxx)-potential(pos-eps.yxx))/span;
	vec3 dy = (potential(pos+eps.xyx)-potential(pos-eps.xyx))/span;
	vec3 dz = (potential(pos+eps.xxy)-potential(pos-eps.xxy))/span;
	return vec3(dy.z-dz.y, dz.x-dx.z, dx.y-dy.x);
}

// Integration time step; main() moves each position by half of it before and
// half of it after the velocity update.
float dt = .0002;

// Advances the particle at index gthread by one time step and stores the
// result back into the position and velocity buffers.
void main() {
	vec4 pos = position[gthread];
	vec4 vel = velocity[gthread];
	float half_dt = dt*.5;

	// Leapfrog-style step: half a position step, velocity update, half a position step.
	pos.xyz += half_dt*vel.xyz;

	if (time < 160.0) {
		// Damped velocity plus a curl push that weakens as the speed grows.
		vec3 push = 4000.0*curl(pos.xyz)/(.02+length(vel.xyz));
		vel.xyz = .96*vel.xyz+push;
	} else {
		// Stronger damping, curl sampled at 4x the position, constant pull along -Y.
		vec3 push = 6.0*curl(4.0*pos.xyz);
		vel.xyz = .92*vel.xyz+push-vec3(.0, 5.5, .0);
	}

	pos.xyz += half_dt*vel.xyz;

	// pos.w counts down every step; once it goes negative the particle is
	// parked far away with zero velocity.
	pos.w -= .015;
	bool expired = pos.w < .0f;
	if (expired) {
		pos.xyz = vec3(10000.0f);
		vel.xyz = vec3(.0);
	}

	position[gthread] = pos;
	velocity[gthread] = vel;

	// Disabled draw-buffer output path, kept for reference:
	/*vel = normalize(vel)*max(.01, length(vel));
	output_draw[thread*2  ] = pos+dt*.5*vel;
	output_draw[thread*2+1] = pos-dt*.5*vel;

	barrier(); memoryBarrier();

	particles[gl_WorkGroupID.x*2*block_threads+thread] = output_draw[thread];
	particles[(gl_WorkGroupID.x*2+1)*block_threads+thread] = output_draw[gthread+block_threads];*/
}

#endif
