
#ifndef RAYTRACE_COMMONS_H
#define RAYTRACE_COMMONS_H

#include <shaders/commons_hlsl.glsl>
#include "raytrace_setup.glsl"

// Flattens 3D cell coordinates into a linear index of the GRID_RES^3 grid.
// Layout: x varies fastest, then y, then z (idx = cx + cy * GRID_RES + cz * GRID_RES^2).
// No range checking is done on the coordinates.
uint rt_calculate_grid_cell_idx(uint cx, uint cy, uint cz)
{
	// Horner form of the same polynomial
	uint row = cz * GRID_RES + cy;
	return row * GRID_RES + cx;
}

// Maps fine-grid cell coordinates to the linear index of the enclosing high-level cell.
// Each coordinate is divided by 4 and the coarse grid has GRID_RES/4 cells per axis,
// so one high-level cell covers a 4x4x4 block of fine cells. Layout is x-fastest.
uint rt_calculate_grid_high_cell_idx(uint cx, uint cy, uint cz)
{
	uint hx = cx / 4;
	uint hy = cy / 4;
	uint hz = cz / 4;
	return hx + (hy + hz * (GRID_RES / 4)) * (GRID_RES / 4);
}

#ifndef RT_READ_ONLY
// Flags the high-level cell that contains fine cell (cx, cy, cz) as occupied
// by storing 1 into in_buckets.sizes_high. The store is a plain (non-atomic) write.
void mark_high_level_cell_occupied(uint cx, uint cy, uint cz)
{
	// this is lazy for now. maybe we can reuse this data?
	// NOTE: maybe count faces and use this count as possible occlusion value?
	in_buckets.sizes_high[rt_calculate_grid_high_cell_idx(cx, cy, cz)] = 1;
}

// Flags the high-level cell that contains fine cell (cx, cy, cz) as empty
// by storing 0 into in_buckets.sizes_high. The store is a plain (non-atomic) write.
void mark_high_level_cell_empty(uint cx, uint cy, uint cz)
{
	// this is lazy for now. maybe we can reuse this data?
	// NOTE: maybe count faces and use this count as possible occlusion value?
	in_buckets.sizes_high[rt_calculate_grid_high_cell_idx(cx, cy, cz)] = 0;
}
#endif

// Returns the in_buckets.sizes_high entry of the high-level cell containing fine cell (cx, cy, cz).
// The mark_high_level_cell_* helpers store 1 for occupied and 0 for empty.
uint is_high_level_cell_occupied(uint cx, uint cy, uint cz)
{
	return in_buckets.sizes_high[rt_calculate_grid_high_cell_idx(cx, cy, cz)];
}

// -- faces link list handling --------------------------------------------------------------------------------------
// NOTE: This split is just a workaround for an NVIDIA OpenGL driver limitation: it fails to handle large arrays in SSBOs.

// One node of a per-cell face list stored in FacesLinkedListBuffer.node_buffer.
struct link_list_node
{
    uint value;     // payload written by link_list_push_value()
    uint next;      // index (into node_buffer) of the node allocated by the push that filled this one
};

// each link list starts at index assigned to the cell index
// Shared node pool for all face lists, plus the counter used to hand out node indices.
layout (std430) BUFF_ATTR buffer FacesLinkedListBuffer {
    
	uint buffer_counter;                                    // global 'allocation' counter; link_list_push_value() increments it atomically and uses the previous value as a node index
    link_list_node node_buffer[];                           // take next one to 'allocate' nodes
} in_faces_list_data;

// Current tail node index of every list; indexed by list index and updated atomically on push.
layout (std430) BUFF_ATTR buffer FacesLinkedListTailsBuffer {
    uint tails[];											// tail for each list. head starts at index x + y * GRID_RES + z * GRID_RES * GRID_RES;
} in_faces_list_tails_data;

#ifndef RT_READ_ONLY
// Resets list `list_index` to empty: its tail is pointed back at its own head node,
// which lives at node index `list_index`.
void link_list_init(uint list_index)
{
	// this is called in separate pass so lets be lazy. maybe it will be worth to optimize this
	uint head_node = list_index;
	in_faces_list_tails_data.tails[list_index] = head_node;
}

// Appends `value` to the list identified by `list_index`.
//
// A fresh node index is taken from the global counter and atomically swapped in as the
// list's new tail. The value is written into the PREVIOUS tail node, whose `next` is set
// to the fresh index, so the node at the current tail never holds a value yet.
// Finally the per-list entry counter in_buckets.sizes[list_index] is incremented.
//
// NOTE(review): there is no check of the allocated index against the size of node_buffer.
void link_list_push_value(uint list_index, uint value)
{
    // allocate: atomicAdd returns the counter value before the increment
    uint cntr = atomicAdd(in_faces_list_data.buffer_counter, 1);
    // we still need to provide proper linking atomically.
    // if there is a race between allocate and insertion then it will still work. just the indices will not grow but might be mixed
    uint tail = atomicExchange(in_faces_list_tails_data.tails[list_index], cntr);
    // new value: fill the old tail node and link it to the newly allocated one
    in_faces_list_data.node_buffer[tail].next = cntr;
    in_faces_list_data.node_buffer[tail].value = value;    

    // counter for entries
    atomicAdd(in_buckets.sizes[list_index], 1);
}
#endif

// ------------------------------------------------------------------------------------------------------------------

// Fetches the position of vertex `idx`: three consecutive entries of in_vtx_data.data
// starting at idx * VERTEX_COORD_STRIDE_FLOATS + VERTEX_COORD_OFFSET_FLOATS.
vec3 rt_get_vertex(uint idx)
{
	uint base = idx * VERTEX_COORD_STRIDE_FLOATS + VERTEX_COORD_OFFSET_FLOATS;
	return vec3(in_vtx_data.data[base + 0],
	            in_vtx_data.data[base + 1],
	            in_vtx_data.data[base + 2]);
}

// NOTE: Needs to match put_normal_raytrace() in 0texture_geometry_transformation.gsh
// Fetches the normal of vertex `idx`. The normal occupies two buffer entries whose bits hold
// snorm16 pairs: the first entry unpacks to (x, y), the low half of the second entry to z.
vec3 rt_get_vertex_normal(uint idx)
{
	uint base = idx * VERTEX_NORMAL_STRIDE_FLOATS + VERTEX_NORMAL_OFFSET_FLOATS;
	uint packed_xy = asuint(in_vtx_data.data[base + 0]);
	uint packed_z = asuint(in_vtx_data.data[base + 1]);
	vec2 xy = unpackSnorm2x16(packed_xy).xy;
	float z = unpackSnorm2x16(packed_z).x;
	return vec3(xy, z);
}

// Fetches the first UV set of vertex `idx`: two consecutive entries of in_vtx_data.data
// starting at idx * VERTEX_UV0_STRIDE_FLOATS + VERTEX_UV0_OFFSET_FLOATS.
vec2 rt_get_vertex_uv0(uint idx)
{
	uint base = idx * VERTEX_UV0_STRIDE_FLOATS + VERTEX_UV0_OFFSET_FLOATS;
	return vec2(in_vtx_data.data[base + 0], in_vtx_data.data[base + 1]);
}

// NOTE: We are currently storing material per face, so TODO: Rename these consts
// Returns the material id stored for `idx`, read from in_vtx_data.data at
// idx * VERTEX_MATERIAL_STRIDE_FLOATS + VERTEX_MATERIAL_OFFSET_FLOATS and converted to int.
int rt_get_triangle_material(uint idx)
{
	uint slot = idx * VERTEX_MATERIAL_STRIDE_FLOATS + VERTEX_MATERIAL_OFFSET_FLOATS;
	return int(in_vtx_data.data[slot]);
}

// ------------------------------------------------------------------------------------------------------------------

// Barycentric coordinates of point p with respect to triangle (a, b, c).
// Result: x is the weight of a, y the weight of b, z the weight of c (x + y + z == 1).
// Computed from dot products of the edge vectors; a degenerate triangle makes the
// denominator zero, which is not guarded against here.
vec3 rt_barycentric_xyz(vec3 p, vec3 a, vec3 b, vec3 c)
{
	vec3 edge_ab = b - a;
	vec3 edge_ac = c - a;
	vec3 rel_p = p - a;

	float ab_ab = dot(edge_ab, edge_ab);
	float ab_ac = dot(edge_ab, edge_ac);
	float ac_ac = dot(edge_ac, edge_ac);
	float p_ab = dot(rel_p, edge_ab);
	float p_ac = dot(rel_p, edge_ac);

	float denom = ab_ab * ac_ac - ab_ac * ab_ac;
	float weight_b = (ac_ac * p_ab - ab_ac * p_ac) / denom;
	float weight_c = (ab_ab * p_ac - ab_ac * p_ab) / denom;
	float weight_a = 1.0f - weight_c - weight_b;
	return vec3(weight_a, weight_b, weight_c);
}

// Same computation as rt_barycentric_xyz but returns only the weights of b and c
// (as .x and .y of the result); the weight of a is 1 - b - c and is left to the caller.
// A degenerate triangle makes the denominator zero, which is not guarded against here.
vec2 rt_barycentric_yz(vec3 p, vec3 a, vec3 b, vec3 c)
{
	vec3 edge_ab = b - a;
	vec3 edge_ac = c - a;
	vec3 rel_p = p - a;

	float ab_ab = dot(edge_ab, edge_ab);
	float ab_ac = dot(edge_ab, edge_ac);
	float ac_ac = dot(edge_ac, edge_ac);
	float p_ab = dot(rel_p, edge_ab);
	float p_ac = dot(rel_p, edge_ac);

	float denom = ab_ab * ac_ac - ab_ac * ab_ac;
	float weight_b = (ac_ac * p_ab - ab_ac * p_ac) / denom;
	float weight_c = (ab_ab * p_ac - ab_ac * p_ab) / denom;
	return vec2(weight_b, weight_c);
}

// ------------------------------------------------------------------------------------------------------------------
// Try min() for color and max() for occlusion. This should reduce light leaking (maybe)

// Packs the color v.xyz into 32 bits and merges it into the voxel at (cx, cy, cz). v.w is ignored.
//
// Encoding (matches rt_voxel_color_decode):
//   bits  0..7   x / magnitude, scaled to 0..255
//   bits  8..15  y / magnitude, scaled to 0..255
//   bits 16..23  z / magnitude, scaled to 0..255
//   bits 24..30  integer magnitude, ceil(length(xyz)) clamped to 1..127
//   bit  31      "voxel was written" marker
//
// The first writer installs its value with a compare-and-swap against 0; every later
// writer merges with atomicMin, so the smallest encoded value wins.
void rt_store_voxel_color(uint cx, uint cy, uint cz, vec4 v)
{
	uint idx = rt_calculate_grid_cell_idx(cx, cy, cz);

	// Keep the magnitude >= 1: a black (zero-length) color would otherwise divide by zero
	// and feed NaN into the uint conversions below, whose result is undefined.
	float v_mag = max(1.0, ceil(length(v.xyz)));

	// Clamp so that negative or otherwise out-of-range input can never spill into a
	// neighbouring bit field. For non-negative colors this is a no-op.
	vec3 rgb = clamp(v.xyz / v_mag, 0.0, 1.0);
	v_mag = min(127.0, v_mag);

	// encode
	uint vi = uint(rgb.x * 255.0) | (uint(rgb.y * 255.0) << 8) | (uint(rgb.z * 255.0) << 16);
	vi |= uint(v_mag) << 24;
	vi |= 1u << 31; // marker that it was used, 1 bit

	uint prev_vi = atomicCompSwap(in_voxel_light_data.color[idx], 0u, vi);
	if (prev_vi != 0u)
		atomicMin(in_voxel_light_data.color[idx], vi);
}

// Merges occlusion value v into the voxel at (cx, cy, cz).
// v is stored as fixed point (scaled by 1024) and combined with atomicMax, so the largest value wins.
void rt_store_voxel_occlusion(uint cx, uint cy, uint cz, float v)
{
	uint fixed_point = uint(v * 1024.0);
	uint cell = rt_calculate_grid_cell_idx(cx, cy, cz);
	atomicMax(in_voxel_occlusion_data.occlusion[cell], fixed_point);
}

#if 0
// Disabled (compiled out by the surrounding #if 0).
// Would encode normal v with encode_normal_32bit() and merge it into the voxel at
// (cx, cy, cz) using atomicMax on in_voxel_normal_data.normal.
void rt_store_voxel_normal(uint cx, uint cy, uint cz, vec3 v)
{
	uint idx = rt_calculate_grid_cell_idx(cx, cy, cz);
	uint vi = encode_normal_32bit(v);
	atomicMax(in_voxel_normal_data.normal[idx], vi);
}
#endif

// Unpacks a voxel color produced by rt_store_voxel_color.
// Returns vec4(0) when the marker bit (bit 31) is clear; otherwise xyz holds the
// 8-bit channels multiplied by magnitude/255 (magnitude = bits 24..30) and w is 1.
vec4 rt_voxel_color_decode(uint vi)
{
	// marker bit clear: this voxel was never written
	if ((vi >> 31) == 0u)
		return vec4(0.0);

	uint magnitude = (vi >> 24) & 0x7fu;
	float scale = (1.0 / 255.0) * float(magnitude);

	float r = float((vi >> 0) & 0xffu) * scale;
	float g = float((vi >> 8) & 0xffu) * scale;
	float b = float((vi >> 16) & 0xffu) * scale;
	return vec4(r, g, b, 1.0);
}

// Reads and decodes the color of the voxel at grid coordinates (cx, cy, cz).
vec4 rt_get_voxel_color(uint cx, uint cy, uint cz)
{
	uint cell = rt_calculate_grid_cell_idx(cx, cy, cz);
	return rt_voxel_color_decode(in_voxel_light_data.color[cell]);
}

// Reads and decodes the color of the voxel at linear cell index idx.
vec4 rt_get_voxel_color(int idx)
{
	return rt_voxel_color_decode(in_voxel_light_data.color[idx]);
}

// Reads the occlusion of the voxel at grid coordinates (cx, cy, cz),
// converting the stored fixed-point value (scaled by 1024) back to float.
float rt_get_voxel_occlusion(uint cx, uint cy, uint cz)
{
	uint cell = rt_calculate_grid_cell_idx(cx, cy, cz);
	uint fixed_point = in_voxel_occlusion_data.occlusion[cell];
	return float(fixed_point) / 1024.0;
}

// Reads the occlusion of the voxel at linear cell index idx,
// converting the stored fixed-point value (scaled by 1024) back to float.
float rt_get_voxel_occlusion(int idx)
{
	uint fixed_point = in_voxel_occlusion_data.occlusion[idx];
	return float(fixed_point) / 1024.0;
}

// ------------------------------------------------------------------------------------------------------------------

// Cheap sine-based hash of a 2D point. fract() yields [0..1), which is remapped to [-1..1).
float rt_hash(vec2 p)
{
	float seed = dot(p, vec2(127.1, 311.7));
	float unit = fract(sin(seed) * 43758.5453123);
	return -1.0 + 2.0 * unit;
}

// Maps two random numbers to a point on the unit sphere.
// rand.x selects the angle around the z axis, rand.y is used directly as the z coordinate;
// both are expected in [-1..1). rand.z is not used.
vec3 randomSpherePoint(vec3 rand)
{
	float azimuth = (rand.x + 1.0) * PI; // [-1..1) -> [0..2*PI)
	float height = rand.y; // [-1..1), cos and acos(2v-1) cancel each other out, so we arrive at [-1..1)
	float ring_radius = sqrt(1.0 - height * height);
	return vec3(ring_radius * cos(azimuth), ring_radius * sin(azimuth), height);
}

// Returns a random unit direction in the hemisphere around normal n.
vec3 randomHemispherePoint(vec3 rand, vec3 n)
{
	/**
	 * Generate random sphere point and flip it if it points into the
	 * wrong one of the two hemispheres.
	 * This method provides a uniform distribution over the hemisphere,
	 * provided that the sphere distribution is also uniform.
	*/
	vec3 v = randomSpherePoint(rand);

	// Flip with a comparison rather than multiplying by sign(): sign() returns 0 when
	// dot(v, n) == 0, which would collapse a vector perpendicular to n to zero length.
	return (dot(v, n) < 0.0) ? -v : v;
}

#endif // RAYTRACE_COMMONS_H