
// Requires s_BlueNoise import

#include <shaders/materials/commons_sphere_sampling.glsl>

// Table of sixteen (approximately) unit-length directions covering the full sphere.
//   [0..3]  : the (+-1, +-1, +-1) / sqrt(3) diagonals with an even number of negative signs
//   [4..15] : sign / axis variations of one vector with components
//             (0.903007, 0.388844, 0.182696)
const vec3 CONES[16] = vec3[16](
	// diagonals
	vec3( 0.57735,   0.57735,   0.57735),
	vec3( 0.57735,  -0.57735,  -0.57735),
	vec3(-0.57735,   0.57735,  -0.57735),
	vec3(-0.57735,  -0.57735,   0.57735),

	// dominant X
	vec3(-0.903007, -0.182696, -0.388844),
	vec3(-0.903007,  0.182696,  0.388844),
	vec3( 0.903007, -0.182696,  0.388844),
	vec3( 0.903007,  0.182696, -0.388844),

	// dominant Y
	vec3(-0.388844, -0.903007, -0.182696),
	vec3( 0.388844, -0.903007,  0.182696),
	vec3( 0.388844,  0.903007, -0.182696),
	vec3(-0.388844,  0.903007,  0.182696),

	// dominant Z
	vec3(-0.182696, -0.388844, -0.903007),
	vec3( 0.182696,  0.388844, -0.903007),
	vec3(-0.182696,  0.388844,  0.903007),
	vec3( 0.182696, -0.388844,  0.903007)
);

// Rate used when animating blue-noise lookups over time: the noise slice index is
// derived from int(g_global_time * NOISE_SPEED).
#define NOISE_SPEED 33.0

// Traces `rays` jittered cones through the voxel volume around `dir` and returns the
// averaged result: rgb = gathered voxel color, a = occlusion (clamped to 1).
//
// Each cone direction is a cosine-weighted hemisphere sample oriented along `dir`,
// driven by per-pixel blue noise that is shifted every frame (g_monotonic) by the
// golden ratio. Every cone is marched until it is fully occluded, exceeds
// `trace_range`, or leaves the voxel grid after having been inside it.
//
// Parameters:
//   screen_pos               texel fetched from s_BlueNoise; slice index is the ray index.
//                            NOTE(review): assumes the caller keeps screen_pos and `rays`
//                            within the noise texture dimensions - confirm.
//   voxel_color_texture      RGBM-encoded voxel color (decoded with color_convert_rgbm_rgb).
//   voxel_occupancy_texture  voxel occupancy, read from the .r channel.
//   grid_size, grid_res,     define the world -> texture mapping used here:
//   grid_origin                uvw = ((p - grid_origin) / grid_size + 0.5) / grid_res
//   origin                   world-space start of the trace (offset along surface_normal).
//   surface_normal           direction used to lift `origin` off the surface.
//   dir                      cone axis / hemisphere orientation.
//   range                    minimum cone diameter and base step length; a diameter equal
//                            to `range` maps to mip 0. Must be > 0.
//   lightTraceDistanceScale  multiplier on the step length (step = diameter * scale).
//   roughness                scales the hemisphere sample and the cone aperture.
//   rays                     number of cones to trace and average.
//   color_falloff,           scale the travelled distance in the 1 / pow(dist * falloff, 0.75)
//   occlusion_falloff        attenuation applied to color and occlusion respectively.
//   trace_range              maximum distance marched along each cone.
//
// Returns vec4(0.0) when `range` <= 0 or `rays` <= 0.
vec4 rt_cone_trace_occlusion(
	ivec2 screen_pos,
	sampler3D voxel_color_texture,
	sampler3D voxel_occupancy_texture,
	vec3 grid_size,
	int grid_res,
	vec3 grid_origin,
	vec3 origin,
	vec3 surface_normal,
	vec3 dir,
	float range,
	float lightTraceDistanceScale,
	float16_t roughness,
	int rays,
	float color_falloff,
	float occlusion_falloff,
	float trace_range)
{
	// A non-positive range cannot advance the march, and zero rays would turn the
	// final average into 0 / 0.
	if (range <= 0.0 || rays <= 0)
		return vec4(0.0);

	const float golden_ratio = 1.61803398875;
	const float occlusion_threshold = 0.75;
	const float power = 0.75;
	const float max_mip = 3.5;

	float16_t occ = 0.0;
	f16vec3 occ_color = f16vec3(0.0);

	// Lift the start point off the surface (presumably to avoid self-occlusion).
	origin += surface_normal * range * 3.0; // TODO: this has to depend also on the mip level being sampled

	mat3 vecSpace = matrixFromVector(dir);

	// Values that are identical for every ray.
	int frame = g_monotonic;
	float coneAperture = tan(roughness * M_PI * 0.5f * 0.1f);
	float range_rcp = 1.0 / range;

	for (int si = 0; si < rays; si += 1)
	{
		f16vec3 c_color = f16vec3(0.0);
		float16_t c_occlusion = float16_t(0.0);

		// Cosine-weighted direction from per-pixel blue noise, decorrelated across frames.
		vec2 hash = fract(texelFetch(s_BlueNoise, ivec3(screen_pos.xy, si), 0).rg + frame * golden_ratio);
		vec3 d = CosineSampleHemisphere(hash.x * roughness, hash.y);
		d = vecSpace * d;

		// Jittered start distance; this one should reduce popup when switching between voxels.
		float l = (0.5 + hash.x) * 0.5;

		// Becomes true once a sample has landed inside the grid, so that a trace
		// starting outside the volume is still allowed to march into it.
		bool entered_grid = false;

		// NOTE: Adding hard bound for number of steps make it much slower...
		while (l < trace_range && c_occlusion < float16_t(1.0))
		{
			// Cone footprint at this distance, never smaller than the base range.
			float diameter = max(range, 2.0 * coneAperture * l);
			float mip = min(max_mip, log2(diameter * range_rcp));

			vec3 grid_p = (origin + d * l - grid_origin) / grid_size;
			vec3 sample_p = (grid_p + 0.5) / grid_res;

			bool in_grid = all(greaterThanEqual(sample_p, vec3(0.0))) && all(lessThan(sample_p, vec3(1.0)));

			if (in_grid)
			{
				entered_grid = true;

				// Probe one mip coarser first: because of filtering it accumulates the alpha of
				// the finer texels, so a near-zero value means there is nothing to fetch here.
				// When the probe is below the threshold its value is used as-is for all channels.
				f16vec4 gc = f16vec4(textureLod(voxel_occupancy_texture, sample_p, min(max_mip, mip + 1.0)).rrrr);

				if (gc.a > 0.004)
				{
					gc.a = float16_t(textureLod(voxel_occupancy_texture, sample_p, mip).r);
					gc.rgb = f16vec3(color_convert_rgbm_rgb(textureLod(voxel_color_texture, sample_p, mip).rgba));
				}

				// Treat mostly-filled samples as fully opaque.
				if (gc.a > occlusion_threshold)
					gc.a = 1.0;

				// Color is weighted by the remaining transparency; occlusion is summed and clamped.
				float16_t blend = 1.0 - c_occlusion;
				c_occlusion = min(1.0, c_occlusion + gc.a);
				c_color += gc.rgb * blend;
			}
			else if (entered_grid)
			{
				// The cone left the volume on at least one axis: nothing more to gather.
				break;
			}

			l += diameter * lightTraceDistanceScale;

			if (c_occlusion >= 0.995)
				break;
		}

		// Attenuate by the distance reached; color is additionally modulated by the
		// angle between the cone and the main direction.
		occ += float16_t(c_occlusion / pow(l * occlusion_falloff, power));
		occ_color += c_color * float16_t(dot(d, dir) / pow(l * color_falloff, power));
	}

	occ = occ / float16_t(rays);
	occ_color = occ_color / float16_t(rays);

	// 'artsy' way starts here. We are probably reaching some crappy rounding here...
	// TODO: Make the clamp configurable either here or in the composite pass
	if (occ > 1.0)
		occ = 1.0;

	return vec4(occ_color, occ);
}

// Marches a ray along `dir` through the voxel volume in 16 fixed steps of length
// `range` and returns the color accumulated front-to-back (each sample is weighted
// by the transparency remaining in front of it).
//
// Parameters:
//   screen_pos     currently unused (kept for interface compatibility).
//   voxel_texture  voxel volume; rgb = color, a = opacity.
//   grid_size, grid_res, grid_origin
//                  define the world -> texture mapping used here:
//                    uvw = ((p - grid_origin) / grid_size + 0.5) / grid_res
//   origin         world-space start of the trace (offset along `normal`).
//   normal         direction used to offset `origin`.
//   dir            ray direction.
//   range          distance between consecutive samples.
//   roughness      currently unused (kept for interface compatibility).
vec3 rt_cone_trace_specular(vec2 screen_pos, sampler3D voxel_texture, vec3 grid_size, int grid_res, vec3 grid_origin, vec3 origin, vec3 normal, vec3 dir, float range, float roughness)
{
	const int RAYS = 1;
	const int STEPS = 16;

	// NOTE(review): the offset is applied against `normal`, whereas
	// rt_cone_trace_occlusion offsets along its normal - confirm the sign convention.
	origin -= normal * length(grid_size) * 2.75; // TODO: this has to depend also on the mip level being sampled

	vec3 occ_color = vec3(0.0);

	for (int si = 0; si < RAYS; si += 1)
	{
		// TODO: roughness-driven jitter of the direction is not implemented;
		// every ray follows `dir` exactly.
		vec3 d = dir;
		vec4 c = vec4(0.0);

		for (int i = 0; i < STEPS; i++)
		{
			// Position is derived at the top of the step so that each of the STEPS
			// samples is taken at a distinct distance (0, range, 2 * range, ...).
			vec3 p = origin + float(i) * d * range;
			vec3 grid_p = (p - grid_origin) / grid_size;

			// Always samples the finest mip.
			// NOTE(review): widening the lookup with distance would need a non-zero starting LOD.
			vec4 gc = textureLod(voxel_texture, (grid_p + 0.5) / grid_res, 0.0);

			// Front-to-back compositing, weighted by the transparency left so far.
			float blend = max(0.0, 1.0 - c.a);
			c.a   += gc.a * blend;
			c.rgb += gc.rgb * blend;
		}

		occ_color += c.rgb;
	}

	return occ_color / float(RAYS);
}