#include "HDR.fx"

// Constant buffer bound to slot b0.
cbuffer CSConstants : register(b0)
{
	// Stored in the first component of constant register c0.
	// NOTE(review): not referenced by the shader code visible in this file;
	// possibly consumed by code pulled in through HDR.fx - confirm before removing.
	float g_Exposure : packoffset(c0.x);
};

// Source image read by the blur (shader resource slot t0).
Texture2D<float4>	g_TextureInput	: register(t0);
// Destination image written by the blur (unordered access slot u0).
RWTexture2D<float4>	g_TextureOutput : register(u0);

// Sampler used for every read of g_TextureInput (slot s0). Its filter and
// address modes are set by the application, not in this file.
SamplerState PointSampler : register(s0);

// Half-width of the blur window: each output averages 2 * MaxSampleRadius + 1 samples.
static const int MaxSampleRadius	= 10;
// Number of output pixels produced by one thread group along X.
static const int GridSize			= 128;
// Extra samples loaded on each side of the grid so that edge pixels of the
// grid can still read a full window.
static const int ApronSize			= MaxSampleRadius;
// Threads per group: the grid plus both aprons (128 + 2 * 10 = 148).
static const int TGSize				= GridSize + (ApronSize * 2);

// Group-shared cache of the row segment being blurred; CSDOFBlur fills one
// entry per thread, indexed by SV_GroupThreadID.x.
groupshared float4 Samples[TGSize];

// Horizontal box blur over one row segment of g_TextureInput.
//
// Each thread group handles GridSize consecutive pixels of the row selected by
// GroupID.y, starting at column GroupID.x * GridSize. All TGSize threads first
// fetch one sample each (grid plus ApronSize pixels on either side) into the
// group-shared Samples array. After the group barrier, the threads that map to
// a grid pixel inside the texture width average the 2 * MaxSampleRadius + 1
// cached samples centred on themselves and write the result to g_TextureOutput.
//
// DispatchThreadID and GroupIndex are accepted but not used.
// NOTE(review): samples whose position falls outside the texture rely on the
// address mode of PointSampler, which is configured by the host - confirm.
[numthreads(TGSize, 1, 1)]
void CSDOFBlur(uint3 GroupID : SV_GroupID, uint3 DispatchThreadID : SV_DispatchThreadID, uint3 GroupThreadID : SV_GroupThreadID, uint GroupIndex : SV_GroupIndex)
{
	// Query the source dimensions; used both for UV normalisation and for the
	// right-edge bounds check below.
	int inputWidth, inputHeight;
	g_TextureInput.GetDimensions(inputWidth, inputHeight);

	// Position of this thread relative to the first grid pixel; negative values
	// and values >= GridSize belong to the apron.
	int localX	= GroupThreadID.x - ApronSize;
	int tileX	= GroupID.x * GridSize;

	// Absolute texel this thread is responsible for loading.
	int texelX	= tileX + localX;
	int texelY	= GroupID.y;

	// Address the texel centre in normalised coordinates.
	float2 uv = (float2(texelX, texelY) + 0.5f) / float2(inputWidth, inputHeight);

	// Every thread (apron included) contributes one sample to the shared cache.
	Samples[GroupThreadID.x] = g_TextureInput.SampleLevel( PointSampler, uv, 0 );

	// All samples must be visible to the whole group before any thread sums them.
	GroupMemoryBarrierWithGroupSync();

	// Apron threads and threads past the right edge of the texture only load.
	bool isApronThread	= (localX < 0) || (localX >= GridSize);
	bool isPastEdge		= !(texelX < inputWidth);
	if ( isApronThread || isPastEdge )
	{
		return;
	}

	// Accumulate the window centred on this thread's own cache entry.
	float4 sum = 0;
	for (int offset = -MaxSampleRadius; offset <= MaxSampleRadius; offset++)
	{
		sum += Samples[GroupThreadID.x + offset];
	}
	sum /= MaxSampleRadius * 2 + 1;

	g_TextureOutput[int2(texelX, texelY)] = sum;
}

