#version 330 core

// Fragment color output (location 0, index 0); main() writes the weighted sum of all taps here.
layout(location = 0, index = 0) out vec4 outcol;

// Source texture; every tap of the blur samples from it.
uniform sampler2D tex0;
// Blur axis/step: main() multiplies .xy by tex0siz.zw and by the tap distances in ofs_px.
// Only .xy is read in this file. Presumably (1,0) / (0,1) for a horizontal / vertical pass
// -- TODO confirm against the host code.
uniform vec4 blurdir;
uniform vec4 tex0siz; //xy=size in pixels, zw = 1/xy (only zw is read in this file)

// Coordinate of the center tap. NOTE(review): the "_nm" suffix presumably means normalized
// texture coordinates -- confirm against the stage that writes it.
in vec2 texcoord0_nm;

// Kernel weights for the 9-tap blur in main(): w0 weights the center tap and wN weights
// the two taps N steps away on either side. Exactly one of the three sets below is meant
// to be active at a time; each set satisfies w0 + 2*(w1+w2+w3+w4) = 1.

//triangle (disabled): weights halve with each step away from the center; 16 + 2*(8+4+2+1) = 46
// const float w0 = 16.0f / 46.0f;
// const float w1 = 8.0f / 46.0f;
// const float w2 = 4.0f / 46.0f;
// const float w3 = 2.0f / 46.0f;
// const float w4 = 1.0f / 46.0f;

//gauss (active): binomial coefficients C(8,k) = 1 8 28 56 70 56 28 8 1, normalized by 2^8 = 256
const float w0 = 70.0f / 256.0f;
const float w1 = 56.0f / 256.0f;
const float w2 = 28.0f / 256.0f;
const float w3 = 8.0f  / 256.0f;
const float w4 = 1.0f  / 256.0f;

//step (disabled): all nine taps weighted equally (1/9 each)
// const float w0 = 1.0f / 9.0f;
// const float w1 = 1.0f / 9.0f;
// const float w2 = 1.0f / 9.0f;
// const float w3 = 1.0f / 9.0f;
// const float w4 = 1.0f / 9.0f;

// Tap distances from the center (x..w = 1..4); main() scales them by blurdir.xy * tex0siz.zw.
const vec4 ofs_px = vec4( 1, 2, 3, 4 );

// 9-tap 1D blur: samples tex0 at the center coordinate and at 1..4 steps on either
// side along blurdir.xy, weights the samples with w0..w4 and writes the sum to outcol.
//TODO: use bilinear filtering
//TODO: use texture gather
void main()
{
	// Texture-coordinate delta of a single step: blurdir.xy scaled by 1/size (tex0siz.zw).
	vec2 step_nm = blurdir.xy * tex0siz.zw;

	// Offsets of the taps that are 1, 2, 3 and 4 steps away from the center.
	vec2 tap1 = ofs_px.x * step_nm;
	vec2 tap2 = ofs_px.y * step_nm;
	vec2 tap3 = ofs_px.z * step_nm;
	vec2 tap4 = ofs_px.w * step_nm;

	vec2 center = texcoord0_nm;

	// Left-to-right sum; the negative-side taps are accumulated in the order 3, 2, 4, 1,
	// followed by the center tap and the positive-side taps 1..4.
	outcol = vec4( 0.0f )
		+ w3 * texture( tex0, center - tap3 )
		+ w2 * texture( tex0, center - tap2 )
		+ w4 * texture( tex0, center - tap4 )
		+ w1 * texture( tex0, center - tap1 )
		+ w0 * texture( tex0, center )
		+ w1 * texture( tex0, center + tap1 )
		+ w2 * texture( tex0, center + tap2 )
		+ w3 * texture( tex0, center + tap3 )
		+ w4 * texture( tex0, center + tap4 );
}
