// GLSL requires the #version directive to come before everything except
// comments and white space, so it (and the #extension it depends on) is
// placed ahead of every #define in this file.
#version 120
#extension GL_EXT_gpu_shader4 : enable

// Selects which block of compile-in knobs (preset 0-5) is used below.
#define FXAA_PRESET 5

// Copyright (c) 2010 NVIDIA Corporation. All rights reserved.
//
// TO  THE MAXIMUM  EXTENT PERMITTED  BY APPLICABLE  LAW, THIS SOFTWARE  IS PROVIDED
// *AS IS*  AND NVIDIA AND  ITS SUPPLIERS DISCLAIM  ALL WARRANTIES,  EITHER  EXPRESS
// OR IMPLIED, INCLUDING, BUT NOT LIMITED  TO, IMPLIED WARRANTIES OF MERCHANTABILITY
// AND FITNESS FOR A PARTICULAR PURPOSE.  IN NO EVENT SHALL  NVIDIA OR ITS SUPPLIERS
// BE  LIABLE  FOR  ANY  SPECIAL,  INCIDENTAL,  INDIRECT,  OR  CONSEQUENTIAL DAMAGES
// WHATSOEVER (INCLUDING, WITHOUT LIMITATION,  DAMAGES FOR LOSS OF BUSINESS PROFITS,
// BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR ANY OTHER PECUNIARY LOSS)
// ARISING OUT OF THE  USE OF OR INABILITY  TO USE THIS SOFTWARE, EVEN IF NVIDIA HAS
// BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.

/*============================================================================
                                 SRGB KNOBS
------------------------------------------------------------------------------
FXAA_SRGB_ROP - Set to 1 when applying FXAA to an sRGB back buffer (DX10/11).
                This will do the sRGB to linear transform, 
                as ROP will expect linear color from this shader,
                and this shader works in non-linear color.
============================================================================*/
#define FXAA_SRGB_ROP 0

// Bool2Float(a) expands to mix(0.0, 1.0, a). It is not referenced anywhere
// else in this file.
#define Bool2Float(a) mix(0.0, 1.0, (a))
// The next two macros expand to the same name they define, so they only add
// parentheses around the mix() arguments and leave sampler2D unchanged.
#define mix(f, t, b) mix((f), (t), (b))
#define sampler2D sampler2D


// Fetches `tex` at coordinate `pos` with the level of detail forced to 0.
vec4 sampler2DLod0(sampler2D tex, vec2 pos)
{
	const float baseLevel = 0.0;
	vec4 texel = texture2DLod(tex, pos, baseLevel);
	return texel;
}

// Fetches `tex` at coordinate `pos` using explicit derivatives; the same
// `grad` vector is supplied for both the x and the y derivative.
vec4 sampler2DGrad(sampler2D tex, vec2 pos, vec2 grad)
{
	vec2 dPdx = grad;
	vec2 dPdy = grad;
	return texture2DGrad(tex, pos, dPdx, dPdy);
}

// Fetches `tex` at coordinate `pos`, level of detail 0, displaced by the
// integer texel offset `off`.
// `rcpFrame` is accepted for signature compatibility but is not used here.
// NOTE(review): texture2DLodOffset is documented as wanting a constant
// expression for its offset; here it arrives as a function parameter, which
// may be rejected by stricter compilers - confirm on the target drivers.
vec4 sampler2DOff(sampler2D tex, vec2 pos, ivec2 off, vec2 rcpFrame)
{
	const float baseLevel = 0.0;
	vec4 texel = texture2DLodOffset(tex, pos, baseLevel, off);
	return texel;
}



/*============================================================================
                              COMPILE-IN KNOBS
------------------------------------------------------------------------------
FXAA_PRESET - Choose compile-in knob preset 0-5.
------------------------------------------------------------------------------
FXAA_EDGE_THRESHOLD - The minimum amount of local contrast required 
                      to apply algorithm.
                      1.0/3.0  - too little
                      1.0/4.0  - good start
                      1.0/8.0  - applies to more edges
                      1.0/16.0 - overkill
------------------------------------------------------------------------------
FXAA_EDGE_THRESHOLD_MIN - Trims the algorithm from processing darks.
                          Perf optimization.
                          1.0/32.0 - visible limit (smaller isn't visible)
                          1.0/16.0 - good compromise
                          1.0/12.0 - upper limit (seeing artifacts)
------------------------------------------------------------------------------
FXAA_SEARCH_STEPS - Maximum number of search steps for end of span.
------------------------------------------------------------------------------
FXAA_SEARCH_ACCELERATION - How much to accelerate search,
                           1 - no acceleration
                           2 - skip by 2 pixels
                           3 - skip by 3 pixels
                           4 - skip by 4 pixels
------------------------------------------------------------------------------
FXAA_SEARCH_THRESHOLD - Controls when to stop searching.
                        1.0/4.0 - seems to be the best quality wise
------------------------------------------------------------------------------
FXAA_SUBPIX_FASTER - Turn on lower quality but faster subpix path.
                     Not recommended, but used in preset 0.
------------------------------------------------------------------------------
FXAA_SUBPIX - Toggle subpix filtering.
              0 - turn off
              1 - turn on
              2 - turn on full (ignores FXAA_SUBPIX_TRIM and CAP)
------------------------------------------------------------------------------
FXAA_SUBPIX_TRIM - Controls sub-pixel aliasing removal.
                   1.0/2.0 - low removal
                   1.0/3.0 - medium removal
                   1.0/4.0 - default removal
                   1.0/8.0 - high removal
                   0.0 - complete removal
------------------------------------------------------------------------------
FXAA_SUBPIX_CAP - Ensures fine detail is not completely removed.
                  This is important for the transition of sub-pixel detail,
                  like fences and wires.
                  3.0/4.0 - default (medium amount of filtering)
                  7.0/8.0 - high amount of filtering
                  1.0 - no capping of sub-pixel aliasing removal
============================================================================*/

/*--------------------------------------------------------------------------*/
// Each preset below defines the full set of compile-in knobs. Exactly one
// block is active, chosen by the value of FXAA_PRESET.
/*--------------------------------------------------------------------------*/
#if (FXAA_PRESET == 0)
    #define FXAA_EDGE_THRESHOLD      (1.0/4.0)
    #define FXAA_EDGE_THRESHOLD_MIN  (1.0/12.0)
    #define FXAA_SEARCH_STEPS        2
    #define FXAA_SEARCH_ACCELERATION 4
    #define FXAA_SEARCH_THRESHOLD    (1.0/4.0)
    #define FXAA_SUBPIX              1
    #define FXAA_SUBPIX_FASTER       1
    #define FXAA_SUBPIX_CAP          (2.0/3.0)
    #define FXAA_SUBPIX_TRIM         (1.0/4.0)
#endif
/*--------------------------------------------------------------------------*/
#if (FXAA_PRESET == 1)
    #define FXAA_EDGE_THRESHOLD      (1.0/8.0)
    #define FXAA_EDGE_THRESHOLD_MIN  (1.0/16.0)
    #define FXAA_SEARCH_STEPS        4
    #define FXAA_SEARCH_ACCELERATION 3
    #define FXAA_SEARCH_THRESHOLD    (1.0/4.0)
    #define FXAA_SUBPIX              1
    #define FXAA_SUBPIX_FASTER       0
    #define FXAA_SUBPIX_CAP          (3.0/4.0)
    #define FXAA_SUBPIX_TRIM         (1.0/4.0)
#endif
/*--------------------------------------------------------------------------*/
#if (FXAA_PRESET == 2)
    #define FXAA_EDGE_THRESHOLD      (1.0/8.0)
    #define FXAA_EDGE_THRESHOLD_MIN  (1.0/24.0)
    #define FXAA_SEARCH_STEPS        8
    #define FXAA_SEARCH_ACCELERATION 2
    #define FXAA_SEARCH_THRESHOLD    (1.0/4.0)
    #define FXAA_SUBPIX              1
    #define FXAA_SUBPIX_FASTER       0
    #define FXAA_SUBPIX_CAP          (3.0/4.0)
    #define FXAA_SUBPIX_TRIM         (1.0/4.0)
#endif
/*--------------------------------------------------------------------------*/
#if (FXAA_PRESET == 3)
    #define FXAA_EDGE_THRESHOLD      (1.0/8.0)
    #define FXAA_EDGE_THRESHOLD_MIN  (1.0/24.0)
    #define FXAA_SEARCH_STEPS        16
    #define FXAA_SEARCH_ACCELERATION 1
    #define FXAA_SEARCH_THRESHOLD    (1.0/4.0)
    #define FXAA_SUBPIX              1
    #define FXAA_SUBPIX_FASTER       0
    #define FXAA_SUBPIX_CAP          (3.0/4.0)
    #define FXAA_SUBPIX_TRIM         (1.0/4.0)
#endif
/*--------------------------------------------------------------------------*/
#if (FXAA_PRESET == 4)
    #define FXAA_EDGE_THRESHOLD      (1.0/8.0)
    #define FXAA_EDGE_THRESHOLD_MIN  (1.0/24.0)
    #define FXAA_SEARCH_STEPS        24
    #define FXAA_SEARCH_ACCELERATION 1
    #define FXAA_SEARCH_THRESHOLD    (1.0/4.0)
    #define FXAA_SUBPIX              1
    #define FXAA_SUBPIX_FASTER       0
    #define FXAA_SUBPIX_CAP          (3.0/4.0)
    #define FXAA_SUBPIX_TRIM         (1.0/4.0)
#endif
/*--------------------------------------------------------------------------*/
#if (FXAA_PRESET == 5)
    #define FXAA_EDGE_THRESHOLD      (1.0/8.0)
    #define FXAA_EDGE_THRESHOLD_MIN  (1.0/24.0)
    #define FXAA_SEARCH_STEPS        32
    #define FXAA_SEARCH_ACCELERATION 1
    #define FXAA_SEARCH_THRESHOLD    (1.0/4.0)
    #define FXAA_SUBPIX              1
    #define FXAA_SUBPIX_FASTER       0
    #define FXAA_SUBPIX_CAP          (3.0/4.0)
    #define FXAA_SUBPIX_TRIM         (1.0/4.0)
#endif
/*--------------------------------------------------------------------------*/
// If FXAA_PRESET matched none of the blocks above and the knobs were not
// supplied some other way, stop here with a readable diagnostic instead of
// failing later on an undefined macro inside the shader body.
#ifndef FXAA_EDGE_THRESHOLD
    #error FXAA_PRESET must select one of the presets 0-5
#endif
/*--------------------------------------------------------------------------*/
// Rescales the trimmed sub-pixel blend so it reaches 1.0 again:
// 1 / (1 - FXAA_SUBPIX_TRIM).
#define FXAA_SUBPIX_TRIM_SCALE (1.0/(1.0 - FXAA_SUBPIX_TRIM))

// Cheap luma estimate built from the red and green channels only:
// green scaled by (0.587/0.299) plus red. The blue channel is ignored.
float FxaaLuma(vec3 rgb)
{
	const float greenWeight = 0.587/0.299;
	float weightedGreen = rgb.g * greenWeight;
	return weightedGreen + rgb.r;
}

/*--------------------------------------------------------------------------*/
// Linear blend of two colors: amountOfA * a + (1 - amountOfA) * b,
// evaluated as (-amountOfA * b) + (a * amountOfA + b).
vec3 FxaaLerp3(vec3 a, vec3 b, float amountOfA)
{
	vec3 weight = vec3(amountOfA);
	vec3 scaledAPlusB = (a * weight) + b;
	vec3 negScaledB = (-weight) * b;
	return negScaledB + scaledAPlusB;
}


// Final conversion applied to every color FxaaPixelShader returns.
// With FXAA_SRGB_ROP enabled the sRGB-encoded color is converted to linear
// with the piecewise sRGB decode; otherwise the color is returned unchanged.
vec3 FxaaFilterReturn(vec3 rgb)
{
    #if FXAA_SRGB_ROP
        // Linear segment, used for components at or below 0.04045.
        vec3 linearSegment = rgb * vec3(1.0/12.92);
        // Power segment. The base is clamped to zero so that pow() is never
        // given a negative base, whose result is undefined in GLSL.
        vec3 powerBase = max(rgb * vec3(1.0/1.055) + vec3(0.055/1.055), vec3(0.0));
        vec3 powerSegment = pow(powerBase, vec3(2.4));
        // GLSL relational operators only accept scalars, and GLSL 1.20 has no
        // mix() overload taking a boolean vector, so build a 0.0/1.0 mask
        // with greaterThan() and blend with the float version of mix().
        vec3 usePower = vec3(greaterThan(rgb, vec3(0.04045)));
        return mix(linearSegment, powerSegment, usePower);
    #else
        return rgb;
    #endif
}
 


// FXAA filter for one pixel.
//   pos      - texture coordinate of the pixel being filtered.
//   tex      - texture to read from.
//   rcpFrame - per-axis step used to move one sample along x / y
//              (main() passes (1/w, 1/h)).
// Returns the filtered color, passed through FxaaFilterReturn().
// Outline: detect local contrast, decide whether the edge runs horizontally
// or vertically, walk along the edge in both directions to find its ends,
// then resample the texture at a sub-pixel offset perpendicular to the edge
// and optionally blend with a neighborhood average.
vec3 FxaaPixelShader(vec2 pos, sampler2D tex, vec2 rcpFrame)
{
	// Center pixel (M) and its four direct neighbors. N is offset (0,-1),
	// W is (-1,0), E is (1,0) and S is (0,1).
	vec3 rgbN = sampler2DOff(tex, pos.xy, ivec2( 0,-1), rcpFrame).xyz;
	vec3 rgbW = sampler2DOff(tex, pos.xy, ivec2(-1, 0), rcpFrame).xyz;
	vec3 rgbM = sampler2DOff(tex, pos.xy, ivec2( 0, 0), rcpFrame).xyz;
	vec3 rgbE = sampler2DOff(tex, pos.xy, ivec2( 1, 0), rcpFrame).xyz;
	vec3 rgbS = sampler2DOff(tex, pos.xy, ivec2( 0, 1), rcpFrame).xyz;
	float lumaN = FxaaLuma(rgbN);
	float lumaW = FxaaLuma(rgbW);
	float lumaM = FxaaLuma(rgbM);
	float lumaE = FxaaLuma(rgbE);
	float lumaS = FxaaLuma(rgbS);
	// Local contrast: spread between the darkest and brightest of the 5 taps.
	float rangeMin = min(lumaM, min(min(lumaN, lumaW), min(lumaS, lumaE)));
	float rangeMax = max(lumaM, max(max(lumaN, lumaW), max(lumaS, lumaE)));
	float range = rangeMax - rangeMin;
	// Early out: contrast below both the absolute and the relative threshold
	// returns the center color without any filtering.
	if(range < max(FXAA_EDGE_THRESHOLD_MIN, rangeMax * FXAA_EDGE_THRESHOLD)) {
		return FxaaFilterReturn(rgbM); }
	// rgbL accumulates the neighborhood color used for sub-pixel blending.
	// The faster path averages the 5 taps right away; the other path keeps the
	// raw sum and finishes it once the four corner taps have been fetched.
	#if FXAA_SUBPIX > 0
		#if FXAA_SUBPIX_FASTER
			vec3 rgbL = (rgbN + rgbW + rgbE + rgbS + rgbM) * 
				vec3(1.0/5.0);
		#else
			vec3 rgbL = rgbN + rgbW + rgbM + rgbE + rgbS;
		#endif
	#endif        
    

	// Sub-pixel contrast: how far the center luma is from the average of its
	// four direct neighbors.
	#if FXAA_SUBPIX != 0
		float lumaL = (lumaN + lumaW + lumaE + lumaS) * 0.25;
		float rangeL = abs(lumaL - lumaM);
	#endif        
	// Blend weight for rgbL: ratio of sub-pixel contrast to local contrast,
	// with FXAA_SUBPIX_TRIM subtracted, rescaled, and capped at FXAA_SUBPIX_CAP.
	#if FXAA_SUBPIX == 1
		float blendL = max(0.0, 
			(rangeL / range) - FXAA_SUBPIX_TRIM) * FXAA_SUBPIX_TRIM_SCALE; 
		blendL = min(FXAA_SUBPIX_CAP, blendL);
	#endif
	// Full sub-pixel mode: use the raw ratio without trim or cap.
	#if FXAA_SUBPIX == 2
		float blendL = rangeL / range; 
	#endif
    

	// Four diagonal neighbors.
	vec3 rgbNW = sampler2DOff(tex, pos.xy, ivec2(-1,-1), rcpFrame).xyz;
	vec3 rgbNE = sampler2DOff(tex, pos.xy, ivec2( 1,-1), rcpFrame).xyz;
	vec3 rgbSW = sampler2DOff(tex, pos.xy, ivec2(-1, 1), rcpFrame).xyz;
	vec3 rgbSE = sampler2DOff(tex, pos.xy, ivec2( 1, 1), rcpFrame).xyz;
	// Non-faster path: add the corners and turn the 9-tap sum into an average.
	#if (FXAA_SUBPIX_FASTER == 0) && (FXAA_SUBPIX > 0)
		rgbL += (rgbNW + rgbNE + rgbSW + rgbSE);
		rgbL *= vec3(1.0/9.0);
	#endif
	float lumaNW = FxaaLuma(rgbNW);
	float lumaNE = FxaaLuma(rgbNE);
	float lumaSW = FxaaLuma(rgbSW);
	float lumaSE = FxaaLuma(rgbSE);
	// Weighted sums of absolute second differences of luma. edgeVert takes the
	// differences along each row of the 3x3 window, edgeHorz along each
	// column; the middle row / column is weighted twice as much.
	float edgeVert = 
		abs((0.25 * lumaNW) + (-0.5 * lumaN) + (0.25 * lumaNE)) +
		abs((0.50 * lumaW ) + (-1.0 * lumaM) + (0.50 * lumaE )) +
		abs((0.25 * lumaSW) + (-0.5 * lumaS) + (0.25 * lumaSE));
	float edgeHorz = 
		abs((0.25 * lumaNW) + (-0.5 * lumaW) + (0.25 * lumaSW)) +
		abs((0.50 * lumaN ) + (-1.0 * lumaM) + (0.50 * lumaS )) +
		abs((0.25 * lumaNE) + (-0.5 * lumaE) + (0.25 * lumaSE));
	// The span is treated as horizontal when the column measure dominates.
	bool horzSpan = edgeHorz >= edgeVert;
	// Step perpendicular to the span, initially pointing toward N (horizontal
	// span) or toward W (vertical span).
	float lengthSign = horzSpan ? -rcpFrame.y : -rcpFrame.x;
	// For a vertical span reuse the N/S variables for the W/E neighbors so
	// the rest of the code is written once for both orientations.
	if(!horzSpan) lumaN = lumaW;
	if(!horzSpan) lumaS = lumaE;
	// Luma step from the center to each of the two perpendicular neighbors.
	float gradientN = abs(lumaN - lumaM);
	float gradientS = abs(lumaS - lumaM);
	// From here on lumaN / lumaS hold the average luma of each
	// (neighbor, center) pair.
	lumaN = (lumaN + lumaM) * 0.5;
	lumaS = (lumaS + lumaM) * 0.5;
    

	// Keep the pair with the larger gradient. When that is the S/E pair, its
	// values are copied into the N variables and the step direction flips.
	bool pairN = gradientN >= gradientS;
	if(!pairN) lumaN = lumaS;
	if(!pairN) gradientN = gradientS;
	if(!pairN) lengthSign *= -1.0;
	// Search start: half a step from the center toward the chosen neighbor,
	// i.e. on the boundary between the two pixels of the pair.
	vec2 posN;
	posN.x = pos.x + (horzSpan ? 0.0 : lengthSign * 0.5);
	posN.y = pos.y + (horzSpan ? lengthSign * 0.5 : 0.0);

	// Luma deviation from the pair average that ends the search.
	gradientN *= FXAA_SEARCH_THRESHOLD;

	// posN walks in the negative direction along the span, posP in the
	// positive direction; offNP is one step along the span.
	vec2 posP = posN;
	vec2 offNP = horzSpan ? 
		vec2(rcpFrame.x, 0.0) :
		vec2(0.0f, rcpFrame.y); 
	float lumaEndN = lumaN;
	float lumaEndP = lumaN;
	bool doneN = false;
	bool doneP = false;
	// Initial displacement of both cursors and, for accelerated searches, the
	// enlarged step size (2, 3 or 4 steps at a time).
	#if FXAA_SEARCH_ACCELERATION == 1
		posN += offNP * vec2(-1.0, -1.0);
		posP += offNP * vec2( 1.0,  1.0);
	#endif
	#if FXAA_SEARCH_ACCELERATION == 2
		posN += offNP * vec2(-1.5, -1.5);
		posP += offNP * vec2( 1.5,  1.5);
		offNP *= vec2(2.0, 2.0);
	#endif
	#if FXAA_SEARCH_ACCELERATION == 3
		posN += offNP * vec2(-2.0, -2.0);
		posP += offNP * vec2( 2.0,  2.0);
		offNP *= vec2(3.0, 3.0);
	#endif
	#if FXAA_SEARCH_ACCELERATION == 4
		posN += offNP * vec2(-2.5, -2.5);
		posP += offNP * vec2( 2.5,  2.5);
		offNP *= vec2(4.0, 4.0);
	#endif
	// Walk both cursors until the sampled luma differs from the pair average
	// by at least gradientN, or FXAA_SEARCH_STEPS iterations have run.
	// A cursor that has finished keeps its last position and luma.
	for(int i = 0; i < FXAA_SEARCH_STEPS; i++) {
		// Unaccelerated search samples at LOD 0; accelerated searches sample
		// with offNP passed as the explicit derivative.
		#if FXAA_SEARCH_ACCELERATION == 1
			if(!doneN) lumaEndN = 
				FxaaLuma(sampler2DLod0(tex, posN.xy).xyz);
			if(!doneP) lumaEndP = 
				FxaaLuma(sampler2DLod0(tex, posP.xy).xyz);
		#else
			if(!doneN) lumaEndN = 
				FxaaLuma(sampler2DGrad(tex, posN.xy, offNP).xyz);
			if(!doneP) lumaEndP = 
				FxaaLuma(sampler2DGrad(tex, posP.xy, offNP).xyz);
		#endif
		doneN = doneN || (abs(lumaEndN - lumaN) >= gradientN);
		doneP = doneP || (abs(lumaEndP - lumaN) >= gradientN);
		if(doneN && doneP) break;
		if(!doneN) posN -= offNP;
		if(!doneP) posP += offNP; }

	// Distance from the pixel to each end of the span, measured along it.
	float dstN = horzSpan ? pos.x - posN.x : pos.y - posN.y;
	float dstP = horzSpan ? posP.x - pos.x : posP.y - pos.y;
	// Use the luma found at whichever end is closer.
	bool directionN = dstN < dstP;
	lumaEndN = directionN ? lumaEndN : lumaEndP;

	// When the center luma and the nearer end's luma lie on the same side of
	// the pair average, the offset is cancelled (no perpendicular shift).
	if(((lumaM - lumaN) < 0.0) == ((lumaEndN - lumaN) < 0.0)) 

		lengthSign = 0.0;
 
	float spanLength = (dstP + dstN);
	dstN = directionN ? dstN : dstP;
	// Perpendicular shift: 0.5 of a step at the end of the span, shrinking
	// linearly with the distance to the nearer end.
	float subPixelOffset = (0.5 + (dstN * (-1.0/spanLength))) * lengthSign;
	// Resample the texture at the shifted position.
	vec3 rgbF = sampler2DLod0(tex, vec2(
		pos.x + (horzSpan ? 0.0 : subPixelOffset),
		pos.y + (horzSpan ? subPixelOffset : 0.0))).xyz;
	// Without sub-pixel filtering return the resampled color directly;
	// otherwise blend blendL of the neighborhood average rgbL with
	// (1 - blendL) of the resampled color.
	#if FXAA_SUBPIX == 0
		return FxaaFilterReturn(rgbF); 
	#else        
		return FxaaFilterReturn(FxaaLerp3(rgbL, rgbF, blendL)); 
	#endif
}



// Texture that the FXAA filter reads from.
uniform sampler2D _MainTex;
// Plain global (not a uniform): main() writes (1/w, 1/h) into it on every
// invocation before handing it to FxaaPixelShader.
vec2 _MainTex_TexelSize;
// Only their reciprocals are used; presumably the frame width and height in
// pixels - confirm against the host application.
uniform float w;
uniform float h;

// Fragment entry point: runs the FXAA filter at this fragment's first
// texture coordinate and writes the result with alpha forced to 1.0.
void main()
{
	// Component-wise reciprocal of (w, h), stored in the file-level global.
	_MainTex_TexelSize = vec2(1.0) / vec2(w, h);

	vec2 uv = gl_TexCoord[0].xy;
	vec3 filtered = FxaaPixelShader(uv, _MainTex, _MainTex_TexelSize);
	gl_FragData[0] = vec4(filtered, 1.0);
}

