Well well well.
Timothy Lottes has been busy working on FXAA some more, and recently released FXAA v3.9. It is purported to be a bit faster, with better handling of some edge cases. Please try this shader, and see if you can spot any positive difference between this and the one built into the engine.
To use this, save it as fxaa-f.sdr in FS2/data/effects (or, if you have a previous version of this shader, overwrite it with this one).
// Ask for EXT_gpu_shader4; the FXAA_FAST_PIXEL_OFFSET detection further down
// tests the GL_EXT_gpu_shader4 macro.
#extension GL_EXT_gpu_shader4 : enable
/*==========================================================================*/
// Compile-time switches.
// FXAA_EARLY_EXIT 1: FxaaPixelShader checks local contrast first and bails out
// before doing its blur taps.
#define FXAA_EARLY_EXIT 1
// FXAA_DISCARD 1: the early exits in both shaders issue `discard` instead of
// returning the centre texel.
#define FXAA_DISCARD 1
// FXAA_LINEAR is not referenced anywhere in the visible source.
#define FXAA_LINEAR 0
/*--------------------------------------------------------------------------*/
// FXAA_FAST_PIXEL_OFFSET selects how FxaaTexOff is implemented on the GLSL 1.20
// path: 1 = constant-offset fetch, 0 = offset added to the coordinate by hand.
// A value supplied before this point wins; otherwise it is 1 when any of the
// three extension macros below is defined, and 0 if none is.
#ifndef FXAA_FAST_PIXEL_OFFSET
    #if defined(GL_EXT_gpu_shader4) || defined(GL_NV_gpu_shader5) || defined(GL_ARB_gpu_shader5)
        #define FXAA_FAST_PIXEL_OFFSET 1
    #else
        #define FXAA_FAST_PIXEL_OFFSET 0
    #endif
#endif
// FXAA_GATHER4_ALPHA
//   1 = API supports gather4 on alpha channel.
//   0 = API does not support gather4 on alpha channel.
// A value supplied before this point wins; otherwise it is derived from the
// HLSL5 / gpu_shader5 feature macros below.
#ifndef FXAA_GATHER4_ALPHA
    // FXAA_HLSL_5 is never defined in the visible source. GLSL does not treat
    // undefined identifiers in #if as 0, so test for its existence before
    // evaluating it.
    #ifdef FXAA_HLSL_5
        #if (FXAA_HLSL_5 == 1)
            #define FXAA_GATHER4_ALPHA 1
        #endif
    #endif
    #ifdef GL_ARB_gpu_shader5
        #define FXAA_GATHER4_ALPHA 1
    #endif
    #ifdef GL_NV_gpu_shader5
        #define FXAA_GATHER4_ALPHA 1
    #endif
    // None of the above matched: gather4 is unavailable.
    #ifndef FXAA_GATHER4_ALPHA
        #define FXAA_GATHER4_ALPHA 0
    #endif
#endif
/*--------------------------------------------------------------------------*/
/*============================================================================
TUNING KNOBS
============================================================================*/
// Tuning values consumed by the two FXAA entry points. They start at zero and
// are filled in by fxaa_choose_preset().
float EDGE_SHARPNESS = 0.0;      // read by FxaaPixelShader
float EDGE_THRESHOLD = 0.0;      // contrast threshold relative to the local max luma
float EDGE_THRESHOLD_MIN = 0.0;  // absolute lower bound for the contrast threshold
float SUBPIX_CAP = 0.0;          // read by FxaaPixelShaderPC
float SUBPIX_TRIM = 0.0;         // neither written nor read in the visible source

// Loads the tuning globals for the requested preset.
//   preset 0..4 : set EDGE_SHARPNESS / EDGE_THRESHOLD / EDGE_THRESHOLD_MIN
//                 (SUBPIX_CAP is left untouched).
//   preset 5..8 : set EDGE_THRESHOLD / EDGE_THRESHOLD_MIN / SUBPIX_CAP
//                 (EDGE_SHARPNESS is left untouched).
//   anything else (including negative values) gets the final set of values.
void fxaa_choose_preset(int preset) {
    if (preset == 0) {
        EDGE_SHARPNESS = 8.0;
        EDGE_THRESHOLD = 1.0/8.0;
        EDGE_THRESHOLD_MIN = 0.05;
        return;
    }
    if (preset == 1) {
        EDGE_SHARPNESS = 7.0;
        EDGE_THRESHOLD = 1.0/7.0;
        EDGE_THRESHOLD_MIN = 0.04;
        return;
    }
    if (preset == 2) {
        EDGE_SHARPNESS = 6.0;
        EDGE_THRESHOLD = 1.0/6.0;
        EDGE_THRESHOLD_MIN = 0.03;
        return;
    }
    if (preset == 3) {
        EDGE_SHARPNESS = 5.0;
        EDGE_THRESHOLD = 1.0/5.0;
        EDGE_THRESHOLD_MIN = 0.02;
        return;
    }
    if (preset == 4) {
        EDGE_SHARPNESS = 4.0;
        EDGE_THRESHOLD = 1.0/4.0;
        EDGE_THRESHOLD_MIN = 0.01;
        return;
    }
    if (preset == 5) {
        EDGE_THRESHOLD = 1.0/3.0;
        EDGE_THRESHOLD_MIN = 1.0/16.0;
        SUBPIX_CAP = 3.0/4.0;
        return;
    }
    if (preset == 6) {
        EDGE_THRESHOLD = 1.0/5.0;
        EDGE_THRESHOLD_MIN = 1.0/20.0;
        SUBPIX_CAP = 4.0/5.0;
        return;
    }
    if (preset == 7) {
        EDGE_THRESHOLD = 1.0/7.0;
        EDGE_THRESHOLD_MIN = 1.0/24.0;
        SUBPIX_CAP = 5.0/6.0;
        return;
    }
    if (preset == 8) {
        EDGE_THRESHOLD = 1.0/10.0;
        EDGE_THRESHOLD_MIN = 1.0/28.0;
        SUBPIX_CAP = 7.0/8.0;
        return;
    }
    // Fallback for every value not matched above.
    EDGE_THRESHOLD = 1.0/12.0;
    EDGE_THRESHOLD_MIN = 1.0/32.0;
    SUBPIX_CAP = 1.0;
}
/*============================================================================
API PORTING
============================================================================*/
// Choose API.
// SHADER_MODEL is not defined in the visible source (presumably injected by the
// host application - confirm). It is only evaluated when it exists, because
// GLSL does not treat undefined identifiers in #if as 0.
//   SHADER_MODEL == 2 -> GLSL 1.20 bindings
//   SHADER_MODEL  > 2 -> GLSL 1.30 bindings
#ifdef SHADER_MODEL
    #if SHADER_MODEL == 2
        #define FXAA_GLSL_120 1
        #define FXAA_GLSL_130 0
    #elif SHADER_MODEL > 2
        #define FXAA_GLSL_130 1
        #define FXAA_GLSL_120 0
    #endif
#endif
// Guarantee both selectors exist so the "#if FXAA_GLSL_120" / "#if FXAA_GLSL_130"
// tests that follow never evaluate an undefined macro (e.g. when SHADER_MODEL
// is missing or below 2).
#ifndef FXAA_GLSL_120
    #define FXAA_GLSL_120 0
#endif
#ifndef FXAA_GLSL_130
    #define FXAA_GLSL_130 0
#endif
#if FXAA_GLSL_120
    // GLSL 1.20 binding layer.
    // Requires "#version 120". The constant-offset fetch used when
    // FXAA_FAST_PIXEL_OFFSET is 1 additionally requires at least
    //   #extension GL_EXT_gpu_shader4 : enable
    // With FXAA_FAST_PIXEL_OFFSET 0 the offset is scaled and added to the
    // texture coordinate by hand instead.

    // HLSL-flavoured type names mapped onto GLSL types.
    #define half float
    #define half2 vec2
    #define half3 vec3
    #define half4 vec4
    #define int2 ivec2
    #define float2 vec2
    #define float3 vec3
    #define float4 vec4

    // Fxaa-prefixed type names.
    #define FxaaInt2 ivec2
    #define FxaaFloat2 vec2
    #define FxaaFloat3 vec3
    #define FxaaFloat4 vec4

    // Statements and arithmetic helpers.
    #define FxaaDiscard discard
    #define FxaaDot3(a, b) dot(a, b)
    #define FxaaSat(x) clamp(x, 0.0, 1.0)
    #define FxaaLerp(x,y,s) mix(x,y,s)

    // Texture access. All fetches sample LOD 0.
    //   t = sampler, p = coordinate, o = integer texel offset, r = reciprocal frame size
    #define FxaaTex sampler2D
    #define FxaaTexTop(t, p) texture2DLod(t, p, 0.0)
    #if (FXAA_FAST_PIXEL_OFFSET == 1)
        #define FxaaTexOff(t, p, o, r) texture2DLodOffset(t, p, 0.0, o)
    #else
        #define FxaaTexOff(t, p, o, r) texture2DLod(t, p + (o * r), 0.0)
    #endif

    // Alpha-channel (component 3) gathers.
    #if (FXAA_GATHER4_ALPHA == 1)
        // use #extension GL_ARB_gpu_shader5 : enable
        #define FxaaTexAlpha4(t, p, r) textureGather(t, p, 3)
        #define FxaaTexOffAlpha4(t, p, o, r) textureGatherOffset(t, p, o, 3)
    #endif
#endif
/*--------------------------------------------------------------------------*/
#if FXAA_GLSL_130
    // GLSL 1.30 binding layer. Requires "#version 130" or better.

    // HLSL-flavoured type names mapped onto GLSL types.
    #define half float
    #define half2 vec2
    #define half3 vec3
    #define half4 vec4
    #define int2 ivec2
    #define float2 vec2
    #define float3 vec3
    #define float4 vec4

    // Fxaa-prefixed type names.
    #define FxaaInt2 ivec2
    #define FxaaFloat2 vec2
    #define FxaaFloat3 vec3
    #define FxaaFloat4 vec4

    // Statements and arithmetic helpers.
    #define FxaaDiscard discard
    #define FxaaDot3(a, b) dot(a, b)
    #define FxaaSat(x) clamp(x, 0.0, 1.0)
    #define FxaaLerp(x,y,s) mix(x,y,s)

    // Texture access. All fetches sample LOD 0.
    //   t = sampler, p = coordinate, o = integer texel offset,
    //   r = reciprocal frame size (ignored by these expansions)
    #define FxaaTex sampler2D
    #define FxaaTexTop(t, p) textureLod(t, p, 0.0)
    #define FxaaTexOff(t, p, o, r) textureLodOffset(t, p, 0.0, o)

    // Alpha-channel (component 3) gathers.
    #if (FXAA_GATHER4_ALPHA == 1)
        // use #extension GL_ARB_gpu_shader5 : enable
        #define FxaaTexAlpha4(t, p, r) textureGather(t, p, 3)
        #define FxaaTexOffAlpha4(t, p, o, r) textureGatherOffset(t, p, o, 3)
    #endif
#endif
// Returns the luma of `color`: the dot product of its RGB channels with the
// weights (0.299, 0.587, 0.114). The alpha channel is ignored.
float getLuma(vec4 color) {
    float3 lumaWeights = float3(0.299, 0.587, 0.114);
    return dot(color.rgb, lumaWeights);
}
/*============================================================================
FXAA3 CONSOLE - PC PIXEL SHADER
------------------------------------------------------------------------------
Using a modified version of the PS3 version here to best target old hardware.
============================================================================*/
/*--------------------------------------------------------------------------*/
// Console-style FXAA pass.
// Reads luma from the alpha (.w) channel of every tap: four corner taps give an
// edge direction, then two pairs of taps along that direction are averaged.
// Returns the blended colour. With FXAA_EARLY_EXIT 1, low-contrast fragments
// are discarded (FXAA_DISCARD 1) or returned unfiltered (FXAA_DISCARD 0).
// rcpFrame is accepted but never read in this body.
half4 FxaaPixelShader(
// {xy} = center of pixel
float2 pos,
// {xy__} = upper left of pixel
// {__zw} = lower right of pixel
float4 posPos,
// {rgb_} = color in linear or perceptual color space
// {___a} = alpha output is junk value
FxaaTex tex,
// This must be from a constant/uniform.
// {xy} = rcpFrame not used on PC version of FXAA Console
float2 rcpFrame,
// This must be from a constant/uniform.
// {x___} = 2.0/screenWidthInPixels
// {_y__} = 2.0/screenHeightInPixels
// {__z_} = 0.5/screenWidthInPixels
// {___w} = 0.5/screenHeightInPixels
float4 rcpFrameOpt
) {
/*--------------------------------------------------------------------------*/
// Accumulate the edge direction from the four corner lumas:
//   dir.x = (SW + SE) - (NE + NW)
//   dir.z = (SW + NW) - (NE + SE)
// dir.y stays 0 and dir.w is never written.
half4 dir;
dir.y = 0.0;
half4 lumaNe = FxaaTexTop(tex, posPos.zy);
// NOTE(review): small constant bias on the NE luma; presumably keeps dir from
// being exactly zero before normalize() below - confirm against upstream FXAA.
lumaNe.w += half(1.0/384.0);
dir.x = -lumaNe.w;
dir.z = -lumaNe.w;
/*--------------------------------------------------------------------------*/
half4 lumaSw = FxaaTexTop(tex, posPos.xw);
dir.x += lumaSw.w;
dir.z += lumaSw.w;
/*--------------------------------------------------------------------------*/
half4 lumaNw = FxaaTexTop(tex, posPos.xy);
dir.x -= lumaNw.w;
dir.z += lumaNw.w;
/*--------------------------------------------------------------------------*/
half4 lumaSe = FxaaTexTop(tex, posPos.zw);
dir.x += lumaSe.w;
dir.z -= lumaSe.w;
/*==========================================================================*/
#if (FXAA_EARLY_EXIT == 1)
// Early exit: compare the luma range of the four corners plus the centre
// against max(EDGE_THRESHOLD_MIN, cornerMax * EDGE_THRESHOLD).
half4 rgbyM = FxaaTexTop(tex, pos.xy);
/*--------------------------------------------------------------------------*/
half lumaMin = min(min(lumaNw.w, lumaSw.w), min(lumaNe.w, lumaSe.w));
half lumaMax = max(max(lumaNw.w, lumaSw.w), max(lumaNe.w, lumaSe.w));
/*--------------------------------------------------------------------------*/
half lumaMinM = min(lumaMin, rgbyM.w);
half lumaMaxM = max(lumaMax, rgbyM.w);
/*--------------------------------------------------------------------------*/
// The statement controlled by this `if` is chosen by the preprocessor below.
if((lumaMaxM - lumaMinM) < max(EDGE_THRESHOLD_MIN, lumaMax * EDGE_THRESHOLD))
#if (FXAA_DISCARD == 1)
FxaaDiscard;
#else
return rgbyM;
#endif
#endif
/*==========================================================================*/
// dir1 = unit edge direction (x and z components of the normalized dir).
half4 dir1_pos;
dir1_pos.xy = normalize(dir.xyz).xz;
half dirAbsMinTimesC = min(abs(dir1_pos.x), abs(dir1_pos.y)) * half(EDGE_SHARPNESS);
/*--------------------------------------------------------------------------*/
// dir2 = dir1 divided by its smaller component times EDGE_SHARPNESS, clamped
// to [-2, 2] per component.
half4 dir2_pos;
dir2_pos.xy = clamp(dir1_pos.xy / dirAbsMinTimesC, half(-2.0), half(2.0));
dir1_pos.zw = pos.xy;
dir2_pos.zw = pos.xy;
// First tap pair: pos -/+ dir1 scaled by rcpFrameOpt.zw. temp1N.xy holds the
// coordinate and is then overwritten by the fetched texel.
half4 temp1N;
temp1N.xy = dir1_pos.zw - dir1_pos.xy * rcpFrameOpt.zw;
/*--------------------------------------------------------------------------*/
temp1N = FxaaTexTop(tex, temp1N.xy);
half4 rgby1;
rgby1.xy = dir1_pos.zw + dir1_pos.xy * rcpFrameOpt.zw;
/*--------------------------------------------------------------------------*/
rgby1 = FxaaTexTop(tex, rgby1.xy);
rgby1 = (temp1N + rgby1) * 0.5;
/*--------------------------------------------------------------------------*/
// Second tap pair: pos -/+ dir2 scaled by rcpFrameOpt.xy.
half4 temp2N;
temp2N.xy = dir2_pos.zw - dir2_pos.xy * rcpFrameOpt.xy;
temp2N = FxaaTexTop(tex, temp2N.xy);
/*--------------------------------------------------------------------------*/
half4 rgby2;
rgby2.xy = dir2_pos.zw + dir2_pos.xy * rcpFrameOpt.xy;
rgby2 = FxaaTexTop(tex, rgby2.xy);
rgby2 = (temp2N + rgby2) * 0.5;
/*--------------------------------------------------------------------------*/
#if (FXAA_EARLY_EXIT == 0)
// Corner min/max are still needed below when the early-exit block is compiled out.
half lumaMin = min(min(lumaNw.w, lumaSw.w), min(lumaNe.w, lumaSe.w));
half lumaMax = max(max(lumaNw.w, lumaSw.w), max(lumaNe.w, lumaSe.w));
#endif
// rgby2 becomes the average of all four taps.
rgby2 = (rgby2 + rgby1) * 0.5;
/*--------------------------------------------------------------------------*/
// If the four-tap luma falls outside the corner luma range, fall back to the
// two-tap result.
bool twoTapLt = rgby2.w < lumaMin;
bool twoTapGt = rgby2.w > lumaMax;
/*--------------------------------------------------------------------------*/
if(twoTapLt || twoTapGt) rgby2 = rgby1;
/*--------------------------------------------------------------------------*/
return rgby2; }
/*==========================================================================*/
// Quality-style FXAA pass.
// Reads luma from the alpha (.w) channel of the centre texel and its eight
// neighbours, decides whether the local edge is horizontal or vertical,
// searches along it in both directions for its ends, and returns the texture
// resampled at a position shifted perpendicular to the edge.
// Low-contrast fragments are discarded (FXAA_DISCARD 1) or returned unfiltered
// (FXAA_DISCARD 0). posPos and rcpFrameOpt are accepted but never read.
float4 FxaaPixelShaderPC(
// {xy} = center of pixel
float2 pos,
// {xyzw} = not used on FXAA3 Quality
float4 posPos,
// {rgb_} = color in linear or perceptual color space
// {___a} = luma in perceptual color space (not linear)
FxaaTex tex,
// This must be from a constant/uniform.
// {x_} = 1.0/screenWidthInPixels
// {_y} = 1.0/screenHeightInPixels
float2 rcpFrame,
// {xyzw} = not used on FXAA3 Quality
float4 rcpFrameOpt
) {
/*--------------------------------------------------------------------------*/
float2 posM;
posM.x = pos.x;
posM.y = pos.y;
// Fetch the centre luma and the S/E/N/W neighbours. On the gather path the
// luma names are macros aliasing components of the two gathered vectors; on
// the other path they are ordinary locals. The #defines are not #undef'd in
// the visible source, so they remain in effect after this function.
#if (FXAA_GATHER4_ALPHA == 1)
#if (FXAA_DISCARD == 0)
float4 rgbyM = FxaaTexTop(tex, posM);
#define lumaM rgbyM.w
#endif
float4 luma4A = FxaaTexAlpha4(tex, posM, rcpFrame.xy);
float4 luma4B = FxaaTexOffAlpha4(tex, posM, FxaaInt2(-1, -1), rcpFrame.xy);
#if (FXAA_DISCARD == 1)
#define lumaM luma4A.w
#endif
#define lumaE luma4A.z
#define lumaS luma4A.x
#define lumaSE luma4A.y
#define lumaNW luma4B.w
#define lumaN luma4B.z
#define lumaW luma4B.x
#else
float4 rgbyM = FxaaTexTop(tex, posM);
#define lumaM rgbyM.w
float lumaS = FxaaTexOff(tex, posM, FxaaInt2( 0, 1), rcpFrame.xy).w;
float lumaE = FxaaTexOff(tex, posM, FxaaInt2( 1, 0), rcpFrame.xy).w;
float lumaN = FxaaTexOff(tex, posM, FxaaInt2( 0,-1), rcpFrame.xy).w;
float lumaW = FxaaTexOff(tex, posM, FxaaInt2(-1, 0), rcpFrame.xy).w;
#endif
/*--------------------------------------------------------------------------*/
// Local contrast test: range = max - min over M/N/S/E/W; exit early when it is
// below max(EDGE_THRESHOLD_MIN, rangeMax * EDGE_THRESHOLD).
float maxSM = max(lumaS, lumaM);
float minSM = min(lumaS, lumaM);
float maxESM = max(lumaE, maxSM);
float minESM = min(lumaE, minSM);
float maxWN = max(lumaN, lumaW);
float minWN = min(lumaN, lumaW);
float rangeMax = max(maxWN, maxESM);
float rangeMin = min(minWN, minESM);
float rangeMaxScaled = rangeMax * EDGE_THRESHOLD;
float range = rangeMax - rangeMin;
float rangeMaxClamped = max(EDGE_THRESHOLD_MIN, rangeMaxScaled);
bool earlyExit = range < rangeMaxClamped;
/*--------------------------------------------------------------------------*/
// The statement controlled by this `if` is chosen by the preprocessor below.
if(earlyExit)
#if (FXAA_DISCARD == 1)
FxaaDiscard;
#else
return rgbyM;
#endif
/*--------------------------------------------------------------------------*/
// Fetch the diagonal neighbours (NW and SE already come from the gathers on
// the gather path).
#if (FXAA_GATHER4_ALPHA == 0)
float lumaNW = FxaaTexOff(tex, posM, FxaaInt2(-1,-1), rcpFrame.xy).w;
float lumaSE = FxaaTexOff(tex, posM, FxaaInt2( 1, 1), rcpFrame.xy).w;
float lumaNE = FxaaTexOff(tex, posM, FxaaInt2( 1,-1), rcpFrame.xy).w;
float lumaSW = FxaaTexOff(tex, posM, FxaaInt2(-1, 1), rcpFrame.xy).w;
#else
float lumaNE = FxaaTexOff(tex, posM, FxaaInt2(1, -1), rcpFrame.xy).w;
float lumaSW = FxaaTexOff(tex, posM, FxaaInt2(-1, 1), rcpFrame.xy).w;
#endif
/*--------------------------------------------------------------------------*/
// edgeHorz / edgeVert: weighted sums of absolute second differences
// (a + b - 2*middle). The term through the centre texel is weighted 2, the two
// terms through its neighbours are weighted 1.
float lumaNS = lumaN + lumaS;
float lumaWE = lumaW + lumaE;
float subpixRcpRange = 1.0/range;
float subpixNSWE = lumaNS + lumaWE;
float edgeHorz1 = (-2.0 * lumaM) + lumaNS;
float edgeVert1 = (-2.0 * lumaM) + lumaWE;
/*--------------------------------------------------------------------------*/
float lumaNESE = lumaNE + lumaSE;
float lumaNWNE = lumaNW + lumaNE;
float edgeHorz2 = (-2.0 * lumaE) + lumaNESE;
float edgeVert2 = (-2.0 * lumaN) + lumaNWNE;
/*--------------------------------------------------------------------------*/
float lumaNWSW = lumaNW + lumaSW;
float lumaSWSE = lumaSW + lumaSE;
float edgeHorz4 = (abs(edgeHorz1) * 2.0) + abs(edgeHorz2);
float edgeVert4 = (abs(edgeVert1) * 2.0) + abs(edgeVert2);
float edgeHorz3 = (-2.0 * lumaW) + lumaNWSW;
float edgeVert3 = (-2.0 * lumaS) + lumaSWSE;
float edgeHorz = abs(edgeHorz3) + edgeHorz4;
float edgeVert = abs(edgeVert3) + edgeVert4;
/*--------------------------------------------------------------------------*/
// subpixA = 2*(N+S+W+E) + (NW+SW+NE+SE); subpixA/12 is a weighted neighbourhood
// average. lengthSign starts as the texel size perpendicular to a vertical span
// and is switched to rcpFrame.y below for a horizontal one.
float subpixNWSWNESE = lumaNWSW + lumaNESE;
float lengthSign = rcpFrame.x;
bool horzSpan = edgeHorz >= edgeVert;
float subpixA = subpixNSWE * 2.0 + subpixNWSWNESE;
/*--------------------------------------------------------------------------*/
// For a vertical span, reuse the N/S variables for the W/E neighbours so the
// remaining code handles both orientations.
if(!horzSpan) lumaN = lumaW;
if(!horzSpan) lumaS = lumaE;
if(horzSpan) lengthSign = rcpFrame.y;
float subpixB = (subpixA * (1.0/12.0)) - lumaM;
/*--------------------------------------------------------------------------*/
// Pick the side with the larger absolute gradient; lengthSign is negated when
// that is the N side (pairN).
float gradientN = lumaN - lumaM;
float gradientS = lumaS - lumaM;
float lumaNN = lumaN + lumaM;
float lumaSS = lumaS + lumaM;
bool pairN = abs(gradientN) >= abs(gradientS);
float gradient = max(abs(gradientN), abs(gradientS));
if(pairN) lengthSign = -lengthSign;
float subpixC = FxaaSat(abs(subpixB) * subpixRcpRange);
/*--------------------------------------------------------------------------*/
// posB: start position, moved half a texel (lengthSign * 0.5) perpendicular to
// the span. offNP: one-texel step along the span.
float2 posB;
posB.x = posM.x;
posB.y = posM.y;
float2 offNP;
offNP.x = (!horzSpan) ? 0.0 : rcpFrame.x;
offNP.y = ( horzSpan) ? 0.0 : rcpFrame.y;
if(!horzSpan) posB.x += lengthSign * 0.5;
if( horzSpan) posB.y += lengthSign * 0.5;
/*--------------------------------------------------------------------------*/
// First search taps one step either side of posB. subpixD * subpixE evaluates
// subpixC * subpixC * (3 - 2 * subpixC).
float2 posN;
posN.x = posB.x - offNP.x;
posN.y = posB.y - offNP.y;
float2 posP;
posP.x = posB.x + offNP.x;
posP.y = posB.y + offNP.y;
float subpixD = ((-2.0)*subpixC) + 3.0;
float lumaEndN = FxaaTexTop(tex, posN).w;
float subpixE = subpixC * subpixC;
float lumaEndP = FxaaTexTop(tex, posP).w;
/*--------------------------------------------------------------------------*/
// lumaNN * 0.5 is the average luma of the centre and the chosen neighbour;
// lumaMM and the end lumas are expressed relative to it.
if(!pairN) lumaNN = lumaSS;
float gradientScaled = gradient * 1.0/4.0;
float lumaMM = lumaM - lumaNN * 0.5;
float subpixF = subpixD * subpixE;
bool lumaMLTZero = lumaMM < 0.0;
/*--------------------------------------------------------------------------*/
// A side is "done" once its relative end luma reaches gradientScaled in
// magnitude. Sides that are not done step further out (by 1.5 texels here).
// Despite its name, doneNP is true while at least one side is NOT done.
lumaEndN -= lumaNN * 0.5;
lumaEndP -= lumaNN * 0.5;
bool doneN = abs(lumaEndN) >= gradientScaled;
bool doneP = abs(lumaEndP) >= gradientScaled;
if(!doneN) posN.x -= offNP.x * 1.5;
if(!doneN) posN.y -= offNP.y * 1.5;
bool doneNP = (!doneN) || (!doneP);
if(!doneP) posP.x += offNP.x * 1.5;
if(!doneP) posP.y += offNP.y * 1.5;
// Hand-unrolled search: up to four further rounds, each re-sampling only the
// unfinished sides and then stepping by 2.0, 2.0, 4.0 and 2.0 texels.
if(doneNP) {
/*--------------------------------------------------------------------------*/
if(!doneN) lumaEndN = FxaaTexTop(tex, posN.xy).w;
if(!doneP) lumaEndP = FxaaTexTop(tex, posP.xy).w;
if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;
if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;
doneN = abs(lumaEndN) >= gradientScaled;
doneP = abs(lumaEndP) >= gradientScaled;
if(!doneN) posN.x -= offNP.x * 2.0;
if(!doneN) posN.y -= offNP.y * 2.0;
doneNP = (!doneN) || (!doneP);
if(!doneP) posP.x += offNP.x * 2.0;
if(!doneP) posP.y += offNP.y * 2.0;
if(doneNP) {
/*--------------------------------------------------------------------------*/
if(!doneN) lumaEndN = FxaaTexTop(tex, posN.xy).w;
if(!doneP) lumaEndP = FxaaTexTop(tex, posP.xy).w;
if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;
if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;
doneN = abs(lumaEndN) >= gradientScaled;
doneP = abs(lumaEndP) >= gradientScaled;
if(!doneN) posN.x -= offNP.x * 2.0;
if(!doneN) posN.y -= offNP.y * 2.0;
doneNP = (!doneN) || (!doneP);
if(!doneP) posP.x += offNP.x * 2.0;
if(!doneP) posP.y += offNP.y * 2.0;
if(doneNP) {
/*--------------------------------------------------------------------------*/
if(!doneN) lumaEndN = FxaaTexTop(tex, posN.xy).w;
if(!doneP) lumaEndP = FxaaTexTop(tex, posP.xy).w;
if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;
if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;
doneN = abs(lumaEndN) >= gradientScaled;
doneP = abs(lumaEndP) >= gradientScaled;
if(!doneN) posN.x -= offNP.x * 4.0;
if(!doneN) posN.y -= offNP.y * 4.0;
doneNP = (!doneN) || (!doneP);
if(!doneP) posP.x += offNP.x * 4.0;
if(!doneP) posP.y += offNP.y * 4.0;
if(doneNP) {
/*--------------------------------------------------------------------------*/
// Last round: positions still advance for unfinished sides, but no further
// sample is taken afterwards.
if(!doneN) lumaEndN = FxaaTexTop(tex, posN.xy).w;
if(!doneP) lumaEndP = FxaaTexTop(tex, posP.xy).w;
if(!doneN) lumaEndN = lumaEndN - lumaNN * 0.5;
if(!doneP) lumaEndP = lumaEndP - lumaNN * 0.5;
doneN = abs(lumaEndN) >= gradientScaled;
doneP = abs(lumaEndP) >= gradientScaled;
if(!doneN) posN.x -= offNP.x * 2.0;
if(!doneN) posN.y -= offNP.y * 2.0;
if(!doneP) posP.x += offNP.x * 2.0;
if(!doneP) posP.y += offNP.y * 2.0; } } } }
/*--------------------------------------------------------------------------*/
// Distance from the centre to each search end, measured along the span axis.
float dstN = posM.x - posN.x;
float dstP = posP.x - posM.x;
if(!horzSpan) dstN = posM.y - posN.y;
if(!horzSpan) dstP = posP.y - posM.y;
/*--------------------------------------------------------------------------*/
// An end is "good" when the sign of its relative luma differs from the sign of
// the centre's relative luma.
bool goodSpanN = (lumaEndN < 0.0) != lumaMLTZero;
float spanLength = (dstP + dstN);
bool goodSpanP = (lumaEndP < 0.0) != lumaMLTZero;
float spanLengthRcp = 1.0/spanLength;
/*--------------------------------------------------------------------------*/
// Use the nearer end: pixelOffset = 0.5 - dst / spanLength.
// subpixH = (subpixF squared) * SUBPIX_CAP.
bool directionN = dstN < dstP;
float dst = min(dstN, dstP);
bool goodSpan = directionN ? goodSpanN : goodSpanP;
float subpixG = subpixF * subpixF;
float pixelOffset = (dst * (-spanLengthRcp)) + 0.5;
float subpixH = subpixG * SUBPIX_CAP;
/*--------------------------------------------------------------------------*/
// Final shift is the larger of the edge offset (0 if the span is not good) and
// the sub-pixel term, applied perpendicular to the span, then one last fetch.
float pixelOffsetGood = goodSpan ? pixelOffset : 0.0;
float pixelOffsetSubpix = max(pixelOffsetGood, subpixH);
if(!horzSpan) posM.x += pixelOffsetSubpix * lengthSign;
if( horzSpan) posM.y += pixelOffsetSubpix * lengthSign;
return FxaaTexTop(tex, posM); }
/*==========================================================================*/
// Source image handed to both FXAA functions, which read luma from the alpha
// (.w) channel of its texels.
uniform sampler2D tex0;
// Preset index passed to fxaa_choose_preset() and used by main() to pick the
// FXAA function.
uniform int fxaa_preset;
// Interpolated inputs forwarded unchanged to the FXAA functions as rcpFrame,
// rcpFrameOpt, pos and posPos; see the parameter comments on those functions
// for the expected layout. The stage that writes them is not visible here.
varying vec2 v_rcpFrame;
varying vec4 v_rcpFrameOpt;
varying vec2 v_pos;
varying vec4 v_posPos;
// Fragment entry point: load the tuning values for fxaa_preset, then run the
// FXAA variant those values were written for and store its result in
// gl_FragColor.
void main() {
    fxaa_choose_preset(fxaa_preset);
    // fxaa_choose_preset() sets EDGE_SHARPNESS (read only by FxaaPixelShader)
    // for presets 0..4, and SUBPIX_CAP (read only by FxaaPixelShaderPC) for
    // every other value, including negative ones. The dispatch has to use the
    // same split: otherwise preset 4 runs the PC path with SUBPIX_CAP still 0,
    // and a negative preset runs the console path with EDGE_SHARPNESS still 0,
    // which makes it divide by zero.
    if (fxaa_preset >= 0 && fxaa_preset <= 4) {
        gl_FragColor = FxaaPixelShader(v_pos, v_posPos, tex0, v_rcpFrame, v_rcpFrameOpt);
    } else {
        gl_FragColor = FxaaPixelShaderPC(v_pos, v_posPos, tex0, v_rcpFrame, v_rcpFrameOpt);
    }
}