#version 320 es
#define FORCE_EARLY_Z layout(early_fragment_tests) in
#define ATTRIBUTE_LOCATION(x)
#define FRAGMENT_OUTPUT_LOCATION(x)
#define FRAGMENT_OUTPUT_LOCATION_INDEXED(x, y)
#define UBO_BINDING(packing, x) layout(packing, binding = x)
#define SAMPLER_BINDING(x) layout(binding = x)
#define TEXEL_BUFFER_BINDING(x) layout(binding = x)
#define SSBO_BINDING(x) layout(binding = x)
#define IMAGE_BINDING(format, x) layout(format, binding = x)
#define VARYING_LOCATION(x)
#extension GL_ANDROID_extension_pack_es31a : enable
#extension GL_EXT_blend_func_extended : enable
#extension GL_EXT_shader_framebuffer_fetch: enable
#define FRAGMENT_INOUT inout
precision highp float;
precision highp int;
precision highp sampler2DArray;
precision highp usamplerBuffer;
precision highp sampler2DMS;
precision highp image2DArray;
#define API_OPENGL 1
#define float2 vec2
#define float3 vec3
#define float4 vec4
#define uint2 uvec2
#define uint3 uvec3
#define uint4 uvec4
#define int2 ivec2
#define int3 ivec3
#define int4 ivec4
#define frac fract
#define lerp mix

// Vertex UberShader

// One light entry of the VSBlock uniform buffer.
struct Light {
  int4 color;
  float4 cosatt;
  float4 distatt;
  float4 pos;
  float4 dir;
};

// Uniform state consumed by the vertex shader.
UBO_BINDING(std140, 2) uniform VSBlock {
  uint components;
  uint xfmem_dualTexInfo;
  uint xfmem_numColorChans;
  uint missing_color_hex;
  float4 missing_color_value;
  float4 cpnmtx[6];
  float4 cproj[4];
  int4 cmtrl[4];
  Light clights[8];
  float4 ctexmtx[24];
  float4 ctrmtx[64];
  float4 cnmtx[32];
  float4 cpostmtx[64];
  float4 cpixelcenter;
  float2 cviewport;
  uint4 xfmem_pack1[8];
  // Accessors for the four values packed into each xfmem_pack1 entry.
  #define xfmem_texMtxInfo(i) (xfmem_pack1[(i)].x)
  #define xfmem_postMtxInfo(i) (xfmem_pack1[(i)].y)
  #define xfmem_color(i) (xfmem_pack1[(i)].z)
  #define xfmem_alpha(i) (xfmem_pack1[(i)].w)
};

// Local mirror of the VertexData interface block.
struct VS_OUTPUT {
  float4 pos;
  float4 colors_0;
  float4 colors_1;
  float clipDist0;
  float clipDist1;
};

#define dolphin_isnan(f) isnan(f)

// Computes the contribution of one light to a lighting channel.
//   index       - index into clights[]
//   attnfunc    - attenuation selector (see the case labels below)
//   diffusefunc - diffuse selector: 0 = none, 1 = signed, 2 = clamped
//   pos, normal - transformed vertex position and normal
// Returns the light color scaled by the attenuation (and, depending on
// diffusefunc, by dot(ldir, normal)), rounded to integers. Unknown diffusefunc
// values return zero.
int4 CalculateLighting(uint index, uint attnfunc, uint diffusefunc, float3 pos, float3 normal) {
  float3 ldir, cosAttn, distAttn;
  float dist, dist2, attn;

  switch (attnfunc) {
  case 0x0u /* No attenuation */:
  case 0x2u /* Directional light attenuation */:
    ldir = clights[index].pos.xyz - pos.xyz;
    attn = 1.0;
    // Test for a degenerate direction BEFORE normalizing: normalize() of a
    // zero-length vector is undefined, so checking the normalized result
    // cannot reliably detect this case.
    if (length(ldir) == 0.0)
      ldir = normal;
    else
      ldir = normalize(ldir);
    break;

  case 0x1u /* Point light attenuation */:
    ldir = normalize(clights[index].pos.xyz - pos.xyz);
    attn = (dot(normal, ldir) >= 0.0) ? max(0.0, dot(normal, clights[index].dir.xyz)) : 0.0;
    cosAttn = clights[index].cosatt.xyz;
    if (diffusefunc == 0x0u /* None */)
      distAttn = clights[index].distatt.xyz;
    else
      distAttn = normalize(clights[index].distatt.xyz);
    attn = max(0.0, dot(cosAttn, float3(1.0, attn, attn*attn))) / dot(distAttn, float3(1.0, attn, attn*attn));
    break;

  case 0x3u /* Spot light attenuation */:
    ldir = clights[index].pos.xyz - pos.xyz;
    dist2 = dot(ldir, ldir);
    dist = sqrt(dist2);
    ldir = ldir / dist;
    attn = max(0.0, dot(ldir, clights[index].dir.xyz));
    attn = max(0.0, clights[index].cosatt.x + clights[index].cosatt.y * attn + clights[index].cosatt.z * attn * attn) / dot(clights[index].distatt.xyz, float3(1.0, dist, dist2));
    break;

  default:
    attn = 1.0;
    ldir = normal;
    break;
  }

  switch (diffusefunc) {
  case 0x0u /* None */:
    return int4(round(attn * float4(clights[index].color)));
  case 0x1u /* Sign */:
    return int4(round(attn * dot(ldir, normal) * float4(clights[index].color)));
  case 0x2u /* Clamp */:
    return int4(round(attn * max(0.0, dot(ldir, normal)) * float4(clights[index].color)));
  default:
    return int4(0, 0, 0, 0);
  }
}

// Vertex inputs. ATTRIBUTE_LOCATION expands to nothing in this configuration.
ATTRIBUTE_LOCATION(0) in float4 rawpos;
ATTRIBUTE_LOCATION(1) in uint4 posmtx;
ATTRIBUTE_LOCATION(2) in float3 rawnorm0;
ATTRIBUTE_LOCATION(3) in float3 rawnorm1;
ATTRIBUTE_LOCATION(4) in float3 rawnorm2;
ATTRIBUTE_LOCATION(5) in float4 rawcolor0;
ATTRIBUTE_LOCATION(6) in float4 rawcolor1;
ATTRIBUTE_LOCATION(8) in float3 rawtex0;
ATTRIBUTE_LOCATION(9) in float3 rawtex1;
ATTRIBUTE_LOCATION(10) in float3 rawtex2;
ATTRIBUTE_LOCATION(11) in float3 rawtex3;
ATTRIBUTE_LOCATION(12) in float3 rawtex4;
ATTRIBUTE_LOCATION(13) in float3 rawtex5;
ATTRIBUTE_LOCATION(14) in float3 rawtex6;
ATTRIBUTE_LOCATION(15) in float3 rawtex7;

VARYING_LOCATION(0) out VertexData {
  centroid float4 pos;
  centroid float4 colors_0;
  centroid float4 colors_1;
  centroid float clipDist0;
  centroid float clipDist1;
} vs;

// Transforms the vertex, evaluates both lighting channels and writes the
// VertexData outputs and gl_Position.
void main() {
  VS_OUTPUT o;
  // Nothing below computes clip distances, yet both fields are copied to the
  // outputs. Give them a defined value instead of forwarding garbage.
  o.clipDist0 = 0.0;
  o.clipDist1 = 0.0;

  // Position matrix
  float4 P0;
  float4 P1;
  float4 P2;

  // Normal matrix
  float3 N0;
  float3 N1;
  float3 N2;

  if ((components & 2u) != 0u) { // VB_HAS_POSMTXIDX
    // Vertex format has a per-vertex matrix
    int posidx = int(posmtx.r);
    P0 = ctrmtx[posidx];
    P1 = ctrmtx[posidx+1];
    P2 = ctrmtx[posidx+2];

    int normidx = posidx >= 32 ? (posidx - 32) : posidx;
    N0 = cnmtx[normidx].xyz;
    N1 = cnmtx[normidx+1].xyz;
    N2 = cnmtx[normidx+2].xyz;
  } else {
    // One shared matrix
    P0 = cpnmtx[0];
    P1 = cpnmtx[1];
    P2 = cpnmtx[2];
    N0 = cpnmtx[3].xyz;
    N1 = cpnmtx[4].xyz;
    N2 = cpnmtx[5].xyz;
  }

  float4 pos = float4(dot(P0, rawpos), dot(P1, rawpos), dot(P2, rawpos), 1.0);
  o.pos = float4(dot(cproj[0], pos), dot(cproj[1], pos), dot(cproj[2], pos), dot(cproj[3], pos));

  // Only the first normal gets normalized (TODO: why?)
  float3 _norm0 = float3(0.0, 0.0, 0.0);
  if ((components & 1024u) != 0u) // VB_HAS_NRM0
    _norm0 = normalize(float3(dot(N0, rawnorm0), dot(N1, rawnorm0), dot(N2, rawnorm0)));

  float3 _norm1 = float3(0.0, 0.0, 0.0);
  if ((components & 2048u) != 0u) // VB_HAS_NRM1
    _norm1 = float3(dot(N0, rawnorm1), dot(N1, rawnorm1), dot(N2, rawnorm1));

  float3 _norm2 = float3(0.0, 0.0, 0.0);
  if ((components & 4096u) != 0u) // VB_HAS_NRM2
    _norm2 = float3(dot(N0, rawnorm2), dot(N1, rawnorm2), dot(N2, rawnorm2));

  // xfmem.numColorChans controls the number of color channels available to TEV,
  // but we still need to generate all channels here, as it can be used in texgen.
  // Cel-damage is an example of this.
  float4 vertex_color_0, vertex_color_1;

  // To use color 1, the vertex descriptor must have color 0 and 1.
  // If color 1 is present but not color 0, it is used for lighting channel 0.
  bool use_color_1 = ((components & 24576u) == 24576u); // VB_HAS_COL0 | VB_HAS_COL1
  for (uint color = 0u; color < 2u; color++) {
    if ((color == 0u || use_color_1) && (components & (8192u << color)) != 0u) {
      // Use color0 for channel 0, and color1 for channel 1 if both colors 0 and 1 are present.
      if (color == 0u)
        vertex_color_0 = rawcolor0;
      else
        vertex_color_1 = rawcolor1;
    } else if (color == 0u && (components & 16384u) != 0u) {
      // Use color1 for channel 0 if color0 is not present.
      vertex_color_0 = rawcolor1;
    } else {
      if (color == 0u)
        vertex_color_0 = missing_color_value;
      else
        vertex_color_1 = missing_color_value;
    }
  }

  // Lighting
  for (uint chan = 0u; chan < 2u; chan++) {
    uint colorreg = xfmem_color(chan);
    uint alphareg = xfmem_alpha(chan);
    int4 mat = cmtrl[chan + 2u];
    int4 lacc = int4(255, 255, 255, 255);

    // Bit 0 selects the vertex color instead of the material register.
    if (bitfieldExtract(uint(colorreg), 0, 1) != 0u)
      mat.xyz = int3(round(((chan == 0u) ? vertex_color_0.xyz : vertex_color_1.xyz) * 255.0));

    if (bitfieldExtract(uint(alphareg), 0, 1) != 0u)
      mat.w = int(round(((chan == 0u) ? vertex_color_0.w : vertex_color_1.w) * 255.0));
    else
      mat.w = cmtrl [chan + 2u].w;

    // Bit 1 enables the light accumulation for the color part.
    if (bitfieldExtract(uint(colorreg), 1, 1) != 0u) {
      // Bit 6 selects the vertex color as the accumulator's starting value.
      if (bitfieldExtract(uint(colorreg), 6, 1) != 0u)
        lacc.xyz = int3(round(((chan == 0u) ? vertex_color_0.xyz : vertex_color_1.xyz) * 255.0));
      else
        lacc.xyz = cmtrl [chan].xyz;

      // The 8-bit light mask is split across bits 2..5 and 11..14.
      uint light_mask = bitfieldExtract(uint(colorreg), 2, 4) | (bitfieldExtract(uint(colorreg), 11, 4) << 4u);
      uint attnfunc = bitfieldExtract(uint(colorreg), 9, 2);
      uint diffusefunc = bitfieldExtract(uint(colorreg), 7, 2);
      for (uint light_index = 0u; light_index < 8u; light_index++) {
        if ((light_mask & (1u << light_index)) != 0u)
          lacc.xyz += CalculateLighting(light_index, attnfunc, diffusefunc, pos.xyz, _norm0).xyz;
      }
    }

    // Same scheme for the alpha part.
    if (bitfieldExtract(uint(alphareg), 1, 1) != 0u) {
      if (bitfieldExtract(uint(alphareg), 6, 1) != 0u) {
        if ((components & (8192u << chan)) != 0u) // VB_HAS_COL0
          lacc.w = int(round(((chan == 0u) ? vertex_color_0.w : vertex_color_1.w) * 255.0));
        else if ((components & 8192u) != 0u) // VB_HAS_COL0
          lacc.w = int(round(vertex_color_0.w * 255.0));
        else
          lacc.w = 255;
      } else {
        lacc.w = cmtrl [chan].w;
      }

      uint light_mask = bitfieldExtract(uint(alphareg), 2, 4) | (bitfieldExtract(uint(alphareg), 11, 4) << 4u);
      uint attnfunc = bitfieldExtract(uint(alphareg), 9, 2);
      uint diffusefunc = bitfieldExtract(uint(alphareg), 7, 2);
      for (uint light_index = 0u; light_index < 8u; light_index++) {
        if ((light_mask & (1u << light_index)) != 0u)
          lacc.w += CalculateLighting(light_index, attnfunc, diffusefunc, pos.xyz, _norm0).w;
      }
    }

    lacc = clamp(lacc, 0, 255);

    // Hopefully GPUs that can support dynamic indexing will optimize this.
    float4 lit_color = float4((mat * (lacc + (lacc >> 7))) >> 8) / 255.0;
    switch (chan) {
    case 0u: o.colors_0 = lit_color; break;
    case 1u: o.colors_1 = lit_color; break;
    }
  }

  // The number of colors available to TEV is determined by numColorChans.
  // We have to provide the fields to match the interface, so set to zero
  // if it's not enabled.
  if (xfmem_numColorChans == 0u)
    o.colors_0 = float4(0.0, 0.0, 0.0, 0.0);
  if (xfmem_numColorChans <= 1u)
    o.colors_1 = float4(0.0, 0.0, 0.0, 0.0);

  // Depth remap and pixel-center adjustment driven by cpixelcenter.
  o.pos.z = o.pos.w * cpixelcenter.w - o.pos.z * cpixelcenter.z;
  o.pos.z = o.pos.z * 2.0 - o.pos.w;
  o.pos.xy *= sign(cpixelcenter.xy * float2(1.0, -1.0));
  o.pos.xy = o.pos.xy - o.pos.w * cpixelcenter.xy;

  vs.pos = o.pos;
  vs.colors_0 = o.colors_0;
  vs.colors_1 = o.colors_1;
  vs.clipDist0 = o.clipDist0;
  vs.clipDist1 = o.clipDist1;
  gl_Position = o.pos;
}

#version 320 es
#define FORCE_EARLY_Z layout(early_fragment_tests) in
#define ATTRIBUTE_LOCATION(x)
#define FRAGMENT_OUTPUT_LOCATION(x)
#define FRAGMENT_OUTPUT_LOCATION_INDEXED(x, y)
#define UBO_BINDING(packing, x) layout(packing, binding = x)
#define SAMPLER_BINDING(x) layout(binding = x)
#define TEXEL_BUFFER_BINDING(x) layout(binding = x)
#define SSBO_BINDING(x) layout(binding = x)
#define IMAGE_BINDING(format, x) layout(format, binding = x)
#define VARYING_LOCATION(x)
#extension GL_ANDROID_extension_pack_es31a : enable
#extension GL_EXT_blend_func_extended : enable
#extension GL_EXT_shader_framebuffer_fetch: enable
#define FRAGMENT_INOUT inout
precision highp float;
precision highp int;
precision highp sampler2DArray;
precision highp usamplerBuffer;
precision highp sampler2DMS;
precision highp image2DArray;
#define API_OPENGL 1
#define float2 vec2
#define float3 vec3
#define float4 vec4
#define uint2 uvec2
#define uint3 uvec3
#define uint4 uvec4
#define int2 ivec2
#define int3 ivec3
#define int4 ivec4
#define frac fract
#define lerp mix

// Pixel UberShader for 0 texgens

// Integer dot product of two 3-component vectors.
int idot(int3 x, int3 y) {
  int3 tmp = x * y;
  return tmp.x + tmp.y + tmp.z;
}

// Integer dot product of two 4-component vectors.
int idot(int4 x, int4 y) {
  int4 tmp = x * y;
  return tmp.x + tmp.y + tmp.z + tmp.w;
}

// Round-to-integer helpers for scalars and vectors.
int iround(float x) { return int (round(x)); }
int2 iround(float2 x) { return int2(round(x)); }
int3 iround(float3 x) { return int3(round(x)); }
int4 iround(float4 x) { return int4(round(x)); }

SAMPLER_BINDING(0) uniform sampler2DArray samp[8];
// Uniform state consumed by the pixel shader.
UBO_BINDING(std140, 1) uniform PSBlock {
  int4 color[4];
  int4 k[4];
  int4 alphaRef;
  int4 texdim[8];
  int4 czbias[2];
  int4 cindscale[2];
  int4 cindmtx[6];
  int4 cfogcolor;
  int4 cfogi;
  float4 cfogf;
  float4 cfogrange[3];
  float4 czslope;
  float2 cefbscale;
  uint bpmem_genmode;
  uint bpmem_alphaTest;
  uint bpmem_fogParam3;
  uint bpmem_fogRangeBase;
  uint bpmem_dstalpha;
  uint bpmem_ztex_op;
  bool bpmem_late_ztest;
  bool bpmem_rgba6_format;
  bool bpmem_dither;
  bool bpmem_bounding_box;
  uint4 bpmem_pack1[16];
  uint4 bpmem_pack2[8];
  int4 konstLookup[32];
  bool blend_enable;
  uint blend_src_factor;
  uint blend_src_factor_alpha;
  uint blend_dst_factor;
  uint blend_dst_factor_alpha;
  bool blend_subtract;
  bool blend_subtract_alpha;
  bool logic_op_enable;
  uint logic_op_mode;
};

// Accessors for the values packed into bpmem_pack1 / bpmem_pack2.
#define bpmem_combiners(i) (bpmem_pack1[(i)].xy)
#define bpmem_tevind(i) (bpmem_pack1[(i)].z)
#define bpmem_iref(i) (bpmem_pack1[(i)].w)
#define bpmem_tevorder(i) (bpmem_pack2[(i)].x)
#define bpmem_tevksel(i) (bpmem_pack2[(i)].y)
#define samp_texmode0(i) (bpmem_pack2[(i)].z)
#define samp_texmode1(i) (bpmem_pack2[(i)].w)

// Samples `tex` at integer coordinate `uv`, normalized by texdim[texmap] * 128,
// on array layer `layer`, and returns the texel scaled to 0..255 integers.
int4 sampleTexture(uint texmap, in sampler2DArray tex, int2 uv, int layer) {
  float size_s = float(texdim[texmap].x * 128);
  float size_t = float(texdim[texmap].y * 128);
  float3 coords = float3(float(uv.x) / size_s, float(uv.y) / size_t, layer);
  return iround(255.0 * texture(tex, coords));
}

FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 0) FRAGMENT_INOUT vec4 ocol0;
FRAGMENT_OUTPUT_LOCATION_INDEXED(0, 1) out vec4 ocol1;

VARYING_LOCATION(0) in VertexData {
  centroid float4 pos;
  centroid float4 colors_0;
  centroid float4 colors_1;
  centroid float clipDist0;
  centroid float clipDist1;
};

// Samples texture unit `texmap` through sampleTexture.
int4 sampleTextureWrapper(uint texmap, int2 uv, int layer) {
  return sampleTexture(texmap, samp[texmap], uv, layer);
}

// Reorders the channels of `color` according to swap table entry `s`, whose
// selectors are stored in the low bits of two consecutive tevksel registers.
int4 Swizzle(uint s, int4 color) {
  // AKA: Color Channel Swapping
  int4 ret;
  ret.r = color[bitfieldExtract(uint(bpmem_tevksel(s * 2u)), 0, 2)];
  ret.g = color[bitfieldExtract(uint(bpmem_tevksel(s * 2u)), 2, 2)];
  ret.b = color[bitfieldExtract(uint(bpmem_tevksel(s * 2u + 1u)), 0, 2)];
  ret.a = color[bitfieldExtract(uint(bpmem_tevksel(s * 2u + 1u)), 2, 2)];
  return ret;
}

// Applies wrap mode `mode` to `coord`: 0 passes it through, 1..5 mask it with
// (0xfffe >> mode), anything else returns 0.
int Wrap(int coord, uint mode) {
  if (mode == 0u) // ITW_OFF
    return coord;
  else if (mode < 6u) // ITW_256 to ITW_16
    return coord & (0xfffe >> mode);
  else // ITW_0
    return 0;
}

// TEV's Linear Interpolate, plus bias, add/subtract and scale
int tevLerp(int A, int B, int C, int D, uint bias, bool op, bool alpha, uint shift) {
  // Scale C from 0..255 to 0..256
  C += C >> 7;

  // Add bias to D
  if (bias == 1u) D += 128;
  else if (bias == 2u) D -= 128;

  int lerp = (A << 8) + (B - A)*C;
  if (shift != 3u) {
    lerp = lerp << shift;
    D = D << shift;
  }

  if ((shift == 3u) == alpha)
    lerp = lerp + (op ? 127 : 128);

  int result = lerp >> 8;

  // Add/Subtract D
  if (op) // Subtract
    result = D - result;
  else // Add
    result = D + result;

  // Most of the Shift was moved inside the lerp for improved precision
  // But we still do the divide by 2 here
  if (shift == 3u)
    result = result >> 1;
  return result;
}

// TEV's Linear Interpolate, plus bias, add/subtract and scale
int3 tevLerp3(int3 A, int3 B, int3 C, int3 D, uint bias, bool op, bool alpha, uint shift) {
  // Scale C from 0..255 to 0..256
  C += C >> 7;

  // Add bias to D
  if (bias == 1u) D += 128;
  else if (bias == 2u) D -= 128;

  int3 lerp = (A << 8) + (B - A)*C;
  if (shift != 3u) {
    lerp = lerp << shift;
    D = D << shift;
  }

  if ((shift == 3u) == alpha)
    lerp = lerp + (op ? 127 : 128);

  int3 result = lerp >> 8;

  // Add/Subtract D
  if (op) // Subtract
    result = D - result;
  else // Add
    result = D + result;

  // Most of the Shift was moved inside the lerp for improved precision
  // But we still do the divide by 2 here
  if (shift == 3u)
    result = result >> 1;
  return result;
}

// Implements operations 0-5 of TEV's compare mode,
// which are common to both color and alpha channels
bool tevCompare(uint op, int3 color_A, int3 color_B) {
  switch (op) {
  case 0u: // TevCompareMode::R8, TevComparison::GT
    return (color_A.r > color_B.r);
  case 1u: // TevCompareMode::R8, TevComparison::EQ
    return (color_A.r == color_B.r);
  case 2u: { // TevCompareMode::GR16, TevComparison::GT
    // Braces keep the temporaries scoped to this case.
    int A_16 = (color_A.r | (color_A.g << 8));
    int B_16 = (color_B.r | (color_B.g << 8));
    return A_16 > B_16;
  }
  case 3u: // TevCompareMode::GR16, TevComparison::EQ
    return (color_A.r == color_B.r && color_A.g == color_B.g);
  case 4u: { // TevCompareMode::BGR24, TevComparison::GT
    int A_24 = (color_A.r | (color_A.g << 8) | (color_A.b << 16));
    int B_24 = (color_B.r | (color_B.g << 8) | (color_B.b << 16));
    return A_24 > B_24;
  }
  case 5u: // TevCompareMode::BGR24, TevComparison::EQ
    return (color_A.r == color_B.r && color_A.g == color_B.g && color_A.b == color_B.b);
  default:
    return false;
  }
}

// Mutable TEV state carried across stages.
struct State {
  int4 Reg[4];
  int4 TexColor;
  int AlphaBump;
};

// Per-stage configuration words.
struct StageState {
  uint stage;
  uint order;
  uint cc;
  uint ac;
};

int4 getRasColor(State s, StageState ss, float4 colors_0, float4 colors_1);
int4 getKonstColor(State s, StageState ss);

// Helper function for Alpha Test
// Evaluates comparison `compare` (0..7) between a and b.
bool alphaCompare(int a, int b, uint compare) {
  if (compare < 4u) {
    if (compare < 2u) {
      if (compare < 1u) { return false; }    // Never (0)
      else { return a < b; }                 // Less (1)
    } else {
      if (compare < 3u) { return a == b; }   // Equal (2)
      else { return a <= b; }                // LEqual (3)
    }
  } else {
    if (compare < 6u) {
      if (compare < 5u) { return a > b; }    // Greater (4)
      else { return a != b; }                // NEqual (5)
    } else {
      if (compare < 7u) { return a >= b; }   // GEqual (6)
      else { return true; }                  // Always (7)
    }
  }
}

// Returns color combiner input `index` (0..15) for the current stage.
int3 selectColorInput(State s, StageState ss, float4 colors_0, float4 colors_1, uint index) {
  if (index < 8u) {
    if (index < 4u) {
      if (index < 2u) {
        if (index < 1u) { return s.Reg[0].rgb; }   // prev.rgb (0)
        else { return s.Reg[0].aaa; }              // prev.aaa (1)
      } else {
        if (index < 3u) { return s.Reg[1].rgb; }   // c0.rgb (2)
        else { return s.Reg[1].aaa; }              // c0.aaa (3)
      }
    } else {
      if (index < 6u) {
        if (index < 5u) { return s.Reg[2].rgb; }   // c1.rgb (4)
        else { return s.Reg[2].aaa; }              // c1.aaa (5)
      } else {
        if (index < 7u) { return s.Reg[3].rgb; }   // c2.rgb (6)
        else { return s.Reg[3].aaa; }              // c2.aaa (7)
      }
    }
  } else {
    if (index < 12u) {
      if (index < 10u) {
        if (index < 9u) { return s.TexColor.rgb; } // tex.rgb (8)
        else { return s.TexColor.aaa; }            // tex.aaa (9)
      } else {
        if (index < 11u) { return getRasColor(s, ss, colors_0, colors_1).rgb; } // ras.rgb (10)
        else { return getRasColor(s, ss, colors_0, colors_1).aaa; }             // ras.aaa (11)
      }
    } else {
      if (index < 14u) {
        if (index < 13u) { return int3(255, 255, 255); } // ONE (12)
        else { return int3(128, 128, 128); }             // HALF (13)
      } else {
        if (index < 15u) { return getKonstColor(s, ss).rgb; } // konst.rgb (14)
        else { return int3(0, 0, 0); }                        // ZERO (15)
      }
    }
  }
}

// Returns alpha combiner input `index` (0..7) for the current stage.
int selectAlphaInput(State s, StageState ss, float4 colors_0, float4 colors_1, uint index) {
  if (index < 4u) {
    if (index < 2u) {
      if (index < 1u) { return s.Reg[0].a; }   // prev (0)
      else { return s.Reg[1].a; }              // c0 (1)
    } else {
      if (index < 3u) { return s.Reg[2].a; }   // c1 (2)
      else { return s.Reg[3].a; }              // c2 (3)
    }
  } else {
    if (index < 6u) {
      if (index < 5u) { return s.TexColor.a; }                         // tex (4)
      else { return getRasColor(s, ss, colors_0, colors_1).a; }        // ras (5)
    } else {
      if (index < 7u) { return getKonstColor(s, ss).a; }               // konst (6)
      else { return 0; }                                               // ZERO (7)
    }
  }
}

// Reads TEV register `index` (0..3).
int4 getTevReg(in State s, uint index) {
  if (index < 2u) {
    if (index < 1u) { return s.Reg[0]; }   // prev (0)
    else { return s.Reg[1]; }              // c0 (1)
  } else {
    if (index < 3u) { return s.Reg[2]; }   // c1 (2)
    else { return s.Reg[3]; }              // c2 (3)
  }
}

// Writes the rgb part of TEV register `index` (0..3).
void setRegColor(inout State s, uint index, int3 color) {
  if (index < 2u) {
    if (index < 1u) { s.Reg[0].rgb = color; }   // prev (0)
    else { s.Reg[1].rgb = color; }              // c0 (1)
  } else {
    if (index < 3u) { s.Reg[2].rgb = color; }   // c1 (2)
    else { s.Reg[3].rgb = color; }              // c2 (3)
  }
}

// Writes the alpha part of TEV register `index` (0..3).
void setRegAlpha(inout State s, uint index, int alpha) {
  if (index < 2u) {
    if (index < 1u) { s.Reg[0].a = alpha; }   // prev (0)
    else { s.Reg[1].a = alpha; }              // c0 (1)
  } else {
    if (index < 3u) { s.Reg[2].a = alpha; }   // c1 (2)
    else { s.Reg[3].a = alpha; }              // c2 (3)
  }
}

// Runs the TEV stages, then depth texture, alpha test, dither, fog and logic
// ops, and writes ocol0 / ocol1.
void main() {
  float4 rawpos = gl_FragCoord;
#ifdef FB_FETCH_VALUE
  float4 initial_ocol0 = FB_FETCH_VALUE;
#else
  float4 initial_ocol0 = ocol0;
#endif

  State s;
  s.TexColor = int4(0, 0, 0, 0);
  s.AlphaBump = 0;
  s.Reg[0] = color[0];
  s.Reg[1] = color[1];
  s.Reg[2] = color[2];
  s.Reg[3] = color[3];

  uint num_stages = bitfieldExtract(uint(bpmem_genmode), 10, 4);

  // Main tev loop
  for(uint stage = 0u; stage <= num_stages; stage++) {
    StageState ss;
    ss.stage = stage;
    ss.cc = bpmem_combiners(stage).x;
    ss.ac = bpmem_combiners(stage).y;
    // Two stages share one tevorder word; odd stages use the upper part.
    ss.order = bpmem_tevorder(stage>>1);
    if ((stage & 1u) == 1u)
      ss.order = ss.order >> 12;

    // This is the Meat of TEV
    {
      // Color Combiner
      uint color_a = bitfieldExtract(uint(ss.cc), 12, 4);
      uint color_b = bitfieldExtract(uint(ss.cc), 8, 4);
      uint color_c = bitfieldExtract(uint(ss.cc), 4, 4);
      uint color_d = bitfieldExtract(uint(ss.cc), 0, 4);
      uint color_bias = bitfieldExtract(uint(ss.cc), 16, 2);
      bool color_op = bool(bitfieldExtract(uint(ss.cc), 18, 1));
      bool color_clamp = bool(bitfieldExtract(uint(ss.cc), 19, 1));
      uint color_shift = bitfieldExtract(uint(ss.cc), 20, 2);
      uint color_dest = bitfieldExtract(uint(ss.cc), 22, 2);
      uint color_compare_op = color_shift << 1 | uint(color_op);

      int3 color_A = selectColorInput(s, ss, colors_0, colors_1, color_a) & int3(255, 255, 255);
      int3 color_B = selectColorInput(s, ss, colors_0, colors_1, color_b) & int3(255, 255, 255);
      int3 color_C = selectColorInput(s, ss, colors_0, colors_1, color_c) & int3(255, 255, 255);
      int3 color_D = selectColorInput(s, ss, colors_0, colors_1, color_d); // 10 bits + sign

      int3 color;
      if (color_bias != 3u) { // Normal mode
        color = tevLerp3(color_A, color_B, color_C, color_D, color_bias, color_op, false, color_shift);
      } else { // Compare mode
        // op 6 and 7 do a select per color channel
        if (color_compare_op == 6u) {
          // TevCompareMode::RGB8, TevComparison::GT
          color.r = (color_A.r > color_B.r) ? color_C.r : 0;
          color.g = (color_A.g > color_B.g) ? color_C.g : 0;
          color.b = (color_A.b > color_B.b) ? color_C.b : 0;
        } else if (color_compare_op == 7u) {
          // TevCompareMode::RGB8, TevComparison::EQ
          color.r = (color_A.r == color_B.r) ? color_C.r : 0;
          color.g = (color_A.g == color_B.g) ? color_C.g : 0;
          color.b = (color_A.b == color_B.b) ? color_C.b : 0;
        } else {
          // The remaining ops do one compare which selects all 3 channels
          color = tevCompare(color_compare_op, color_A, color_B) ? color_C : int3(0, 0, 0);
        }
        color = color_D + color;
      }

      // Clamp result
      if (color_clamp)
        color = clamp(color, 0, 255);
      else
        color = clamp(color, -1024, 1023);

      // Write result to the correct input register of the next stage
      setRegColor(s, color_dest, color);

      // Alpha Combiner
      uint alpha_a = bitfieldExtract(uint(ss.ac), 13, 3);
      uint alpha_b = bitfieldExtract(uint(ss.ac), 10, 3);
      uint alpha_c = bitfieldExtract(uint(ss.ac), 7, 3);
      uint alpha_d = bitfieldExtract(uint(ss.ac), 4, 3);
      uint alpha_bias = bitfieldExtract(uint(ss.ac), 16, 2);
      bool alpha_op = bool(bitfieldExtract(uint(ss.ac), 18, 1));
      bool alpha_clamp = bool(bitfieldExtract(uint(ss.ac), 19, 1));
      uint alpha_shift = bitfieldExtract(uint(ss.ac), 20, 2);
      uint alpha_dest = bitfieldExtract(uint(ss.ac), 22, 2);
      uint alpha_compare_op = alpha_shift << 1 | uint(alpha_op);

      // Initialized so that no path ever observes an undefined value.
      int alpha_A = 0;
      int alpha_B = 0;
      if (alpha_bias != 3u || alpha_compare_op > 5u) {
        // Small optimisation here: alpha_A and alpha_B are unused by compare ops 0-5
        alpha_A = selectAlphaInput(s, ss, colors_0, colors_1, alpha_a) & 255;
        alpha_B = selectAlphaInput(s, ss, colors_0, colors_1, alpha_b) & 255;
      }
      int alpha_C = selectAlphaInput(s, ss, colors_0, colors_1, alpha_c) & 255;
      int alpha_D = selectAlphaInput(s, ss, colors_0, colors_1, alpha_d); // 10 bits + sign

      int alpha;
      if (alpha_bias != 3u) { // Normal mode
        alpha = tevLerp(alpha_A, alpha_B, alpha_C, alpha_D, alpha_bias, alpha_op, true, alpha_shift);
      } else { // Compare mode
        if (alpha_compare_op == 6u) {
          // TevCompareMode::A8, TevComparison::GT
          alpha = (alpha_A > alpha_B) ? alpha_C : 0;
        } else if (alpha_compare_op == 7u) {
          // TevCompareMode::A8, TevComparison::EQ
          alpha = (alpha_A == alpha_B) ? alpha_C : 0;
        } else {
          // All remaining alpha compare ops actually compare the color channels
          alpha = tevCompare(alpha_compare_op, color_A, color_B) ? alpha_C : 0;
        }
        alpha = alpha_D + alpha;
      }

      // Clamp result
      if (alpha_clamp)
        alpha = clamp(alpha, 0, 255);
      else
        alpha = clamp(alpha, -1024, 1023);

      // Write result to the correct input register of the next stage
      setRegAlpha(s, alpha_dest, alpha);
    }
  } // Main TEV loop

  // The final color/alpha come from the destination registers of the last stage.
  int4 TevResult;
  TevResult.xyz = getTevReg(s, bitfieldExtract(uint(bpmem_combiners(num_stages).x), 22, 2)).xyz;
  TevResult.w = getTevReg(s, bitfieldExtract(uint(bpmem_combiners(num_stages).y), 22, 2)).w;
  TevResult &= 255;

  int zCoord = int(rawpos.z * 16777216.0);
  zCoord = clamp(zCoord, 0, 0xFFFFFF);

  // Depth Texture
  if (bpmem_ztex_op != 0u) {
    int ztex = int(czbias[1].w); // fixed bias

    // Whatever texture was in our last stage, it's now our depth texture
    ztex += idot(s.TexColor.xyzw, czbias[0].xyzw);
    ztex += (bpmem_ztex_op == 1u) ? zCoord : 0;
    zCoord = ztex & 0xFFFFFF;
  }

  // Alpha Test
  if (bpmem_alphaTest != 0u) {
    bool comp0 = alphaCompare(TevResult.a, alphaRef.r, bitfieldExtract(uint(bpmem_alphaTest), 16, 3));
    bool comp1 = alphaCompare(TevResult.a, alphaRef.g, bitfieldExtract(uint(bpmem_alphaTest), 19, 3));

    // These if statements are written weirdly to work around intel and Qualcomm bugs with handling booleans.
    switch (bitfieldExtract(uint(bpmem_alphaTest), 22, 2)) {
    case 0u: // AND
      if (comp0 && comp1) break; else discard;
      break;
    case 1u: // OR
      if (comp0 || comp1) break; else discard;
      break;
    case 2u: // XOR
      if (comp0 != comp1) break; else discard;
      break;
    case 3u: // XNOR
      if (comp0 == comp1) break; else discard;
      break;
    }
  }

  if (bpmem_dither) {
    // Flipper uses a standard 2x2 Bayer Matrix for 6 bit dithering
    // Here the matrix is encoded into the two factor constants
    int2 dither = int2(rawpos.xy) & 1;
    TevResult.rgb = (TevResult.rgb - (TevResult.rgb >> 6)) + abs(dither.y * 3 - dither.x * 2);
  }

  // Fog
  uint fog_function = bitfieldExtract(uint(bpmem_fogParam3), 21, 3);
  if (fog_function != 0x0u /* Off (no fog) */) {
    // TODO: This all needs to be converted from float to fixed point
    float ze;
    if (bitfieldExtract(uint(bpmem_fogParam3), 20, 1) == 0u) {
      // perspective
      // ze = A/(B - (Zs >> B_SHF)
      ze = (cfogf.x * 16777216.0) / float(cfogi.y - (zCoord >> cfogi.w));
    } else {
      // orthographic
      // ze = a*Zs    (here, no B_SHF)
      ze = cfogf.z * float(zCoord) / 16777216.0;
    }

    if (bool(bitfieldExtract(uint(bpmem_fogRangeBase), 10, 1))) {
      // x_adjust = sqrt((x-center)^2 + k^2)/k
      // ze *= x_adjust
      float offset = (2.0 * (rawpos.x / cfogf.w)) - 1.0 - cfogf.z;
      float floatindex = clamp(9.0 - abs(offset) * 9.0, 0.0, 9.0);
      uint indexlower = uint(floatindex);
      uint indexupper = indexlower + 1u;
      // cfogrange is addressed as a flat table: element i lives in
      // cfogrange[i >> 2][i & 3].
      float klower = cfogrange[indexlower >> 2u][indexlower & 3u];
      float kupper = cfogrange[indexupper >> 2u][indexupper & 3u];
      float k = lerp(klower, kupper, frac(floatindex));
      float x_adjust = sqrt(offset * offset + k * k) / k;
      ze *= x_adjust;
    }

    float fog = clamp(ze - cfogf.y, 0.0, 1.0);

    if (fog_function >= 0x4u /* Exponential fog */) {
      switch (fog_function) {
      case 0x4u /* Exponential fog */:
        fog = 1.0 - exp2(-8.0 * fog);
        break;
      case 0x5u /* Exponential-squared fog */:
        fog = 1.0 - exp2(-8.0 * fog * fog);
        break;
      case 0x6u /* Backwards exponential fog */:
        fog = exp2(-8.0 * (1.0 - fog));
        break;
      case 0x7u /* Backwards exponential-squared fog */:
        fog = 1.0 - fog;
        fog = exp2(-8.0 * fog * fog);
        break;
      }
    }

    int ifog = iround(fog * 256.0);
    TevResult.rgb = (TevResult.rgb * (256 - ifog) + cfogcolor.rgb * ifog) >> 8;
  }

  // Logic Ops
  if (logic_op_enable) {
    int4 fb_value = iround(initial_ocol0 * 255.0);
    switch (logic_op_mode) {
    case 0u: TevResult = int4(0, 0, 0, 0); break;
    case 1u: TevResult = TevResult & fb_value; break;
    case 2u: TevResult = TevResult & ~fb_value; break;
    case 3u: TevResult = TevResult; break;
    case 4u: TevResult = ~TevResult & fb_value; break;
    case 5u: TevResult = fb_value; break;
    case 6u: TevResult = TevResult ^ fb_value; break;
    case 7u: TevResult = TevResult | fb_value; break;
    case 8u: TevResult = ~(TevResult | fb_value); break;
    case 9u: TevResult = ~(TevResult ^ fb_value); break;
    case 10u: TevResult = ~fb_value; break;
    case 11u: TevResult = TevResult | ~fb_value; break;
    case 12u: TevResult = ~TevResult; break;
    case 13u: TevResult = ~TevResult | fb_value; break;
    case 14u: TevResult = ~(TevResult & fb_value); break;
    case 15u: TevResult = int4(255, 255, 255, 255); break;
    }
    // `~` on a signed int sets the upper bits, so several modes above yield
    // negative values. Keep only the low 8 bits so the result is a valid
    // 0..255 channel value before it is converted for output.
    TevResult &= 255;
  }

  if (bpmem_rgba6_format)
    ocol0.rgb = float3(TevResult.rgb >> 2) / 63.0;
  else
    ocol0.rgb = float3(TevResult.rgb) / 255.0;

  if (bpmem_dstalpha != 0u)
    ocol0.a = float(bitfieldExtract(uint(bpmem_dstalpha), 0, 8) >> 2) / 63.0;
  else
    ocol0.a = float(TevResult.a >> 2) / 63.0;

  // Dest alpha override (dual source blending)
  // Colors will be blended against the alpha from ocol1 and
  // the alpha from ocol0 will be written to the framebuffer.
  ocol1 = float4(0.0, 0.0, 0.0, float(TevResult.a) / 255.0);
}

// Returns the rasterized color input selected by bits 7..9 of ss.order.
int4 getRasColor(State s, StageState ss, float4 colors_0, float4 colors_1) {
  // Select Ras for stage
  uint ras = bitfieldExtract(uint(ss.order), 7, 3);
  if (ras < 2u) { // Lighting Channel 0 or 1
    int4 color = iround(((ras == 0u) ? colors_0 : colors_1) * 255.0);
    uint swap = bitfieldExtract(uint(ss.ac), 0, 2);
    return Swizzle(swap, color);
  } else if (ras == 5u) { // Alpha Bump
    return int4(s.AlphaBump, s.AlphaBump, s.AlphaBump, s.AlphaBump);
  } else if (ras == 6u) { // Normalized Alpha Bump
    int normalized = s.AlphaBump | s.AlphaBump >> 5;
    return int4(normalized, normalized, normalized, normalized);
  } else {
    return int4(0, 0, 0, 0);
  }
}

// Returns the konst color for the current stage: rgb and alpha are looked up
// separately in konstLookup using selectors from the stage's tevksel word.
int4 getKonstColor(State s, StageState ss) {
  // Select Konst for stage
  // TODO: a switch case might be better here than an dynamically
  //       indexed uniform lookup
  uint tevksel = bpmem_tevksel(ss.stage>>1);
  if ((ss.stage & 1u) == 0u)
    return int4(konstLookup[bitfieldExtract(uint(tevksel), 4, 5)].rgb,
                konstLookup[bitfieldExtract(uint(tevksel), 9, 5)].a);
  else
    return int4(konstLookup[bitfieldExtract(uint(tevksel), 14, 5)].rgb,
                konstLookup[bitfieldExtract(uint(tevksel), 19, 5)].a);
}

#version 320 es
#define FORCE_EARLY_Z layout(early_fragment_tests) in
#define ATTRIBUTE_LOCATION(x)
#define FRAGMENT_OUTPUT_LOCATION(x)
#define FRAGMENT_OUTPUT_LOCATION_INDEXED(x, y)
#define UBO_BINDING(packing, x) layout(packing, binding = x)
#define SAMPLER_BINDING(x) layout(binding = x)
#define TEXEL_BUFFER_BINDING(x) layout(binding = x)
#define SSBO_BINDING(x) layout(binding = x)
#define IMAGE_BINDING(format, x) layout(format, binding = x)
#define VARYING_LOCATION(x)
#extension GL_ANDROID_extension_pack_es31a : enable
#extension GL_EXT_blend_func_extended : enable
#extension GL_EXT_shader_framebuffer_fetch: enable
#define FRAGMENT_INOUT inout
precision highp float;
precision highp int;
precision highp sampler2DArray;
precision highp usamplerBuffer;
precision highp sampler2DMS;
precision highp image2DArray;
#define API_OPENGL 1
#define float2 vec2
#define float3 vec3
#define float4 vec4
#define uint2 uvec2
#define uint3 uvec3
#define uint4 uvec4
#define int2 ivec2
#define int3 ivec3
#define int4 ivec4
#define frac fract
#define lerp mix

// Geometry shader: turns each input line into a four-vertex triangle strip.
layout(lines) in;
layout(triangle_strip, max_vertices = 4) out;

struct Light {
  int4 color;
  float4 cosatt;
  float4 distatt;
  float4 pos;
  float4 dir;
};

UBO_BINDING(std140, 3) uniform GSBlock {
  float4 cstereo;
  float4 clinept;
  int4 ctexoffset;
};

struct VS_OUTPUT {
  float4 pos;
  float4 colors_0;
  float4 colors_1;
  float clipDist0;
  float clipDist1;
};

VARYING_LOCATION(0) in VertexData {
  centroid float4 pos;
  centroid float4 colors_0;
  centroid float4 colors_1;
  centroid float clipDist0;
  centroid float clipDist1;
} vs[2];

VARYING_LOCATION(0) out VertexData {
  centroid float4 pos;
  centroid float4 colors_0;
  centroid float4 colors_1;
  centroid float clipDist0;
  centroid float clipDist1;
} ps;

// Emits, for each endpoint of the line, one vertex shifted by -offset and one
// shifted by +offset, forming a quad.
void main() {
  // Extent of the line after the perspective divide, per axis.
  float2 to = abs(vs[1].pos.xy / vs[1].pos.w - vs[0].pos.xy / vs[0].pos.w);

  // Choose the axis along which the endpoints are displaced.
  float2 offset;
  if (clinept.y * to.y > clinept.x * to.x) {
    offset = float2(clinept.z / clinept.x, 0);
  } else {
    offset = float2(0, -clinept.z / clinept.y);
  }

  for (int i = 0; i < 2; ++i) {
    VS_OUTPUT f;
    f.pos = vs[i].pos;
    f.colors_0 = vs[i].colors_0;
    f.colors_1 = vs[i].colors_1;
    f.clipDist0 = vs[i].clipDist0;
    f.clipDist1 = vs[i].clipDist1;

    VS_OUTPUT l = f;
    VS_OUTPUT r = f;
    // Scale by w so the displacement survives the perspective divide.
    l.pos.xy -= offset * l.pos.w;
    r.pos.xy += offset * r.pos.w;

    gl_Position = l.pos;
    ps.pos = l.pos;
    ps.colors_0 = l.colors_0;
    ps.colors_1 = l.colors_1;
    ps.clipDist0 = l.clipDist0;
    ps.clipDist1 = l.clipDist1;
    EmitVertex();

    gl_Position = r.pos;
    ps.pos = r.pos;
    ps.colors_0 = r.colors_0;
    ps.colors_1 = r.colors_1;
    ps.clipDist0 = r.clipDist0;
    ps.clipDist1 = r.clipDist1;
    EmitVertex();
  }
  EndPrimitive();
}

Dolphin Version: Dolphin 5.0-15684 Video Backend: OpenGL ES