diff --git a/targets/app/common/DLC/DLCAudioFile.cpp b/targets/app/common/DLC/DLCAudioFile.cpp index a9626f7d2..7a9c6ef1b 100644 --- a/targets/app/common/DLC/DLCAudioFile.cpp +++ b/targets/app/common/DLC/DLCAudioFile.cpp @@ -13,11 +13,6 @@ #include "platform/storage/storage.h" #include "util/StringHelpers.h" -#if defined(_WINDOWS64) -#include "app/windows/XML/ATGXmlParser.h" -#include "app/windows/XML/xmlFilesCallback.h" -#endif - namespace { constexpr std::size_t AUDIO_DLC_WCHAR_BIN_SIZE = 2; diff --git a/targets/app/common/DLCController.cpp b/targets/app/common/DLCController.cpp index 1948ce660..6e61d5c5e 100644 --- a/targets/app/common/DLCController.cpp +++ b/targets/app/common/DLCController.cpp @@ -240,7 +240,7 @@ int32_t DLCController::registerDLCData(char* pType, char* pBannerName, pDLCData->uiSortIndex = uiSortIndex; pDLCData->iConfig = iConfig; - if (pBannerName != "") { + if (pBannerName != nullptr && pBannerName[0] != '\0') { /* copy the banner only when a non-empty name was supplied */ strncpy(pDLCData->wchBanner, pBannerName, MAX_BANNERNAME_SIZE); } if (pDataFile[0] != 0) { diff --git a/targets/app/common/Game.cpp b/targets/app/common/Game.cpp index e0a5d5d98..111b91a64 100644 --- a/targets/app/common/Game.cpp +++ b/targets/app/common/Game.cpp @@ -47,10 +47,6 @@ #include "platform/renderer/renderer.h" #include "platform/storage/storage.h" #include "strings.h" -#if defined(_WINDOWS64) -#include "app/windows/XML/ATGXmlParser.h" -#include "app/windows/XML/xmlFilesCallback.h" -#endif #include #include #include diff --git a/targets/app/common/Iggy/gdraw/gdraw.c b/targets/app/common/Iggy/gdraw/gdraw.c index eeb97c02a..9a1ea981d 100644 --- a/targets/app/common/Iggy/gdraw/gdraw.c +++ b/targets/app/common/Iggy/gdraw/gdraw.c @@ -688,7 +688,7 @@ static void gdraw_FramebufferRenderbufferSafe(GLenum target, GLenum attachment, #define glFramebufferRenderbuffer_SAFE gdraw_FramebufferRenderbufferSafe #define glFramebufferRenderbuffer glFramebufferRenderbuffer_SAFE -#include "app/windows/Iggy/gdraw/gdraw_gl_shared.inl" +#include 
"gdraw_gl_shared.inl" #undef glVertexAttribPointer #define glVertexAttribPointer gdraw_real_vtxattrib diff --git a/targets/app/common/Iggy/gdraw/gdraw.h b/targets/app/common/Iggy/gdraw/gdraw.h index 3f99cf6ec..b88da0756 100644 --- a/targets/app/common/Iggy/gdraw/gdraw.h +++ b/targets/app/common/Iggy/gdraw/gdraw.h @@ -2,8 +2,8 @@ #define __LINUX_IGGY_GDRAW_H__ #include "app/common/Iggy/include/rrCore.h" -#include "app/windows/Iggy/include/gdraw.h" -#include "app/windows/Iggy/include/iggy.h" +#include "app/common/Iggy/include/gdraw.h" +#include "app/common/Iggy/include/iggy.h" #ifdef __cplusplus extern "C" { diff --git a/targets/app/common/Iggy/gdraw/gdraw_gl_shaders.inl b/targets/app/common/Iggy/gdraw/gdraw_gl_shaders.inl new file mode 100644 index 000000000..89899d295 --- /dev/null +++ b/targets/app/common/Iggy/gdraw/gdraw_gl_shaders.inl @@ -0,0 +1,1536 @@ +// This file was automatically generated by shadergen. Do not edit by hand! + +static char pshader_basic_frag0[] = + "#version 110 // only need 100, but 110 works around a driver " + "issue\n" + "#define MAIN(x) void main()\n" + "#define ARGS\n" + "#define ARGS2\n" + "#define DECLARE_SAMPLER(type, name, reg) uniform type name\n" + "#define DECLARE_CONST(type, name, reg) uniform type name\n" + "#define DECLARE_CONST_EXTRA(type, name, reg) uniform type name\n" + "#define SAMPLER sampler2D\n" + "#define TEX2D texture2D\n" + "#define VEC4 vec4\n" + "#define VEC3 vec3\n" + "#define VEC2 vec2\n" + "#define LOWP\n" + "#define MEDIUMP\n" + "#define HIGHP\n" + "#define TC0 tex_coord.xy\n" + "#define TC1 tex_coord.zw\n" + "#define OUTPUT(x) gl_FragColor = x\n" + "\n" + "#define SHADER_ALPHATEST(x) if (x < 0.5) discard\n" + "\n" + "#define COMMON_PSCONSTANTS DECLARE_CONST(VEC4, color_mul, 0); " + "DECLARE_CONST(VEC4, color_add, 1); DECLARE_CONST(VEC4, focal, 2); varying " + "VEC4 tex_coord\n" + "#define COMMON_PCONSTANTS2 varying VEC4 tex_coord; varying VEC4 " + "color_mul; varying VEC4 color_add; varying VEC4 
clip_rect;\n" + "#define BEGIN_CONSTANTS\n" + "#define END_CONSTANTS\n" + "\n" + "#define saturate(x) clamp(x,0.0,1.0)\n"; +static char pshader_basic_frag1[] = ""; +static char pshader_basic_frag2[] = + "COMMON_PSCONSTANTS;\n" + "\n" + "#ifdef TEX0\n" + "DECLARE_SAMPLER(LOWP SAMPLER, tex0, 0);\n" + "#endif\n" + "\n" + "#ifndef READ_ALPHA_TEX\n" + "#define READ_ALPHA_TEX(x) ((x).a)\n" + "#endif\n" + "\n" + "#ifndef AATEX_USE_SAMPLER1\n" + "DECLARE_SAMPLER(LOWP SAMPLER, tex1, 7);\n" + "#else\n" + "DECLARE_SAMPLER(LOWP SAMPLER, tex1, 1);\n" + "#endif\n" + "\n" + "MAIN((ARGS))\n" + "{\n" + " LOWP VEC4 z;\n" + " MEDIUMP VEC4 t;\n" + " \n" + " z = color_mul;\n" + " \n" + " #ifndef ADDITIVE_ALPHA\n" + " z.rgb *= z.a; // premultiply; could do outside\n" + " #endif\n" + " \n" + " #ifdef TEX0\n" + " MEDIUMP VEC2 c,tc0;\n" + " tc0 = TC0;\n" + "\n" + " #ifdef EXPLICIT_PROJECTION\n" + " float one_over_w = 1.0 / TC1.y;\n" + " tc0.x *= one_over_w;\n" + " tc0.y *= one_over_w;\n" + " #endif\n" + " \n" + " #ifdef TEX0_RADIAL\n" + " tc0.x = sqrt(dot(tc0.xy,tc0.xy));\n" + " tc0.y = tc0.x; // necessary on some OpenGL devices\n" + " #else\n" + " #ifdef TEX0_FOCAL\n" + " c.x = tc0.x + focal.x;\n" + " c.y = tc0.y;\n" + " t.x = c.x * focal.y;\n" + " t.y = (c.x*c.x + c.y*c.y) * focal.z;\n" + " tc0.x = sqrt(t.y + t.x*t.x) - t.x;\n" + " tc0.y = tc0.x;\n" + " #endif\n" + " #endif\n" + "\n" + " #ifdef TEX0_ALPHA\n" + " t.a = READ_ALPHA_TEX(TEX2D(tex0, tc0));\n" + " #ifdef ADDITIVE_ALPHA\n" + " z.a *= t.a;\n" + " #else\n" + " z *= t.a;\n" + " #endif\n" + " #else\n" + " t = TEX2D(tex0, tc0);\n" + " #ifdef ADDITIVE_ALPHA\n" + " if (t.a != 0.0) t.rgb = t.rgb * (1.0/t.a); // unpremultiply\n" + " #endif\n" + " z *= t;\n" + " #endif\n" + " #endif\n" + "\n" + " MEDIUMP VEC2 tc1;\n" + " tc1.xy = TC1.xy;\n" + "\n" + " #ifdef EXPLICIT_PROJECTION\n" + " tc1.x /= TC1.y;\n" + " #endif\n" + "\n" + " // antialiasing blend curve\n" + " t = TEX2D(tex1, tc1.xy);\n" + " #ifdef ADDITIVE_ALPHA\n" + " z.a *= t.a;\n" 
+ " #else\n" + " z *= t;\n" + " #endif\n" + "\n" + " #ifdef ADDITIVE_ALPHA\n" + " z += color_add;\n" + " z.rgb *= z.a; // premultiply\n" + " #else\n" + " #ifdef ADDITIVE\n" + " z.rgb += color_add.rgb * z.a; // scale addend to match premultiply\n" + " z.rgb = min(z.rgb, z.a);\n" + " #endif\n" + " #endif\n" + " \n" + " #ifdef TEX0_ALPHA_TEST\n" + " SHADER_ALPHATEST(z.a);\n" + " #endif\n" + "\n" + " OUTPUT(z);\n" + "}\n"; +static char pshader_basic_frag3[] = "#define ADDITIVE\n"; +static char pshader_basic_frag4[] = "#define ADDITIVE_ALPHA\n"; +static char pshader_basic_frag5[] = "#define TEX0\n"; +static char pshader_basic_frag6[] = + "#define TEX0\n" + "#define TEX0_ALPHA\n"; +static char pshader_basic_frag7[] = + "#define TEX0\n" + "#define TEX0_RADIAL\n"; +static char pshader_basic_frag8[] = + "#define TEX0\n" + "#define TEX0_FOCAL\n"; +static char pshader_basic_frag9[] = + "#define TEX0\n" + "#define TEX0_ALPHA\n" + "#define TEX0_ALPHA_TEST\n"; + +#define NUMFRAGMENTS_pshader_basic 4 +static char* pshader_basic_arr[18][NUMFRAGMENTS_pshader_basic] = { + { + pshader_basic_frag0, + pshader_basic_frag1, + pshader_basic_frag1, + pshader_basic_frag2, + }, + { + pshader_basic_frag0, + pshader_basic_frag1, + pshader_basic_frag3, + pshader_basic_frag2, + }, + { + pshader_basic_frag0, + pshader_basic_frag1, + pshader_basic_frag4, + pshader_basic_frag2, + }, + { + pshader_basic_frag0, + pshader_basic_frag5, + pshader_basic_frag1, + pshader_basic_frag2, + }, + { + pshader_basic_frag0, + pshader_basic_frag5, + pshader_basic_frag3, + pshader_basic_frag2, + }, + { + pshader_basic_frag0, + pshader_basic_frag5, + pshader_basic_frag4, + pshader_basic_frag2, + }, + { + pshader_basic_frag0, + pshader_basic_frag6, + pshader_basic_frag1, + pshader_basic_frag2, + }, + { + pshader_basic_frag0, + pshader_basic_frag6, + pshader_basic_frag3, + pshader_basic_frag2, + }, + { + pshader_basic_frag0, + pshader_basic_frag6, + pshader_basic_frag4, + pshader_basic_frag2, + }, + { + 
pshader_basic_frag0, + pshader_basic_frag7, + pshader_basic_frag1, + pshader_basic_frag2, + }, + { + pshader_basic_frag0, + pshader_basic_frag7, + pshader_basic_frag3, + pshader_basic_frag2, + }, + { + pshader_basic_frag0, + pshader_basic_frag7, + pshader_basic_frag4, + pshader_basic_frag2, + }, + { + pshader_basic_frag0, + pshader_basic_frag8, + pshader_basic_frag1, + pshader_basic_frag2, + }, + { + pshader_basic_frag0, + pshader_basic_frag8, + pshader_basic_frag3, + pshader_basic_frag2, + }, + { + pshader_basic_frag0, + pshader_basic_frag8, + pshader_basic_frag4, + pshader_basic_frag2, + }, + { + pshader_basic_frag0, + pshader_basic_frag9, + pshader_basic_frag1, + pshader_basic_frag2, + }, + { + pshader_basic_frag0, + pshader_basic_frag9, + pshader_basic_frag3, + pshader_basic_frag2, + }, + { + pshader_basic_frag0, + pshader_basic_frag9, + pshader_basic_frag4, + pshader_basic_frag2, + }, +}; + +static char** pshader_basic(int tex0, int additive) { + return pshader_basic_arr[0 + tex0 * 3 + additive * 1]; +} + +static char* pshader_basic_vars[] = {"tex0", "tex1", "color_mul", + "color_add", "focal", NULL}; + +static char pshader_general2_frag0[] = + "#version 110 // only need 100, but 110 works around a driver " + "issue\n" + "#define MAIN(x) void main()\n" + "#define ARGS\n" + "#define ARGS2\n" + "#define DECLARE_SAMPLER(type, name, reg) uniform type name\n" + "#define DECLARE_CONST(type, name, reg) uniform type name\n" + "#define DECLARE_CONST_EXTRA(type, name, reg) uniform type name\n" + "#define SAMPLER sampler2D\n" + "#define TEX2D texture2D\n" + "#define VEC4 vec4\n" + "#define VEC3 vec3\n" + "#define VEC2 vec2\n" + "#define LOWP\n" + "#define MEDIUMP\n" + "#define HIGHP\n" + "#define TC0 tex_coord.xy\n" + "#define TC1 tex_coord.zw\n" + "#define OUTPUT(x) gl_FragColor = x\n" + "\n" + "#define SHADER_ALPHATEST(x) if (x < 0.5) discard\n" + "\n" + "#define COMMON_PSCONSTANTS DECLARE_CONST(VEC4, color_mul, 0); " + "DECLARE_CONST(VEC4, color_add, 1); 
DECLARE_CONST(VEC4, focal, 2); varying " + "VEC4 tex_coord\n" + "#define COMMON_PCONSTANTS2 varying VEC4 tex_coord; varying VEC4 " + "color_mul; varying VEC4 color_add; varying VEC4 clip_rect;\n" + "#define BEGIN_CONSTANTS\n" + "#define END_CONSTANTS\n" + "\n" + "#define saturate(x) clamp(x,0.0,1.0)\n"; +static char pshader_general2_frag1[] = + "COMMON_PCONSTANTS2\n" + "DECLARE_SAMPLER(LOWP SAMPLER, tex0, 0);\n" + "MAIN((ARGS2))\n" + "{\n" + " // get t, the basic texture color\n" + "\n" + " LOWP VEC4 t;\n" + " \n" + " t = TEX2D(tex0, tex_coord.xy);\n" + "\n" + " // now color-transform t\n" + " //\n" + " // to determine optimal format for vertex shader to output the color, " + "let's\n" + " // start by doing the math assuming the texture is premultiplied, but " + "not\n" + " // the color transform:\n" + " //\n" + " // out.r = (tex.r * color_mul.r * color_mul.a) + (color_add.r * " + "tex.a * color_mul.a)\n" + " // out.a = tex.a * color_mul.a * color_add.a // color_add.a is " + "blend mode emulation\n" + " //\n" + " // now, we can see in the above we can premultiply both mul and add by " + "color_mul.a\n" + " //\n" + " // out.r = (tex.r * color_mulp.r) + (color_addp.r * tex.a)\n" + " // out.a = tex.a * color_mulp.a // can premultiply color_add.a " + "here as well\n" + "\n" + " \n" + " LOWP VEC4 c;\n" + " c.rgb = t.rgb * color_mul.rgb + t.a * color_add.rgb;\n" + " c.a = t.a * color_mul.a;\n" + "\n" + " // apply clip rect\n" + " //\n" + " // naive math, using panel-space coordinates\n" + " //\n" + " // panel_offset = abs(pos-center) - half_width\n" + " //\n" + " // Above function is negative where not clipped, positive\n" + " // where clipped. 
Now, we want to capture a one-pixel boundary,\n" + " // so we need to go to pixel coordinates:\n" + " //\n" + " // panel_offset *= pixels_per_panel_unit;\n" + " // // note this doesn't account for non-uniform scale of panel\n" + " //\n" + " // And now, with an offset in pixels, we want to compute an AA " + "mask:\n" + " //\n" + " // saturate(1-panel_offset)\n" + " //\n" + " // Note that we can just multiply pixels_per_panel_unit into\n" + " // each of the terms in panel offset, and we're left with:\n" + " //\n" + " // saturate(1 - (abs() - k))\n" + " //\n" + " // which becomes:\n" + " //\n" + " // saturate(k+1 - abs())\n" + " //\n" + " // and the +1 is folded into k.\n" + "\n" + " LOWP VEC2 cliprect_alpha = saturate(clip_rect.zw - abs(tex_coord.zw - " + "clip_rect.xy));\n" + "\n" + " float edge_alpha = cliprect_alpha.x * cliprect_alpha.y;\n" + " // could be min, but multiply represents coverage better in theory; " + "@TODO check visually\n" + "\n" + " // multiply it into c's alpha, but c's already premultiplied so " + "multiply it all\n" + " c *= edge_alpha;\n" + "\n" + " OUTPUT(c);\n" + "}\n"; + +#define NUMFRAGMENTS_pshader_general2 2 +static char* pshader_general2_arr[1][NUMFRAGMENTS_pshader_general2] = { + { + pshader_general2_frag0, + pshader_general2_frag1, + }, +}; + +static char** pshader_general2(void) { return pshader_general2_arr[0]; } + +static char* pshader_general2_vars[] = {"tex0", NULL}; + +static char pshader_exceptional_blend_frag0[] = + "#version 110 // only need 100, but 110 works around a driver " + "issue\n" + "#define MAIN(x) void main()\n" + "#define ARGS\n" + "#define ARGS2\n" + "#define DECLARE_SAMPLER(type, name, reg) uniform type name\n" + "#define DECLARE_CONST(type, name, reg) uniform type name\n" + "#define DECLARE_CONST_EXTRA(type, name, reg) uniform type name\n" + "#define SAMPLER sampler2D\n" + "#define TEX2D texture2D\n" + "#define VEC4 vec4\n" + "#define VEC3 vec3\n" + "#define VEC2 vec2\n" + "#define LOWP\n" + "#define 
MEDIUMP\n" + "#define HIGHP\n" + "#define TC0 tex_coord.xy\n" + "#define TC1 tex_coord.zw\n" + "#define OUTPUT(x) gl_FragColor = x\n" + "\n" + "#define SHADER_ALPHATEST(x) if (x < 0.5) discard\n" + "\n" + "#define COMMON_PSCONSTANTS DECLARE_CONST(VEC4, color_mul, 0); " + "DECLARE_CONST(VEC4, color_add, 1); DECLARE_CONST(VEC4, focal, 2); varying " + "VEC4 tex_coord\n" + "#define COMMON_PCONSTANTS2 varying VEC4 tex_coord; varying VEC4 " + "color_mul; varying VEC4 color_add; varying VEC4 clip_rect;\n" + "#define BEGIN_CONSTANTS\n" + "#define END_CONSTANTS\n" + "\n" + "#define saturate(x) clamp(x,0.0,1.0)\n"; +static char pshader_exceptional_blend_frag1[] = + "#define BLENDPROG return s*d;\n" + "#define ALPHAFUNC return sa+da-sa*da;\n"; +static char pshader_exceptional_blend_frag2[] = + "#define TEX0\n" + "\n" + "MEDIUMP float compute(MEDIUMP float s, MEDIUMP float sa, MEDIUMP float d, " + "MEDIUMP float da)\n" + "{\n" + " BLENDPROG\n" + "}\n" + "\n" + "MEDIUMP float compute_a(MEDIUMP float sa, MEDIUMP float da)\n" + "{\n" + " ALPHAFUNC\n" + "}\n" + "\n" + "DECLARE_SAMPLER(LOWP SAMPLER, tex0, 0);\n" + "DECLARE_SAMPLER(LOWP SAMPLER, tex1, 1);\n" + "COMMON_PSCONSTANTS;\n" + "\n" + "MAIN((ARGS))\n" + "{\n" + " MEDIUMP VEC4 srcc,dstc;\n" + " MEDIUMP VEC3 mixed;\n" + " MEDIUMP VEC2 tc;\n" + " srcc = TEX2D(tex0, TC0.xy);\n" + " srcc = srcc*VEC4(color_mul.rgb, 1.0)*color_mul.a + color_add*srcc.a;\n" + " srcc = clamp(srcc,VEC4(0.0,0.0,0.0,0.0),VEC4(1.0,1.0,1.0,1.0));\n" + " tc = TC0.xy;\n" + " #ifndef EXCEPTIONAL_BLEND_LOAD\n" + " #ifndef EXCEPTIONAL_BLEND_RESCALE\n" + " dstc = TEX2D(tex1, tc).rgba;\n" + " #else\n" + " dstc = TEX2D(tex1, tc*rescale1.xy + rescale1.zw).rgba;\n" + " #endif\n" + " #else\n" + " dstc = EXCEPTIONAL_BLEND_LOAD(tex1, tc);\n" + " #endif\n" + " mixed.r = compute(srcc.r,srcc.a, dstc.r,dstc.a);\n" + " mixed.g = compute(srcc.g,srcc.a, dstc.g,dstc.a);\n" + " mixed.b = compute(srcc.b,srcc.a, dstc.b,dstc.a);\n" + " MEDIUMP VEC4 res;\n" + " #ifdef DIRECT\n" + " 
res.rgb = mixed;\n" + " #else\n" + " res.rgb = mixed + (1.0-srcc.a)*dstc.rgb + (1.0-dstc.a)*srcc.rgb;\n" + " #endif\n" + " res.a = compute_a(srcc.a,dstc.a);\n" + " OUTPUT(res);\n" + "}\n"; +static char pshader_exceptional_blend_frag3[] = + "#define BLENDPROG return sa*da - (da-d)*(sa-s);\n" + "#define ALPHAFUNC return sa+da-sa*da;\n"; +static char pshader_exceptional_blend_frag4[] = + "#define BLENDPROG return max(sa*d,s*da);\n" + "#define ALPHAFUNC return sa+da-sa*da;\n"; +static char pshader_exceptional_blend_frag5[] = + "#define BLENDPROG return min(sa*d,s*da);\n" + "#define ALPHAFUNC return sa+da-sa*da;\n"; +static char pshader_exceptional_blend_frag6[] = + "#define DIRECT\n" + "#define BLENDPROG return min(d+s,1.0);\n" + "#define ALPHAFUNC return min(sa+da,1.0);\n"; +static char pshader_exceptional_blend_frag7[] = + "#define DIRECT\n" + "#define BLENDPROG return max(d-s,0.0);\n" + "#define ALPHAFUNC return min(sa+da,1.0);\n"; +static char pshader_exceptional_blend_frag8[] = + "#define BLENDPROG return abs(sa*d-s*da);\n" + "#define ALPHAFUNC return sa+da-sa*da;\n"; +static char pshader_exceptional_blend_frag9[] = + "#define BLENDPROG return sa*(da-d);\n" + "#define ALPHAFUNC return sa+da-sa*da;\n"; +static char pshader_exceptional_blend_frag10[] = + "#define BLENDPROG return d < da/2.0 ? (2.0*s*d) : (sa*da - " + "2.0*(da-d)*(sa-s));\n" + "#define ALPHAFUNC return sa+da-sa*da;\n"; +static char pshader_exceptional_blend_frag11[] = + "#define BLENDPROG return s < sa/2.0 ? 
(2.0*s*d) : (sa*da - " + "2.0*(da-d)*(sa-s));\n" + "#define ALPHAFUNC return sa+da-sa*da;\n"; +static char pshader_exceptional_blend_frag12[] = + "#define DIRECT\n" + "#define BLENDPROG return d*(1.0-sa);\n" + "#define ALPHAFUNC return (1.0-sa)*da;\n"; +static char pshader_exceptional_blend_frag13[] = + "#define DIRECT\n" + "#define BLENDPROG return d*sa;\n" + "#define ALPHAFUNC return sa*da;\n"; + +#define NUMFRAGMENTS_pshader_exceptional_blend 3 +static char* + pshader_exceptional_blend_arr[13][NUMFRAGMENTS_pshader_exceptional_blend] = + { + { + NULL, + NULL, + NULL, + }, + { + pshader_exceptional_blend_frag0, + pshader_exceptional_blend_frag1, + pshader_exceptional_blend_frag2, + }, + { + pshader_exceptional_blend_frag0, + pshader_exceptional_blend_frag3, + pshader_exceptional_blend_frag2, + }, + { + pshader_exceptional_blend_frag0, + pshader_exceptional_blend_frag4, + pshader_exceptional_blend_frag2, + }, + { + pshader_exceptional_blend_frag0, + pshader_exceptional_blend_frag5, + pshader_exceptional_blend_frag2, + }, + { + pshader_exceptional_blend_frag0, + pshader_exceptional_blend_frag6, + pshader_exceptional_blend_frag2, + }, + { + pshader_exceptional_blend_frag0, + pshader_exceptional_blend_frag7, + pshader_exceptional_blend_frag2, + }, + { + pshader_exceptional_blend_frag0, + pshader_exceptional_blend_frag8, + pshader_exceptional_blend_frag2, + }, + { + pshader_exceptional_blend_frag0, + pshader_exceptional_blend_frag9, + pshader_exceptional_blend_frag2, + }, + { + pshader_exceptional_blend_frag0, + pshader_exceptional_blend_frag10, + pshader_exceptional_blend_frag2, + }, + { + pshader_exceptional_blend_frag0, + pshader_exceptional_blend_frag11, + pshader_exceptional_blend_frag2, + }, + { + pshader_exceptional_blend_frag0, + pshader_exceptional_blend_frag12, + pshader_exceptional_blend_frag2, + }, + { + pshader_exceptional_blend_frag0, + pshader_exceptional_blend_frag13, + pshader_exceptional_blend_frag2, + }, +}; + +static char** 
pshader_exceptional_blend(int blend_mode) { + return pshader_exceptional_blend_arr[0 + blend_mode * 1]; +} + +static char* pshader_exceptional_blend_vars[] = {"tex0", "tex1", "color_mul", + "color_add", NULL}; + +static char pshader_filter_frag0[] = + "#version 110 // only need 100, but 110 works around a driver " + "issue\n" + "#define MAIN(x) void main()\n" + "#define ARGS\n" + "#define ARGS2\n" + "#define DECLARE_SAMPLER(type, name, reg) uniform type name\n" + "#define DECLARE_CONST(type, name, reg) uniform type name\n" + "#define DECLARE_CONST_EXTRA(type, name, reg) uniform type name\n" + "#define SAMPLER sampler2D\n" + "#define TEX2D texture2D\n" + "#define VEC4 vec4\n" + "#define VEC3 vec3\n" + "#define VEC2 vec2\n" + "#define LOWP\n" + "#define MEDIUMP\n" + "#define HIGHP\n" + "#define TC0 tex_coord.xy\n" + "#define TC1 tex_coord.zw\n" + "#define OUTPUT(x) gl_FragColor = x\n" + "\n" + "#define SHADER_ALPHATEST(x) if (x < 0.5) discard\n" + "\n" + "#define COMMON_PSCONSTANTS DECLARE_CONST(VEC4, color_mul, 0); " + "DECLARE_CONST(VEC4, color_add, 1); DECLARE_CONST(VEC4, focal, 2); varying " + "VEC4 tex_coord\n" + "#define COMMON_PCONSTANTS2 varying VEC4 tex_coord; varying VEC4 " + "color_mul; varying VEC4 color_add; varying VEC4 clip_rect;\n" + "#define BEGIN_CONSTANTS\n" + "#define END_CONSTANTS\n" + "\n" + "#define saturate(x) clamp(x,0.0,1.0)\n"; +static char pshader_filter_frag1[] = ""; +static char pshader_filter_frag2[] = + "#define TEX0\n" + "\n" + "DECLARE_SAMPLER(LOWP SAMPLER, tex0, 0);\n" + "DECLARE_SAMPLER(LOWP SAMPLER, tex1, 1);\n" + "DECLARE_SAMPLER(LOWP SAMPLER, tex2, 2);\n" + "\n" + "COMMON_PSCONSTANTS;\n" + "\n" + "BEGIN_CONSTANTS \n" + " DECLARE_CONST_EXTRA(HIGHP VEC4, clamp0, 4);\n" + " DECLARE_CONST_EXTRA(HIGHP VEC4, clamp1, 5);\n" + " #define CLAMP(a,b) clamp(a.xy, b.xy, b.zw)\n" + "\n" + " DECLARE_CONST_EXTRA(LOWP VEC4, color, 6);\n" + " DECLARE_CONST_EXTRA(LOWP VEC4, color2, 7);\n" + " DECLARE_CONST_EXTRA(MEDIUMP VEC4, tc_off, 8);\n" + 
"END_CONSTANTS\n" + " \n" + "MAIN((ARGS))\n" + "{\n" + " LOWP VEC4 source;\n" + " source = TEX2D(tex1, CLAMP(TC0.xy,clamp1));\n" + " MEDIUMP float shadow_a = TEX2D(tex0, CLAMP(TC0.xy + " + "tc_off.xy,clamp0)).a;\n" + " \n" + " #ifdef BEVEL\n" + " MEDIUMP float shadow_b = TEX2D(tex0, CLAMP(TC0.xy - " + "tc_off.xy,clamp0)).a;\n" + " shadow_a = (shadow_b - shadow_a) * tc_off.z;\n" + " #ifdef GRADIENT\n" + " shadow_a = clamp(shadow_a*0.5 + 0.5, 0.0, 1.0);\n" + " #else\n" + " shadow_b = clamp(-shadow_a, 0.0, 1.0);\n" + " shadow_a = clamp(shadow_a, 0.0, 1.0);\n" + " #endif\n" + " #else\n" + " #ifdef INNER\n" + " #ifndef GRADIENT\n" + " shadow_a = 1.0-shadow_a;\n" + " #endif // !GRADIENT\n" + " #endif // INNER\n" + " shadow_a = min(shadow_a*tc_off.z,1.0);\n" + " #endif // BEVEL\n" + " \n" + " #ifdef GRADIENT\n" + " MEDIUMP VEC2 gtc = VEC2(shadow_a, 0.5);\n" + " MEDIUMP VEC4 ecolor = TEX2D(tex2, gtc);\n" + " shadow_a = 1.0;\n" + " #else\n" + " #ifdef BEVEL\n" + " MEDIUMP VEC4 ecolor = shadow_b*color + shadow_a*color2;\n" + " shadow_a = 1.0;\n" + " #else\n" + " MEDIUMP VEC4 ecolor = color;\n" + " #endif\n" + " #endif\n" + " \n" + " #ifdef ONTOP\n" + " #ifdef KNOCKOUT\n" + " OUTPUT(ecolor);\n" + " #else\n" + " OUTPUT(ecolor + source * (1.0-ecolor.a));\n" + " #endif\n" + " #else\n" + " \n" + " #ifdef KNOCKOUT\n" + " #ifdef INNER\n" + " // KNOCKOUT & INNER\n" + " OUTPUT(ecolor * source.a * shadow_a);\n" + " #else\n" + " // KNOCKOUT & !INNER\n" + " OUTPUT(ecolor * (1.0-source.a) * shadow_a);\n" + " #endif\n" + " #else // !KNOCKOUT\n" + " \n" + " #ifdef INNER\n" + " // !KNOCKOUT & INNER\n" + " /* this is particularly subtle; effectively computes\n" + " invert shadow\n" + " unpremultiply source\n" + " shadow*color over source.rgb (treat as opaque)\n" + " multiply through by source alpha (i.e. 
make premultiplied again)\n" + " but expressed without *actually* unpremultiplying\n" + " */\n" + " LOWP VEC4 shadow = ecolor * shadow_a;\n" + " LOWP VEC4 res;\n" + " res.rgb = shadow.rgb*source.a + source.rgb*(1.0-shadow.a);\n" + " res.a = source.a;\n" + " OUTPUT(res);\n" + " #else\n" + " // !KNOCKOUT & !INNER\n" + " LOWP VEC4 shadow = ecolor * shadow_a;\n" + " OUTPUT(shadow * (1.0-source.a) + source);\n" + " #endif // INNER\n" + "\n" + " #endif // KNOCKOUT\n" + " #endif // ONTOP\n" + "}\n"; +static char pshader_filter_frag3[] = "#define KNOCKOUT\n"; +static char pshader_filter_frag4[] = "#define GRADIENT\n"; +static char pshader_filter_frag5[] = "#define INNER\n"; +static char pshader_filter_frag6[] = "#define ONTOP\n"; +static char pshader_filter_frag7[] = "#define BEVEL\n"; + +#define NUMFRAGMENTS_pshader_filter 7 +static char* pshader_filter_arr[32][NUMFRAGMENTS_pshader_filter] = { + { + pshader_filter_frag0, + pshader_filter_frag1, + pshader_filter_frag1, + pshader_filter_frag1, + pshader_filter_frag1, + pshader_filter_frag1, + pshader_filter_frag2, + }, + { + pshader_filter_frag0, + pshader_filter_frag1, + pshader_filter_frag1, + pshader_filter_frag1, + pshader_filter_frag1, + pshader_filter_frag3, + pshader_filter_frag2, + }, + { + pshader_filter_frag0, + pshader_filter_frag1, + pshader_filter_frag1, + pshader_filter_frag1, + pshader_filter_frag4, + pshader_filter_frag1, + pshader_filter_frag2, + }, + { + pshader_filter_frag0, + pshader_filter_frag1, + pshader_filter_frag1, + pshader_filter_frag1, + pshader_filter_frag4, + pshader_filter_frag3, + pshader_filter_frag2, + }, + { + pshader_filter_frag0, + pshader_filter_frag1, + pshader_filter_frag1, + pshader_filter_frag5, + pshader_filter_frag1, + pshader_filter_frag1, + pshader_filter_frag2, + }, + { + pshader_filter_frag0, + pshader_filter_frag1, + pshader_filter_frag1, + pshader_filter_frag5, + pshader_filter_frag1, + pshader_filter_frag3, + pshader_filter_frag2, + }, + { + pshader_filter_frag0, + 
pshader_filter_frag1, + pshader_filter_frag1, + pshader_filter_frag5, + pshader_filter_frag4, + pshader_filter_frag1, + pshader_filter_frag2, + }, + { + pshader_filter_frag0, + pshader_filter_frag1, + pshader_filter_frag1, + pshader_filter_frag5, + pshader_filter_frag4, + pshader_filter_frag3, + pshader_filter_frag2, + }, + { + pshader_filter_frag0, + pshader_filter_frag1, + pshader_filter_frag6, + pshader_filter_frag1, + pshader_filter_frag1, + pshader_filter_frag1, + pshader_filter_frag2, + }, + { + pshader_filter_frag0, + pshader_filter_frag1, + pshader_filter_frag6, + pshader_filter_frag1, + pshader_filter_frag1, + pshader_filter_frag3, + pshader_filter_frag2, + }, + { + pshader_filter_frag0, + pshader_filter_frag1, + pshader_filter_frag6, + pshader_filter_frag1, + pshader_filter_frag4, + pshader_filter_frag1, + pshader_filter_frag2, + }, + { + pshader_filter_frag0, + pshader_filter_frag1, + pshader_filter_frag6, + pshader_filter_frag1, + pshader_filter_frag4, + pshader_filter_frag3, + pshader_filter_frag2, + }, + { + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + }, + { + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + }, + { + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + }, + { + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + }, + { + pshader_filter_frag0, + pshader_filter_frag7, + pshader_filter_frag1, + pshader_filter_frag1, + pshader_filter_frag1, + pshader_filter_frag1, + pshader_filter_frag2, + }, + { + pshader_filter_frag0, + pshader_filter_frag7, + pshader_filter_frag1, + pshader_filter_frag1, + pshader_filter_frag1, + pshader_filter_frag3, + pshader_filter_frag2, + }, + { + pshader_filter_frag0, + pshader_filter_frag7, + pshader_filter_frag1, + pshader_filter_frag1, + pshader_filter_frag4, + pshader_filter_frag1, + pshader_filter_frag2, + }, + { + pshader_filter_frag0, + pshader_filter_frag7, + pshader_filter_frag1, + pshader_filter_frag1, + pshader_filter_frag4, + pshader_filter_frag3, + pshader_filter_frag2, + }, 
+ { + pshader_filter_frag0, + pshader_filter_frag7, + pshader_filter_frag1, + pshader_filter_frag5, + pshader_filter_frag1, + pshader_filter_frag1, + pshader_filter_frag2, + }, + { + pshader_filter_frag0, + pshader_filter_frag7, + pshader_filter_frag1, + pshader_filter_frag5, + pshader_filter_frag1, + pshader_filter_frag3, + pshader_filter_frag2, + }, + { + pshader_filter_frag0, + pshader_filter_frag7, + pshader_filter_frag1, + pshader_filter_frag5, + pshader_filter_frag4, + pshader_filter_frag1, + pshader_filter_frag2, + }, + { + pshader_filter_frag0, + pshader_filter_frag7, + pshader_filter_frag1, + pshader_filter_frag5, + pshader_filter_frag4, + pshader_filter_frag3, + pshader_filter_frag2, + }, + { + pshader_filter_frag0, + pshader_filter_frag7, + pshader_filter_frag6, + pshader_filter_frag1, + pshader_filter_frag1, + pshader_filter_frag1, + pshader_filter_frag2, + }, + { + pshader_filter_frag0, + pshader_filter_frag7, + pshader_filter_frag6, + pshader_filter_frag1, + pshader_filter_frag1, + pshader_filter_frag3, + pshader_filter_frag2, + }, + { + pshader_filter_frag0, + pshader_filter_frag7, + pshader_filter_frag6, + pshader_filter_frag1, + pshader_filter_frag4, + pshader_filter_frag1, + pshader_filter_frag2, + }, + { + pshader_filter_frag0, + pshader_filter_frag7, + pshader_filter_frag6, + pshader_filter_frag1, + pshader_filter_frag4, + pshader_filter_frag3, + pshader_filter_frag2, + }, + { + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + }, + { + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + }, + { + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + }, + { + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + }, +}; + +static char** pshader_filter(int bevel, int ontop, int inner, int gradient, + int knockout) { + return pshader_filter_arr[0 + bevel * 16 + ontop * 8 + inner * 4 + + gradient * 2 + knockout * 1]; +} + +static char* pshader_filter_vars[] = {"tex0", "tex1", "color", + "tc_off", "tex2", "clamp0", + "clamp1", "color2", 
NULL}; + +static char pshader_blur_frag0[] = + "#version 110 // only need 100, but 110 works around a driver " + "issue\n" + "#define MAIN(x) void main()\n" + "#define ARGS\n" + "#define ARGS2\n" + "#define DECLARE_SAMPLER(type, name, reg) uniform type name\n" + "#define DECLARE_CONST(type, name, reg) uniform type name\n" + "#define DECLARE_CONST_EXTRA(type, name, reg) uniform type name\n" + "#define SAMPLER sampler2D\n" + "#define TEX2D texture2D\n" + "#define VEC4 vec4\n" + "#define VEC3 vec3\n" + "#define VEC2 vec2\n" + "#define LOWP\n" + "#define MEDIUMP\n" + "#define HIGHP\n" + "#define TC0 tex_coord.xy\n" + "#define TC1 tex_coord.zw\n" + "#define OUTPUT(x) gl_FragColor = x\n" + "\n" + "#define SHADER_ALPHATEST(x) if (x < 0.5) discard\n" + "\n" + "#define COMMON_PSCONSTANTS DECLARE_CONST(VEC4, color_mul, 0); " + "DECLARE_CONST(VEC4, color_add, 1); DECLARE_CONST(VEC4, focal, 2); varying " + "VEC4 tex_coord\n" + "#define COMMON_PCONSTANTS2 varying VEC4 tex_coord; varying VEC4 " + "color_mul; varying VEC4 color_add; varying VEC4 clip_rect;\n" + "#define BEGIN_CONSTANTS\n" + "#define END_CONSTANTS\n" + "\n" + "#define saturate(x) clamp(x,0.0,1.0)\n"; +static char pshader_blur_frag1[] = "#define TAPS TAP(0); TAP(1);\n"; +static char pshader_blur_frag2[] = + "#define TEX0\n" + "\n" + "DECLARE_SAMPLER(LOWP SAMPLER, tex0, 0);\n" + "COMMON_PSCONSTANTS;\n" + "\n" + "BEGIN_CONSTANTS\n" + " DECLARE_CONST_EXTRA(HIGHP VEC4, clampv, 4);\n" + " #define CLAMP(t) clamp(t, clampv.xy, clampv.zw)\n" + " \n" + " DECLARE_CONST_EXTRA(MEDIUMP VEC4, tap[9], 5);\n" + "END_CONSTANTS\n" + "\n" + "MAIN((ARGS))\n" + "{\n" + " MEDIUMP VEC4 s = VEC4(0,0,0,0);\n" + " \n" + " #define TAP(i) s += TEX2D(tex0, CLAMP(TC0.xy + tap[i].xy)) * " + "tap[i].z\n" + " TAPS\n" + " \n" + " OUTPUT(s);\n" + "}\n"; +static char pshader_blur_frag3[] = "#define TAPS TAP(0); TAP(1); TAP(2);\n"; +static char pshader_blur_frag4[] = + "#define TAPS TAP(0); TAP(1); TAP(2); TAP(3);\n"; +static char pshader_blur_frag5[] 
= + "#define TAPS TAP(0); TAP(1); TAP(2); TAP(3); TAP(4);\n"; +static char pshader_blur_frag6[] = + "#define TAPS TAP(0); TAP(1); TAP(2); TAP(3); TAP(4); TAP(5);\n"; +static char pshader_blur_frag7[] = + "#define TAPS TAP(0); TAP(1); TAP(2); TAP(3); TAP(4); TAP(5); TAP(6);\n"; +static char pshader_blur_frag8[] = + "#define TAPS TAP(0); TAP(1); TAP(2); TAP(3); TAP(4); TAP(5); TAP(6); " + "TAP(7);\n"; +static char pshader_blur_frag9[] = + "#define TAPS TAP(0); TAP(1); TAP(2); TAP(3); TAP(4); TAP(5); TAP(6); " + "TAP(7); TAP(8);\n"; + +#define NUMFRAGMENTS_pshader_blur 3 +static char* pshader_blur_arr[10][NUMFRAGMENTS_pshader_blur] = { + { + NULL, + NULL, + NULL, + }, + { + NULL, + NULL, + NULL, + }, + { + pshader_blur_frag0, + pshader_blur_frag1, + pshader_blur_frag2, + }, + { + pshader_blur_frag0, + pshader_blur_frag3, + pshader_blur_frag2, + }, + { + pshader_blur_frag0, + pshader_blur_frag4, + pshader_blur_frag2, + }, + { + pshader_blur_frag0, + pshader_blur_frag5, + pshader_blur_frag2, + }, + { + pshader_blur_frag0, + pshader_blur_frag6, + pshader_blur_frag2, + }, + { + pshader_blur_frag0, + pshader_blur_frag7, + pshader_blur_frag2, + }, + { + pshader_blur_frag0, + pshader_blur_frag8, + pshader_blur_frag2, + }, + { + pshader_blur_frag0, + pshader_blur_frag9, + pshader_blur_frag2, + }, +}; + +static char** pshader_blur(int numtaps) { + return pshader_blur_arr[0 + numtaps * 1]; +} + +static char* pshader_blur_vars[] = {"tex0", "tap", "clampv", NULL}; + +static char pshader_color_matrix_frag0[] = + "#version 110 // only need 100, but 110 works around a driver " + "issue\n" + "#define MAIN(x) void main()\n" + "#define ARGS\n" + "#define ARGS2\n" + "#define DECLARE_SAMPLER(type, name, reg) uniform type name\n" + "#define DECLARE_CONST(type, name, reg) uniform type name\n" + "#define DECLARE_CONST_EXTRA(type, name, reg) uniform type name\n" + "#define SAMPLER sampler2D\n" + "#define TEX2D texture2D\n" + "#define VEC4 vec4\n" + "#define VEC3 vec3\n" + "#define VEC2 
vec2\n" + "#define LOWP\n" + "#define MEDIUMP\n" + "#define HIGHP\n" + "#define TC0 tex_coord.xy\n" + "#define TC1 tex_coord.zw\n" + "#define OUTPUT(x) gl_FragColor = x\n" + "\n" + "#define SHADER_ALPHATEST(x) if (x < 0.5) discard\n" + "\n" + "#define COMMON_PSCONSTANTS DECLARE_CONST(VEC4, color_mul, 0); " + "DECLARE_CONST(VEC4, color_add, 1); DECLARE_CONST(VEC4, focal, 2); varying " + "VEC4 tex_coord\n" + "#define COMMON_PCONSTANTS2 varying VEC4 tex_coord; varying VEC4 " + "color_mul; varying VEC4 color_add; varying VEC4 clip_rect;\n" + "#define BEGIN_CONSTANTS\n" + "#define END_CONSTANTS\n" + "\n" + "#define saturate(x) clamp(x,0.0,1.0)\n"; +static char pshader_color_matrix_frag1[] = + "#define TEX0\n" + "\n" + "DECLARE_SAMPLER(LOWP SAMPLER, tex0, 0);\n" + "COMMON_PSCONSTANTS;\n" + "\n" + "BEGIN_CONSTANTS\n" + " DECLARE_CONST_EXTRA(MEDIUMP VEC4, data[5], 4);\n" + "END_CONSTANTS\n" + "\n" + "MAIN((ARGS))\n" + "{\n" + " MEDIUMP VEC4 t,color;\n" + " t = TEX2D(tex0, TC0.xy);\n" + " \n" + "#ifndef COLORMATRIX_HAS_ALPHA_EFFECTS\n" + " // version of colormatrix assuming no additive alpha in matrix,\n" + " // which is all CS3 seems to be able to output\n" + " color.r = dot(data[0], t) + data[4].r*t.a;\n" + " color.g = dot(data[1], t) + data[4].g*t.a;\n" + " color.b = dot(data[2], t) + data[4].b*t.a;\n" + " color.a = data[3].a * t.a;\n" + " color.rgb = color.rgb * data[3].a;\n" + "#else\n" + " // version of colormatrix that matches spec \n" + " if (t.a == 0.0)\n" + " t.rgb = VEC3(0.0);\n" + " else\n" + " t.rgb /= t.a;\n" + " color.r = dot(data[0], t) + data[4].r;\n" + " color.g = dot(data[1], t) + data[4].g;\n" + " color.b = dot(data[2], t) + data[4].b;\n" + " color.a = dot(data[3], t) + data[4].a;\n" + " color.rgb = color.rgb * color.a;\n" + "#endif\n" + " OUTPUT(color);\n" + "}\n"; + +#define NUMFRAGMENTS_pshader_color_matrix 2 +static char* pshader_color_matrix_arr[1][NUMFRAGMENTS_pshader_color_matrix] = { + { + pshader_color_matrix_frag0, + pshader_color_matrix_frag1, 
+ }, +}; + +static char** pshader_color_matrix(void) { return pshader_color_matrix_arr[0]; } + +static char* pshader_color_matrix_vars[] = {"tex0", "data", NULL}; + +static char pshader_manual_clear_frag0[] = + "#version 110 // only need 100, but 110 works around a driver " + "issue\n" + "#define MAIN(x) void main()\n" + "#define ARGS\n" + "#define ARGS2\n" + "#define DECLARE_SAMPLER(type, name, reg) uniform type name\n" + "#define DECLARE_CONST(type, name, reg) uniform type name\n" + "#define DECLARE_CONST_EXTRA(type, name, reg) uniform type name\n" + "#define SAMPLER sampler2D\n" + "#define TEX2D texture2D\n" + "#define VEC4 vec4\n" + "#define VEC3 vec3\n" + "#define VEC2 vec2\n" + "#define LOWP\n" + "#define MEDIUMP\n" + "#define HIGHP\n" + "#define TC0 tex_coord.xy\n" + "#define TC1 tex_coord.zw\n" + "#define OUTPUT(x) gl_FragColor = x\n" + "\n" + "#define SHADER_ALPHATEST(x) if (x < 0.5) discard\n" + "\n" + "#define COMMON_PSCONSTANTS DECLARE_CONST(VEC4, color_mul, 0); " + "DECLARE_CONST(VEC4, color_add, 1); DECLARE_CONST(VEC4, focal, 2); varying " + "VEC4 tex_coord\n" + "#define COMMON_PCONSTANTS2 varying VEC4 tex_coord; varying VEC4 " + "color_mul; varying VEC4 color_add; varying VEC4 clip_rect;\n" + "#define BEGIN_CONSTANTS\n" + "#define END_CONSTANTS\n" + "\n" + "#define saturate(x) clamp(x,0.0,1.0)\n"; +static char pshader_manual_clear_frag1[] = + "COMMON_PSCONSTANTS;\n" + "\n" + "MAIN((ARGS))\n" + "{\n" + " OUTPUT(color_mul);\n" + "}\n"; + +#define NUMFRAGMENTS_pshader_manual_clear 2 +static char* pshader_manual_clear_arr[1][NUMFRAGMENTS_pshader_manual_clear] = { + { + pshader_manual_clear_frag0, + pshader_manual_clear_frag1, + }, +}; + +static char** pshader_manual_clear(void) { return pshader_manual_clear_arr[0]; } + +static char* pshader_manual_clear_vars[] = {"color_mul", NULL}; + +static char vshader_vsgl_frag0[] = + "#version 110 // only need 100, but 110 works around a driver " + "issue\n" + "#define MAIN(x) void main()\n" + "#define ARGS\n" + 
"#define ARGS2\n" + "#define DECLARE_SAMPLER(type, name, reg) uniform type name\n" + "#define DECLARE_CONST(type, name, reg) uniform type name\n" + "#define DECLARE_CONST_EXTRA(type, name, reg) uniform type name\n" + "#define SAMPLER sampler2D\n" + "#define TEX2D texture2D\n" + "#define VEC4 vec4\n" + "#define VEC3 vec3\n" + "#define VEC2 vec2\n" + "#define LOWP\n" + "#define MEDIUMP\n" + "#define HIGHP\n" + "#define TC0 tex_coord.xy\n" + "#define TC1 tex_coord.zw\n" + "#define OUTPUT(x) gl_FragColor = x\n" + "\n" + "#define SHADER_ALPHATEST(x) if (x < 0.5) discard\n" + "\n" + "#define COMMON_PSCONSTANTS DECLARE_CONST(VEC4, color_mul, 0); " + "DECLARE_CONST(VEC4, color_add, 1); DECLARE_CONST(VEC4, focal, 2); varying " + "VEC4 tex_coord\n" + "#define COMMON_PCONSTANTS2 varying VEC4 tex_coord; varying VEC4 " + "color_mul; varying VEC4 color_add; varying VEC4 clip_rect;\n" + "#define BEGIN_CONSTANTS\n" + "#define END_CONSTANTS\n" + "\n" + "#define saturate(x) clamp(x,0.0,1.0)\n"; +static char vshader_vsgl_frag1[] = "#define FORMAT_V2\n"; +static char vshader_vsgl_frag2[] = + " uniform vec4 world0;\n" + " uniform vec4 world1;\n" + " uniform vec4 x_off;\n" + " uniform vec4 texgen_s;\n" + " uniform vec4 texgen_t;\n" + "#ifdef FLASH_10\n" + " uniform vec4 x3d;\n" + " uniform vec4 y3d;\n" + " uniform vec4 w3d;\n" + "#else\n" + " uniform vec4 viewproj;\n" + "#endif\n" + "\n" + " attribute vec4 position;\n" + " attribute vec4 in_attr;\n" + "\n" + " varying vec4 tex_coord;\n" + " \n" + " void main() {\n" + " // world transform\n" + " HIGHP vec4 wpos = vec4(dot(world0, position), dot(world1, " + "position), world0.z, 1.0);\n" + " \n" + " // texture coordinates\n" + " tex_coord = vec4(dot(texgen_s, position), dot(texgen_t, " + "position), 1.0, 0.0);\n" + " #ifdef FORMAT_V2TC2\n" + " tex_coord.xy = in_attr.xy;\n" + " #endif\n" + " \n" + " // antialias processing\n" + " #ifdef FORMAT_V2C4\n" + " HIGHP vec4 q,p;\n" + " HIGHP float len,newlen;\n" + " q.xy = in_attr.yz / 64.0;\n" + 
" len = length(q.xy);\n" + " p.x = q.x*x_off.x + q.y*x_off.y;\n" + " p.y = q.x*x_off.z + q.y*x_off.w;\n" + " p.z = 0.0; p.w = 0.0;\n" + " p.xy = vec2(dot(world0, p), dot(world1, p));\n" + " newlen = length(p.xy);\n" + " p *= (newlen!=0.0) ? len / newlen : 0.0;\n" + " wpos.xy += p.xy;\n" + " tex_coord.z = in_attr.x / 32.0;\n" + " #endif\n" + " \n" + " // view/projection transform\n" + " gl_Position = vec4(wpos.xy * viewproj.xy + viewproj.zw, " + "wpos.zw);\n" + "\n" + " #ifdef FLASH_10\n" + " gl_Position = wpos.x * x3d + wpos.y * y3d + w3d; // z is " + "ignored!\n" + " gl_Position.w = gl_Position.z;\n" + " gl_Position.z = wpos.z * gl_Position.w;\n" + " #endif\n" + " } \n"; +static char vshader_vsgl_frag3[] = "#define FORMAT_V2C4\n"; +static char vshader_vsgl_frag4[] = "#define FORMAT_V2TC2\n"; + +#define NUMFRAGMENTS_vshader_vsgl 3 +static char* vshader_vsgl_arr[3][NUMFRAGMENTS_vshader_vsgl] = { + { + vshader_vsgl_frag0, + vshader_vsgl_frag1, + vshader_vsgl_frag2, + }, + { + vshader_vsgl_frag0, + vshader_vsgl_frag3, + vshader_vsgl_frag2, + }, + { + vshader_vsgl_frag0, + vshader_vsgl_frag4, + vshader_vsgl_frag2, + }, +}; + +static char** vshader_vsgl(int vformat) { + return vshader_vsgl_arr[0 + vformat * 1]; +} + +static char* vshader_vsgl_vars[] = {"world0", "world1", "x_off", "texgen_s", + "texgen_t", "viewproj", NULL}; + +static char vshader_vsglihud_frag0[] = + "#version 110 // only need 100, but 110 works around a driver " + "issue\n" + "#define MAIN(x) void main()\n" + "#define ARGS\n" + "#define ARGS2\n" + "#define DECLARE_SAMPLER(type, name, reg) uniform type name\n" + "#define DECLARE_CONST(type, name, reg) uniform type name\n" + "#define DECLARE_CONST_EXTRA(type, name, reg) uniform type name\n" + "#define SAMPLER sampler2D\n" + "#define TEX2D texture2D\n" + "#define VEC4 vec4\n" + "#define VEC3 vec3\n" + "#define VEC2 vec2\n" + "#define LOWP\n" + "#define MEDIUMP\n" + "#define HIGHP\n" + "#define TC0 tex_coord.xy\n" + "#define TC1 tex_coord.zw\n" + "#define 
OUTPUT(x) gl_FragColor = x\n" + "\n" + "#define SHADER_ALPHATEST(x) if (x < 0.5) discard\n" + "\n" + "#define COMMON_PSCONSTANTS DECLARE_CONST(VEC4, color_mul, 0); " + "DECLARE_CONST(VEC4, color_add, 1); DECLARE_CONST(VEC4, focal, 2); varying " + "VEC4 tex_coord\n" + "#define COMMON_PCONSTANTS2 varying VEC4 tex_coord; varying VEC4 " + "color_mul; varying VEC4 color_add; varying VEC4 clip_rect;\n" + "#define BEGIN_CONSTANTS\n" + "#define END_CONSTANTS\n" + "\n" + "#define saturate(x) clamp(x,0.0,1.0)\n"; +static char vshader_vsglihud_frag1[] = + "uniform vec4 worldview[2];\n" + "uniform vec4 material[96];\n" + "uniform float textmode;\n" + "\n" + "#define pixels_per_panel_unit 1.0\n" + "\n" + "attribute vec2 position;\n" + "attribute vec2 texcoord;\n" + "attribute vec4 material_index; \n" + "\n" + "varying vec4 tex_coord;\n" + "varying vec4 color_mul;\n" + "varying vec4 color_add;\n" + "varying vec4 clip_rect;\n" + "\n" + "void main() {\n" + " // view/projection transform\n" + " gl_Position = vec4(worldview[0].w + dot(worldview[0].xy, position),\n" + " worldview[1].w + dot(worldview[1].xy, position),\n" + " 0.0,\n" + " 1.0);\n" + "\n" + " LOWP VEC4 c1_mul,c1_add,c2_mul,c2_add, c_mul,c_add;\n" + " HIGHP VEC4 clip;\n" + "\n" + " // convert 8-bit material_info loaded as float back to integers\n" + " LOWP VEC3 mat = floor(255.0*material_index.xyz + 0.5);\n" + "\n" + " // @TODO: flatten these into a single array\n" + " c1_mul = material[int(mat.r )];\n" + " c1_add = material[int(mat.r+1.0)];\n" + " c2_mul = material[int(mat.g )];\n" + " c2_add = material[int(mat.g+1.0)];\n" + " clip = material[int(mat.b )];\n" + "\n" + " // if textmode is 0, suppress c2_add.rgba\n" + "\n" + " // combine hierarchical and local colors\n" + " color_add.rgb = c1_mul.rgb * (c2_add.rgb * textmode) + c1_add.rgb;\n" + " color_mul = c1_mul * c2_mul;\n" + " color_mul.a *= material_index.w;\n" + "\n" + " // compute premultiplied alpha\n" + " color_mul.rgb *= color_mul.a;\n" + " color_add.rgb *= 
color_mul.a;\n" + "\n" + " // pass additive blending flag stored in c1_add.a and c2_add.a to " + "pixel shader\n" + " color_add.a = clamp(c1_add.a + c2_add.a * textmode,0.0,1.0);\n" + "\n" + " // except actually we'll premultiply that into color_mul.a\n" + " color_mul.a *= (1.0-color_add.a);\n" + "\n" + " // compute premultiplied cliprect\n" + " // for now cliprect comes in as x0,y0,x1,y1, not center/offset\n" + " \n" + " // coordinates come in already rotated into panel space, which is " + "also where cliprect is defined\n" + " HIGHP VEC2 center = (clip.xy + clip.zw) / 2.0;\n" + " HIGHP VEC2 offset = (clip.zw - clip.xy) / 2.0;\n" + "\n" + " // use of pixels_per_panel_unit here ignores effect of non-uniform " + "scaling\n" + " clip_rect.xy = center * pixels_per_panel_unit;\n" + " clip_rect.zw = offset * pixels_per_panel_unit + 1.0; // offset is " + "location where alpha goes to 0, so it's 1 pixel out\n" + "\n" + " tex_coord.zw = position * pixels_per_panel_unit;\n" + " tex_coord.xy = texcoord;\n" + "} \n"; + +#define NUMFRAGMENTS_vshader_vsglihud 2 +static char* vshader_vsglihud_arr[1][NUMFRAGMENTS_vshader_vsglihud] = { + { + vshader_vsglihud_frag0, + vshader_vsglihud_frag1, + }, +}; + +static char** vshader_vsglihud(void) { return vshader_vsglihud_arr[0]; } + +static char* vshader_vsglihud_vars[] = {"worldview", "material", "textmode", + NULL}; \ No newline at end of file diff --git a/targets/app/common/Iggy/gdraw/gdraw_gl_shared.inl b/targets/app/common/Iggy/gdraw/gdraw_gl_shared.inl new file mode 100644 index 000000000..e08fd755b --- /dev/null +++ b/targets/app/common/Iggy/gdraw/gdraw_gl_shared.inl @@ -0,0 +1,2695 @@ +// gdraw_gl_shared.inl - copyright 2012 RAD Game Tools +// +// This file implements the part of the Iggy graphics driver layer shared +// between GL and GL ES 2 (which is most of it). It heavily depends on a bunch +// of typedefs, #defines and some utility functions that need to be set up +// correctly for the GL version being targeted. 
It also targets a kind of +// pseudo-GL 2.0; the platform implementation has to set up some #defines and +// perform extra initialization work if we go through extensions instead. This +// is all a bit ugly, but much easier to maintain than the original solution, +// where we just kept two almost identical versions of this code. + +///////////////////////////////////////////////////////////// +// +// common code shared by all GDraw implemetations +// + +// The native handle type holds resource handles and a coarse description. +typedef union { + // handle that is a texture + struct { + GLuint gl; + GLuint gl_renderbuf; + U32 w : 24; + U32 nonpow2 : 8; + U32 h : 24; + U32 reserved : 8; + } tex; + + // handle that is a vertex buffer + struct { + GLuint base; + GLuint indices; + } vbuf; +} GDrawNativeHandle; + +#include "gdraw_shared.inl" + +// max rendertarget stack depth. this depends on the extent to which you +// use filters and non-standard blend modes, and how nested they are. +#define MAX_RENDER_STACK_DEPTH \ + 8 // Iggy is hardcoded to a limit of 16... probably 1-3 is realistic +#define AATEX_SAMPLER 3 // sampler that aa_tex gets set in +#define QUAD_IB_COUNT 4096 // quad index buffer has indices for this many quads + +#define ASSERT_COUNT(a, b) ((a) == (b) ? 
(b) : -1) + +/////////////////////////////////////////////////////////////////////////////// +// +// debugging/validation +// + +static RADINLINE void break_on_err(GLint e) { +#ifdef _DEBUG + if (e) { + RR_BREAK(); + } +#endif +} + +#ifndef GDRAW_PLATFORM_REPORT_GL_SITE +#define GDRAW_PLATFORM_REPORT_GL_SITE(site) ((void)0) +#endif + +static void report_err(GLint e) { + break_on_err(e); + IggyGDrawSendWarning(NULL, "OpenGL glGetError error"); +} + +static void compilation_err(const char* msg) { + error_msg_platform_specific(msg); + report_err(GL_INVALID_VALUE); +} + +static void eat_gl_err(void) { while (glGetError() != GL_NO_ERROR); } + +static void opengl_check_site(const char* site); + +static void opengl_check(void) { opengl_check_site(NULL); } + +static void opengl_check_site(const char* site) { +#ifdef _DEBUG + GLint e = glGetError(); + if (e != GL_NO_ERROR) { + GDRAW_PLATFORM_REPORT_GL_SITE(site); + report_err(e); + eat_gl_err(); + } +#else + (void)site; +#endif +} + +#ifndef OPENGL_CHECK_SITE +#define OPENGL_CHECK_SITE(site) opengl_check_site(site) +#endif + +static U32 is_pow2(S32 n) { return ((U32)n & (U32)(n - 1)) == 0; } + +/////////////////////////////////////////////////////////////////////////////// +// +// GDraw +// +// This data structure stores all the data for the GDraw, just to keep +// it a bit cleaner instead of storing in globals, even though GDraw is +// a singleton. + +// fragment and vertex program + +// The mac doesn't use extensions for the functions dealing with programs, and +// the non-extension versions take GLuint instead of GLhandle. The mac defines +// GDrawGLProgram to GLuint before including gdraw_gl_shared.inl to account for +// this. 
+#ifndef GDrawGLProgram
+#define GDrawGLProgram GLhandle
+#endif
+
+// A GL program handle together with a table of GLint slots for its variables.
+// NOTE(review): what the two rows of vars[][] stand for is not visible in this
+// part of the file - confirm where the table is filled in.
+typedef struct ProgramWithCachedVariableLocations {
+    GDrawGLProgram program;
+    GLint vars[2][MAX_VARS];
+} ProgramWithCachedVariableLocations;
+
+// render-stack state (element type of the frame[] array and the cur pointer
+// in the gdraw state struct)
+typedef struct {
+    GDrawHandle* color_buffer;
+    GDrawHandle* stencil_depth;
+    S32 base_x, base_y, width, height;
+    rrbool cached;
+} GDrawFramebufferState;
+
+// texture format description
+typedef struct {
+    U8 iggyfmt; // IFT_FORMAT_*
+    U8 blkx, blky; // compressed block size in pixels (for compressed formats)
+    U8 blkbytes; // block bytes
+    GLenum intfmt; // GL internal format
+    GLenum fmt; // GL_TEXTURE_COMPRESSED for compressed formats!
+    GLenum type; // NOTE(review): presumably the GL pixel data type - confirm
+} TextureFormatDesc;
+
+static GDrawFunctions gdraw_funcs;
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// GDraw data structure
+//
+//
+// This is the primary rendering abstraction, which hides all
+// the platform-specific rendering behavior from /G/. It is
+// full of platform-specific graphics state, and also general
+// graphics state so that it doesn't have to callback into /G/
+// to get at that graphics state.
+
+static struct {
+    S32 multisampling; // number of samples if multisampling (always 0 if no
+                       // GDRAW_MULTISAMPLING)
+
+    S32 vx, vy; // viewport width/height in pixels
+    S32 fw, fh; // full width/height of bound rendertarget
+    S32 tw, th; // actual width/height of current tile
+    S32 tpw, tph; // width/height of padded version of tile
+
+    // tile origin location (without and with padding)
+    rrbool tile_enabled;
+    S32 tx0, ty0;
+    S32 tx0p, ty0p;
+
+    // if we're in the middle of rendering a blur, certain viewport-related
+    // functions have to behave differently, so they check this flag
+    rrbool in_blur;
+
+    F32 projection[4]; // scalex, scaley, transx, transy
+
+    // conversion from worldspace to viewspace <0,0>.. -- no translation or
+    // rotation
+    // NOTE(review): the end of the "<0,0>.." range above is missing in this
+    // copy of the text (presumably the viewport width,height pair) - confirm
+    // against the upstream file.
+    F32 world_to_pixel[2];
+
+    // 3d transformation
+    F32 xform_3d[3][4];
+    rrbool use_3d;
+
+    // render-state stack for 'temporary' rendering
+    GDrawFramebufferState frame[MAX_RENDER_STACK_DEPTH];
+    GDrawFramebufferState* cur;
+
+    // texture and vertex buffer pools
+    GDrawHandleCache* texturecache;
+    GDrawHandleCache* vbufcache;
+
+    // GL_EXT_separate_shader_objects isn't sufficiently standard,
+    // so we have to bind every vertex shader to every fragment shader
+
+    // raw vertex shaders
+    GLuint vert[GDRAW_vformat__count];
+
+    // fragment shaders with vertex shaders
+    ProgramWithCachedVariableLocations
+        fprog[GDRAW_TEXTURE__count][3][3]; // [tex0mode][additive][vformat]
+    ProgramWithCachedVariableLocations ihud[2];
+
+    // fragment shaders with fixed-function
+    ProgramWithCachedVariableLocations
+        exceptional_blend[GDRAW_BLENDSPECIAL__count];
+    ProgramWithCachedVariableLocations filter_prog[2][16];
+    ProgramWithCachedVariableLocations blur_prog[MAX_TAPS + 1];
+    ProgramWithCachedVariableLocations colormatrix;
+    ProgramWithCachedVariableLocations manual_clear;
+
+    // render targets
+
+    // these two lines must be adjacent because of how rendertargets works
+    // (do not reorder or insert members between them)
+    GDrawHandleCache rendertargets;
+    GDrawHandle
+        rendertarget_handles[MAX_RENDER_STACK_DEPTH]; // not -1, because we use
+                                                      // +1 to initialize
+
+    gswf_recti rt_valid[MAX_RENDER_STACK_DEPTH + 1];
+    GDrawHandle stencil_depth;
+
+    // size of our render targets
+    S32 frametex_width, frametex_height;
+
+    // framebuffer object used for render-to-texture
+    GLuint framebuffer_stack_object;
+
+    // framebuffer object used to copy from MSAA renderbuffer to texture
+    GLuint framebuffer_copy_to_texture;
+
+    // framebuffer object used for main screen (set to non-0 to do render to
+    // texture)
+    GLuint main_framebuffer;
+
+    // antialias texture
+    GLuint aa_tex;
+
+    // canned quad indices
+    GLuint quad_ib;
+
+    // texture formats
+    const TextureFormatDesc* tex_formats;
+
+    // caps
+    U32 has_conditional_non_power_of_two
+        : 1; // non-power-of-2 supported, but only CLAMP_TO_EDGE and can't have
+             // mipmaps
+    U32 has_packed_depth_stencil : 1;
+    U32 has_depth24 : 1;
+    U32 has_mapbuffer : 1;
+    U32 has_texture_max_level : 1;
+
+    // fake fence tracking for thrashing detection
+    U64 frame_counter;
+}* gdraw; // file-static pointer to this state block; where it is allocated is not in this part of the file
+
+////////////////////////////////////////////////////////////////////////
+//
+// General resource management for both textures and vertex buffers
+//
+
+// make a texture with reasonable default state: binds 'tex' to GL_TEXTURE_2D
+// and sets linear min/mag filtering and clamp-to-edge wrapping on both axes.
+// 'tex' is still bound to GL_TEXTURE_2D when this returns.
+static void make_texture(GLuint tex) {
+    glBindTexture(GL_TEXTURE_2D, tex);
+    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
+    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+}
+
+// Define level-0 storage for 'tex' as a w x h 2D texture using the given
+// internal format / external format / data type (no initial pixel data is
+// supplied), then apply the make_texture() defaults. GL_TEXTURE_2D is bound
+// to 0 on return. 't' and 'size' are not used by this body.
+static void make_rendertarget(GDrawHandle* t, GLuint tex, GLenum int_type,
+                              GLenum ext_type, GLenum data_type, S32 w, S32 h,
+                              S32 size) {
+    glBindTexture(GL_TEXTURE_2D, tex);
+    glTexImage2D(GL_TEXTURE_2D, 0, int_type, w, h, 0, ext_type, data_type,
+                 NULL);
+    make_texture(tex);
+    glBindTexture(GL_TEXTURE_2D, 0);
+}
+
+// Delete the GL objects behind a cached handle. User-owned handles are
+// skipped entirely (nothing is deleted and opengl_check() is not reached).
+// For a handle whose cache is not a vertex cache, the texture is deleted,
+// plus the renderbuffer when one is set and its name differs from the
+// texture's; otherwise both the vertex and the index buffer are deleted.
+static void api_free_resource(GDrawHandle* r) {
+    if (r->state == GDRAW_HANDLE_STATE_user_owned) return;
+
+    if (!r->cache->is_vertex) {
+        glDeleteTextures(1, &r->handle.tex.gl);
+        // only delete the renderbuffer name when it is non-zero and is not the
+        // same value as the texture name that was just deleted
+        if (r->handle.tex.gl_renderbuf &&
+            r->handle.tex.gl_renderbuf != r->handle.tex.gl)
+            glDeleteRenderbuffers(1, &r->handle.tex.gl_renderbuf);
+    } else {
+        glDeleteBuffers(1, &r->handle.vbuf.base);
+        glDeleteBuffers(1, &r->handle.vbuf.indices);
+    }
+    opengl_check();
+}
+
+// Calls gdraw_HandleCacheUnlockAll() on the texture cache and on the vertex
+// buffer cache. 'gstats' is not used by this body.
+static void RADLINK gdraw_UnlockHandles(GDrawStats* gstats) {
+    // since we're not using fences for this implementation, move all textures
+    // off the active list; if you're using fences, this is when the fence
+    // needs to actually occur
+    gdraw_HandleCacheUnlockAll(gdraw->texturecache);
+    gdraw_HandleCacheUnlockAll(gdraw->vbufcache);
+}
+ +//////////////////////////////////////////////////////////////////////// +// +// Texture creation/updating +// + +extern GDrawTexture* gdraw_GLx_(WrappedTextureCreate)(S32 gl_texture_handle, + S32 width, S32 height, + rrbool has_mipmaps) { + GDrawStats stats = {0}; + GDrawHandle* p = gdraw_res_alloc_begin( + gdraw->texturecache, 0, + &stats); // it may need to free one item to give us a handle + GLint old; + + glGetIntegerv(GL_TEXTURE_BINDING_2D, &old); + glBindTexture(GL_TEXTURE_2D, gl_texture_handle); + if (has_mipmaps) + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, + GL_LINEAR_MIPMAP_LINEAR); + else + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + glBindTexture(GL_TEXTURE_2D, old); + + p->bytes = 0; + p->handle.tex.gl = gl_texture_handle; + p->handle.tex.w = width; + p->handle.tex.h = height; + p->handle.tex.nonpow2 = !(is_pow2(width) && is_pow2(height)); + gdraw_HandleCacheAllocateEnd(p, 0, NULL, GDRAW_HANDLE_STATE_user_owned); + return (GDrawTexture*)p; +} + +extern void gdraw_GLx_(WrappedTextureChange)(GDrawTexture* tex, + S32 new_gl_texture_handle, + S32 new_width, S32 new_height, + rrbool has_mipmaps) { + GDrawHandle* p = (GDrawHandle*)tex; + GLint old; + + glGetIntegerv(GL_TEXTURE_BINDING_2D, &old); + glBindTexture(GL_TEXTURE_2D, new_gl_texture_handle); + if (has_mipmaps) + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, + GL_LINEAR_MIPMAP_LINEAR); + else + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + glBindTexture(GL_TEXTURE_2D, old); + + p->handle.tex.gl = new_gl_texture_handle; + p->handle.tex.w = new_width; + p->handle.tex.h = new_height; + p->handle.tex.nonpow2 = !(is_pow2(new_width) && is_pow2(new_height)); +} + +extern void gdraw_GLx_(WrappedTextureDestroy)(GDrawTexture* tex) { + GDrawStats stats = {0}; + gdraw_res_free((GDrawHandle*)tex, &stats); +} + +static void RADLINK gdraw_SetTextureUniqueID(GDrawTexture* tex, void* old_id, + void* new_id) { + GDrawHandle* p = (GDrawHandle*)tex; + 
// if this is still the handle it's thought to be, change the owner; + // if the owner *doesn't* match, then they're changing a stale handle, so + // ignore + if (p->owner == old_id) p->owner = new_id; +} + +static rrbool RADLINK gdraw_MakeTextureBegin( + void* owner, S32 width, S32 height, gdraw_texture_format format, U32 flags, + GDraw_MakeTexture_ProcessingInfo* p, GDrawStats* gstats) { + S32 size = 0, asize, stride; + GDrawHandle* t = NULL; + opengl_check(); + + stride = width; + if (format == GDRAW_TEXTURE_FORMAT_rgba32) stride *= 4; + size = stride * height; + + asize = size; + if (flags & GDRAW_MAKETEXTURE_FLAGS_mipmap) asize = asize * 4 / 3; + + t = gdraw_res_alloc_begin(gdraw->texturecache, asize, gstats); + if (!t) return IGGY_RESULT_Error_GDraw; + + glGenTextures(1, &t->handle.tex.gl); + + p->texture_data = IggyGDrawMalloc(size); + if (!p->texture_data) { + gdraw_HandleCacheAllocateFail(t); + IggyGDrawSendWarning(NULL, "GDraw malloc for texture data failed"); + return false; + } + + t->handle.tex.w = width; + t->handle.tex.h = height; + t->handle.tex.nonpow2 = !(is_pow2(width) && is_pow2(height)); + + p->num_rows = height; + p->p0 = t; + p->p1 = owner; + p->stride_in_bytes = stride; + p->texture_type = GDRAW_TEXTURE_TYPE_rgba; + p->i0 = format; + p->i1 = flags; + p->i2 = width; + p->i3 = height; + p->i4 = asize; + opengl_check(); + return true; +} + +static GDrawTexture* RADLINK +gdraw_MakeTextureEnd(GDraw_MakeTexture_ProcessingInfo* p, GDrawStats* stats) { + gdraw_texture_format format = (gdraw_texture_format)p->i0; + S32 flags = p->i1; + rrbool mipmap = (flags & GDRAW_MAKETEXTURE_FLAGS_mipmap) != 0; + S32 width = p->i2, height = p->i3; + GLuint z, e; + GDrawHandle* t = (GDrawHandle*)p->p0; + + z = t->handle.tex.gl; + assert(z != 0); + + make_texture(z); + glPixelStorei(GL_UNPACK_ALIGNMENT, 1); + + if (format == GDRAW_TEXTURE_FORMAT_font) + glTexImage2D(GL_TEXTURE_2D, 0, GL_ALPHA, width, height, 0, GL_ALPHA, + GL_UNSIGNED_BYTE, p->texture_data); + else 
+ glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, width, height, 0, GL_RGBA, + GL_UNSIGNED_BYTE, p->texture_data); + e = glGetError(); + break_on_err(e); + + if (mipmap) glGenerateMipmap(GL_TEXTURE_2D); + if (!e) e = glGetError(); + + if (e != 0) { + gdraw_HandleCacheAllocateFail(t); + IggyGDrawSendWarning(NULL, "GDraw OpenGL error creating texture"); + eat_gl_err(); + return NULL; + } else { + gdraw_HandleCacheAllocateEnd( + t, p->i4, p->p1, + (flags & GDRAW_MAKETEXTURE_FLAGS_never_flush) + ? GDRAW_HANDLE_STATE_pinned + : GDRAW_HANDLE_STATE_locked); + stats->nonzero_flags |= GDRAW_STATS_alloc_tex; + stats->alloc_tex += 1; + stats->alloc_tex_bytes += p->i4; + } + + // default wrap mode is clamp to edge + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + + if (mipmap) + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, + GL_LINEAR_MIPMAP_LINEAR); + else + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + + IggyGDrawFree(p->texture_data); + + opengl_check(); + return (GDrawTexture*)t; +} + +static rrbool RADLINK gdraw_UpdateTextureBegin(GDrawTexture* tex, + void* unique_id, + GDrawStats* stats) { + RR_UNUSED_VARIABLE(stats); + return gdraw_HandleCacheLock((GDrawHandle*)tex, unique_id); +} + +static void RADLINK gdraw_UpdateTextureRect(GDrawTexture* tex, void* unique_id, + S32 x, S32 y, S32 stride, S32 w, + S32 h, U8* data, + gdraw_texture_format format) { + glBindTexture(GL_TEXTURE_2D, ((GDrawHandle*)tex)->handle.tex.gl); + glPixelStorei(GL_UNPACK_ALIGNMENT, 1); + // @TODO: use 'stride' + glTexSubImage2D(GL_TEXTURE_2D, 0, x, y, w, h, + (format == GDRAW_TEXTURE_FORMAT_font) ? 
GL_ALPHA : GL_RGBA, + GL_UNSIGNED_BYTE, data); + opengl_check(); +} + +static void RADLINK gdraw_UpdateTextureEnd(GDrawTexture* tex, void* unique_id, + GDrawStats* stats) { + gdraw_HandleCacheUnlock((GDrawHandle*)tex); +} + +static void RADLINK gdraw_FreeTexture(GDrawTexture* tt, void* unique_id, + GDrawStats* gstats) { + GDrawHandle* t = (GDrawHandle*)tt; + assert(t != NULL); + if (t->owner == unique_id || unique_id == NULL) { + if (t->cache == &gdraw->rendertargets) { + gdraw_HandleCacheUnlock(t); + // cache it by simply not freeing it + return; + } + + gdraw_res_free(t, gstats); + } +} + +static rrbool RADLINK gdraw_TryToLockTexture(GDrawTexture* t, void* unique_id, + GDrawStats* gstats) { + RR_UNUSED_VARIABLE(gstats); + return gdraw_HandleCacheLock((GDrawHandle*)t, unique_id); +} + +static void RADLINK gdraw_DescribeTexture(GDrawTexture* tex, + GDraw_Texture_Description* desc) { + GDrawHandle* p = (GDrawHandle*)tex; + desc->width = p->handle.tex.w; + desc->height = p->handle.tex.h; + desc->size_in_bytes = p->bytes; +} + +static void RADLINK gdraw_SetAntialiasTexture(S32 width, U8* rgba) { + if (!gdraw->aa_tex) glGenTextures(1, &gdraw->aa_tex); + + make_texture(gdraw->aa_tex); + glPixelStorei(GL_UNPACK_ALIGNMENT, 1); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, width, 1, 0, GL_RGBA, + GL_UNSIGNED_BYTE, rgba); + opengl_check(); +} + +//////////////////////////////////////////////////////////////////////// +// +// Vertex buffer creation/deletion +// + +static rrbool RADLINK gdraw_MakeVertexBufferBegin( + void* unique_id, gdraw_vformat vformat, S32 vbuf_size, S32 ibuf_size, + GDraw_MakeVertexBuffer_ProcessingInfo* p, GDrawStats* gstats) { + GLuint e; + GDrawHandle* vb; + opengl_check(); + vb = gdraw_res_alloc_begin(gdraw->vbufcache, vbuf_size + ibuf_size, gstats); + if (!vb) { + IggyGDrawSendWarning(NULL, "GDraw out of vertex buffer memory"); + return false; + } + + e = glGetError(); + vb->handle.vbuf.base = 0; + vb->handle.vbuf.indices = 0; + glGenBuffers(1, 
&vb->handle.vbuf.base); + glGenBuffers(1, &vb->handle.vbuf.indices); + glBindBuffer(GL_ARRAY_BUFFER, vb->handle.vbuf.base); + glBufferData(GL_ARRAY_BUFFER, vbuf_size, NULL, GL_STATIC_DRAW); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, vb->handle.vbuf.indices); + glBufferData(GL_ELEMENT_ARRAY_BUFFER, ibuf_size, NULL, GL_STATIC_DRAW); + if (!e) e = glGetError(); + if (e != GL_NO_ERROR) { + glBindBuffer(GL_ARRAY_BUFFER, 0); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); + glDeleteBuffers(1, &vb->handle.vbuf.base); + glDeleteBuffers(1, &vb->handle.vbuf.indices); + gdraw_HandleCacheAllocateFail(vb); + eat_gl_err(); + IggyGDrawSendWarning(NULL, + "GDraw OpenGL vertex buffer creation failed"); + return false; + } + + p->i0 = vbuf_size; + p->i1 = ibuf_size; + p->p0 = vb; + p->p1 = unique_id; + + if (!gdraw->has_mapbuffer) { + p->vertex_data = IggyGDrawMalloc(vbuf_size); + p->vertex_data_length = vbuf_size; + p->index_data = IggyGDrawMalloc(ibuf_size); + p->index_data_length = ibuf_size; + + // check for out of memory conditions + if (!p->vertex_data || !p->index_data) { + if (p->vertex_data) IggyGDrawFree(p->vertex_data); + if (p->index_data) IggyGDrawFree(p->index_data); + IggyGDrawSendWarning( + NULL, "GDraw malloc for vertex buffer temporary memory failed"); + return false; + } + } else { + p->vertex_data = (U8*)glMapBuffer(GL_ARRAY_BUFFER, GL_WRITE_ONLY); + p->vertex_data_length = vbuf_size; + + p->index_data = + (U8*)glMapBuffer(GL_ELEMENT_ARRAY_BUFFER, GL_WRITE_ONLY); + p->index_data_length = ibuf_size; + } + + opengl_check(); + return true; +} + +static rrbool RADLINK +gdraw_MakeVertexBufferMore(GDraw_MakeVertexBuffer_ProcessingInfo* p) { + assert(0); + return false; +} + +static GDrawVertexBuffer* RADLINK gdraw_MakeVertexBufferEnd( + GDraw_MakeVertexBuffer_ProcessingInfo* p, GDrawStats* stats) { + GDrawHandle* vb = (GDrawHandle*)p->p0; + rrbool ok = true; + GLuint e; + + if (!gdraw->has_mapbuffer) { + glBufferData(GL_ARRAY_BUFFER, p->i0, p->vertex_data, GL_STATIC_DRAW); + 
glBufferData(GL_ELEMENT_ARRAY_BUFFER, p->i1, p->index_data, + GL_STATIC_DRAW); + IggyGDrawFree(p->vertex_data); + IggyGDrawFree(p->index_data); + } else { + if (!glUnmapBuffer(GL_ARRAY_BUFFER)) ok = false; + if (!glUnmapBuffer(GL_ELEMENT_ARRAY_BUFFER)) ok = false; + } + + glBindBuffer(GL_ARRAY_BUFFER, 0); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); + + e = glGetError(); + if (!ok || e != GL_NO_ERROR) { + glDeleteBuffers(1, &vb->handle.vbuf.base); + glDeleteBuffers(1, &vb->handle.vbuf.indices); + gdraw_HandleCacheAllocateFail(vb); + eat_gl_err(); + return NULL; + } else + gdraw_HandleCacheAllocateEnd(vb, p->i0 + p->i1, p->p1, + GDRAW_HANDLE_STATE_locked); + + opengl_check(); + return (GDrawVertexBuffer*)vb; +} + +static rrbool RADLINK gdraw_TryToLockVertexBuffer(GDrawVertexBuffer* vb, + void* unique_id, + GDrawStats* stats) { + RR_UNUSED_VARIABLE(stats); + return gdraw_HandleCacheLock((GDrawHandle*)vb, unique_id); +} + +static void RADLINK gdraw_FreeVertexBuffer(GDrawVertexBuffer* vb, + void* unique_id, GDrawStats* stats) { + GDrawHandle* h = (GDrawHandle*)vb; + assert(h != NULL); + if (h->owner == unique_id) gdraw_res_free(h, stats); +} + +static void RADLINK gdraw_DescribeVertexBuffer( + GDrawVertexBuffer* vbuf, GDraw_VertexBuffer_Description* desc) { + GDrawHandle* p = (GDrawHandle*)vbuf; + desc->size_in_bytes = p->bytes; +} + +//////////////////////////////////////////////////////////////////////// +// +// Create/free (or cache) render targets +// + +#ifdef __linux__ +typedef struct { + S32 free_count; + S32 live_count; + S32 locked_count; + S32 dead_count; + S32 pinned_count; + S32 user_owned_count; + S32 alloc_count; +} GDrawHandleStateCounts; + +enum { + GDRAW_RT_DIAG_color_memory = 1 << 0, + GDRAW_RT_DIAG_color_handles = 1 << 1, + GDRAW_RT_DIAG_cache_bitmap = 1 << 2, + GDRAW_RT_DIAG_stack_depth = 1 << 3, + GDRAW_RT_DIAG_empty_request = 1 << 4, +}; + +static U32 gdraw_rt_diag_emitted = 0; + +static void gdraw_CountHandleStates(GDrawHandleCache* cache, + 
GDrawHandleStateCounts* counts) { + S32 i; + counts->free_count = 0; + counts->live_count = 0; + counts->locked_count = 0; + counts->dead_count = 0; + counts->pinned_count = 0; + counts->user_owned_count = 0; + counts->alloc_count = 0; + + /* nothing to count: caller sees all-zero totals */ + if (!cache) return; + + for (i = 0; i < cache->max_handles; ++i) { + switch (cache->handle[i].state) { + case GDRAW_HANDLE_STATE_free: + ++counts->free_count; + break; + case GDRAW_HANDLE_STATE_live: + ++counts->live_count; + break; + case GDRAW_HANDLE_STATE_locked: + ++counts->locked_count; + break; + case GDRAW_HANDLE_STATE_dead: + ++counts->dead_count; + break; + case GDRAW_HANDLE_STATE_pinned: + ++counts->pinned_count; + break; + case GDRAW_HANDLE_STATE_user_owned: + ++counts->user_owned_count; + break; + case GDRAW_HANDLE_STATE_alloc: + ++counts->alloc_count; + break; + default: + break; + } + } +} + +/* Sends a single warning describing the render-target situation: frame texture size, tile and padded tile size, requested size (req_w x req_h), render-stack depth, the cache's free/total bytes and its per-state handle counts. `once_bit` is a GDRAW_RT_DIAG_* flag; if it is already set in gdraw_rt_diag_emitted the call returns immediately, otherwise the bit is set before reporting. `cache` may be NULL, in which case byte totals and counts are reported as 0. */ +static void gdraw_ReportHandleCacheDiag(char const* label, + GDrawHandleCache* cache, U32 once_bit, + S32 req_w, S32 req_h) { + GDrawHandleStateCounts counts; + + if (gdraw_rt_diag_emitted & once_bit) return; + gdraw_rt_diag_emitted |= once_bit; + + gdraw_CountHandleStates(cache, &counts); + IggyGDrawSendWarning( + NULL, + "GDraw[%s] diag: frame=%dx%d tile=%dx%d padded=%dx%d req=%dx%d " + "depth=%d bytes_free=%d total_bytes=%d handles free=%d live=%d " + "locked=%d dead=%d pinned=%d user=%d alloc=%d", + label, gdraw->frametex_width, gdraw->frametex_height, gdraw->tw, + gdraw->th, gdraw->tpw, gdraw->tph, req_w, req_h, + (int)(gdraw->cur - gdraw->frame), cache ? cache->bytes_free : 0, + cache ? 
cache->total_bytes : 0, counts.free_count, counts.live_count, + counts.locked_count, counts.dead_count, counts.pinned_count, + counts.user_owned_count, counts.alloc_count); +} +#else +/* Without __linux__ the diagnostic report compiles to a no-op; its arguments are not evaluated. */ +#define gdraw_ReportHandleCacheDiag(label, cache, once_bit, req_w, req_h) \ + ((void)0) +#endif + +/* Returns a locked handle usable as a render-target texture, or NULL if allocation fails. If gdraw->rendertargets has a reusable (LRU) handle it is locked with owner (void*)1 and returned. Otherwise a handle is allocated from gdraw->texturecache, a GL texture is created via make_rendertarget with the requested width/height, and the handle is completed with owner `owner` in the locked state. The recorded texture size and the byte count use the frame-texture dimensions, not width/height. */ +static GDrawHandle* get_rendertarget_texture(int width, int height, void* owner, + GDrawStats* gstats) { + S32 size; + GDrawHandle* t; + opengl_check(); + t = gdraw_HandleCacheGetLRU(&gdraw->rendertargets); + if (t) { + gdraw_HandleCacheLock(t, (void*)(UINTa)1); + return t; + } + + size = gdraw->frametex_width * gdraw->frametex_height * 4; + t = gdraw_res_alloc_begin(gdraw->texturecache, size, gstats); + if (!t) return t; + + glGenTextures(1, &t->handle.tex.gl); + make_rendertarget(t, t->handle.tex.gl, GL_RGBA, GL_RGBA, GL_UNSIGNED_BYTE, + width, height, 4); + t->handle.tex.w = gdraw->frametex_width; + t->handle.tex.h = gdraw->frametex_height; + t->handle.tex.nonpow2 = + 1; // assume all rendertargets are non-pow2 for consistency + gstats->nonzero_flags |= GDRAW_STATS_alloc_tex; + gstats->alloc_tex += 1; + gstats->alloc_tex_bytes += size; + opengl_check(); + gdraw_HandleCacheAllocateEnd(t, size, owner, GDRAW_HANDLE_STATE_locked); + + return t; +} + +/* Returns a locked color render target from gdraw->rendertargets, or NULL after sending a warning. Reuses the LRU handle when one exists; otherwise allocates a frame-texture-sized RGBA texture (plus a multisampled renderbuffer when GDRAW_MULTISAMPLING is compiled in and gdraw->multisampling is set). Fails when the cache has fewer free bytes than one target needs or has no free handle; each failure also emits a one-time diagnostic. */ +static GDrawHandle* get_color_rendertarget(GDrawStats* gstats) { + S32 size; + GDrawHandle* t; + opengl_check(); + t = gdraw_HandleCacheGetLRU(&gdraw->rendertargets); + if (t) { + gdraw_HandleCacheLock(t, (void*)(UINTa)1); + return t; + } + + // ran out of RTs, allocate a new one + size = gdraw->frametex_width * gdraw->frametex_height * 4; + if (gdraw->rendertargets.bytes_free < size) { + gdraw_ReportHandleCacheDiag("rt-color-memory", &gdraw->rendertargets, + GDRAW_RT_DIAG_color_memory, gdraw->tpw, + gdraw->tph); + IggyGDrawSendWarning( + NULL, "GDraw[rt-color] exceeded available rendertarget memory"); + return NULL; + } + + t = gdraw_HandleCacheAllocateBegin(&gdraw->rendertargets); + if (!t) { + 
gdraw_ReportHandleCacheDiag("rt-color-handles", &gdraw->rendertargets, + GDRAW_RT_DIAG_color_handles, gdraw->tpw, + gdraw->tph); + IggyGDrawSendWarning( + NULL, "GDraw[rt-color] exceeded available rendertarget handles"); + return t; + } + + glGenTextures(1, &t->handle.tex.gl); + make_rendertarget(t, t->handle.tex.gl, GL_RGBA, GL_RGBA, GL_UNSIGNED_BYTE, + gdraw->frametex_width, gdraw->frametex_height, 4); + t->handle.tex.w = gdraw->frametex_width; + t->handle.tex.h = gdraw->frametex_height; + t->handle.tex.nonpow2 = + 1; // assume all rendertargets are non-pow2 for consistency + +#ifdef GDRAW_MULTISAMPLING + if (gdraw->multisampling) { + glGenRenderbuffers(1, &t->handle.tex.gl_renderbuf); + glBindRenderbuffer(GL_RENDERBUFFER, t->handle.tex.gl_renderbuf); + glRenderbufferStorageMultisample(GL_RENDERBUFFER, gdraw->multisampling, + GL_RGBA, gdraw->frametex_width, + gdraw->frametex_height); + glBindRenderbuffer(GL_RENDERBUFFER, 0); + } +#endif + opengl_check(); + + /* same owner value (1) that the LRU-reuse path above locks with */ + gdraw_HandleCacheAllocateEnd(t, size, (void*)(UINTa)1, + GDRAW_HANDLE_STATE_locked); + gstats->nonzero_flags |= GDRAW_STATS_alloc_tex; + gstats->alloc_tex += gdraw->multisampling ? 
2 : 1; + gstats->alloc_tex_bytes += (1 + gdraw->multisampling) * size; + + return t; +} + +/* Returns &gdraw->stencil_depth, creating its GL renderbuffer(s) on first use (when handle.tex.gl is 0) at the frame-texture size. Three creation paths: a multisampled DEPTH24_STENCIL8 renderbuffer (GDRAW_MULTISAMPLING builds with gdraw->multisampling set); a packed DEPTH24_STENCIL8 renderbuffer whose name is stored in both tex.gl and tex.gl_renderbuf; or separate depth (24- or 16-bit, per gdraw->has_depth24) and 8-bit stencil renderbuffers in tex.gl and tex.gl_renderbuf respectively. Allocation statistics in gstats are updated only when something is created. */ +static GDrawHandle* get_depthstencil_renderbuffer(GDrawStats* gstats) { + if (!gdraw->stencil_depth.handle.tex.gl) { + gstats->nonzero_flags |= GDRAW_STATS_alloc_tex; + gstats->alloc_tex += 1; + +#ifdef GDRAW_MULTISAMPLING + if (gdraw->multisampling) { + glGenRenderbuffers(1, &gdraw->stencil_depth.handle.tex.gl); + glBindRenderbuffer(GL_RENDERBUFFER, + gdraw->stencil_depth.handle.tex.gl); + glRenderbufferStorageMultisample( + GL_RENDERBUFFER, gdraw->multisampling, GL_DEPTH24_STENCIL8, + gdraw->frametex_width, gdraw->frametex_height); + + gstats->alloc_tex_bytes += gdraw->multisampling * 4 * + gdraw->frametex_width * + gdraw->frametex_height; + } else { +#endif + if (gdraw->has_packed_depth_stencil) { + glGenRenderbuffers(1, &gdraw->stencil_depth.handle.tex.gl); + glBindRenderbuffer(GL_RENDERBUFFER, + gdraw->stencil_depth.handle.tex.gl); + glRenderbufferStorage(GL_RENDERBUFFER, GL_DEPTH24_STENCIL8, + gdraw->frametex_width, + gdraw->frametex_height); + + gdraw->stencil_depth.handle.tex.gl_renderbuf = + gdraw->stencil_depth.handle.tex.gl; + } else { + // this path is mainly for the iOS simulator + glGenRenderbuffers(1, &gdraw->stencil_depth.handle.tex.gl); + glBindRenderbuffer(GL_RENDERBUFFER, + gdraw->stencil_depth.handle.tex.gl); + glRenderbufferStorage(GL_RENDERBUFFER, + gdraw->has_depth24 ? 
GL_DEPTH_COMPONENT24 + : GL_DEPTH_COMPONENT16, + gdraw->frametex_width, + gdraw->frametex_height); + + glGenRenderbuffers( + 1, &gdraw->stencil_depth.handle.tex.gl_renderbuf); + glBindRenderbuffer( + GL_RENDERBUFFER, + gdraw->stencil_depth.handle.tex.gl_renderbuf); + glRenderbufferStorage(GL_RENDERBUFFER, GL_STENCIL_INDEX8, + gdraw->frametex_width, + gdraw->frametex_height); + } + + gstats->alloc_tex_bytes += + 4 * gdraw->frametex_width * gdraw->frametex_height; +#ifdef GDRAW_MULTISAMPLING + } +#endif + + glBindRenderbuffer(GL_RENDERBUFFER, 0); + opengl_check(); + } + return &gdraw->stencil_depth; +} + +/* Flushes the render-target cache and deletes the shared depth/stencil renderbuffers, zeroing their names so they are recreated on demand. The gl_renderbuf name is deleted separately only when it differs from tex.gl, so a name shared by both fields is deleted once. */ +static void flush_rendertargets(GDrawStats* stats) { + gdraw_res_flush(&gdraw->rendertargets, stats); + + if (gdraw->stencil_depth.handle.tex.gl_renderbuf && + gdraw->stencil_depth.handle.tex.gl_renderbuf != + gdraw->stencil_depth.handle.tex.gl) { + glDeleteRenderbuffers(1, &gdraw->stencil_depth.handle.tex.gl_renderbuf); + gdraw->stencil_depth.handle.tex.gl_renderbuf = 0; + } + + if (gdraw->stencil_depth.handle.tex.gl) { + glDeleteRenderbuffers(1, &gdraw->stencil_depth.handle.tex.gl); + gdraw->stencil_depth.handle.tex.gl = 0; + } + opengl_check(); +} + +//////////////////////////////////////////////////////////////////////// +// +// Begin rendering for a frame +// + +static void lazy_shader(ProgramWithCachedVariableLocations* ptr); + +/* Makes `prg` the current GL program. A program whose `program` field is still 0 is handed to lazy_shader() first; otherwise glUseProgram is called directly. */ +static RADINLINE void use_lazy_shader(ProgramWithCachedVariableLocations* prg) { + if (!prg->program) + lazy_shader(prg); // already does a glUseProgram! 
+ else + glUseProgram(prg->program); +} + +/* Sets the GL viewport for the current target: the padded tile size at origin 0,0 while blurring or when rendering to a non-cached render target; the unpadded tile size at (vx, vy) when rendering to the base frame; the cached target's own width/height when the current target is cached. */ +static void set_viewport(void) { + if (gdraw->in_blur) { + glViewport(0, 0, gdraw->tpw, gdraw->tph); + } else if (gdraw->cur == gdraw->frame) { + glViewport(gdraw->vx, gdraw->vy, gdraw->tw, gdraw->th); + } else if (gdraw->cur->cached) { + glViewport(0, 0, gdraw->cur->width, gdraw->cur->height); + } else { + glViewport(0, 0, gdraw->tpw, gdraw->tph); + // we need to translate from naive pixel space to align a tile + } + opengl_check(); +} + +/* Stores scale and offset terms in gdraw->projection: [0]/[1] are 2/(x1-x0) and 2/(y1-y0), [2]/[3] are -(x1+x0)/(x1-x0) and -(y1+y0)/(y1-y0). Applied as coord*scale+offset these map x0 -> -1 and x1 -> +1 (likewise for y) -- presumably how the vertex shader consumes them; confirm against the shader source. */ +static void set_projection_raw(S32 x0, S32 x1, S32 y0, S32 y1) { + gdraw->projection[0] = 2.0f / (x1 - x0); + gdraw->projection[1] = 2.0f / (y1 - y0); + gdraw->projection[2] = (x1 + x0) / (F32)(x0 - x1); + gdraw->projection[3] = (y1 + y0) / (F32)(y0 - y1); +} + +/* Picks projection bounds for the same four cases as set_viewport. In every case except the cached target the y bounds are passed larger-value-first (y0 > y1), i.e. vertically flipped relative to the cached case. */ +static void set_projection(void) { + if (gdraw->in_blur) + set_projection_raw(0, gdraw->tpw, gdraw->tph, 0); + else if (gdraw->cur == gdraw->frame) + set_projection_raw(gdraw->tx0, gdraw->tx0 + gdraw->tw, + gdraw->ty0 + gdraw->th, gdraw->ty0); + else if (gdraw->cur->cached) + set_projection_raw( + gdraw->cur->base_x, gdraw->cur->base_x + gdraw->cur->width, + gdraw->cur->base_y, gdraw->cur->base_y + gdraw->cur->height); + else + set_projection_raw(gdraw->tx0p, gdraw->tx0p + gdraw->tpw, + gdraw->ty0p + gdraw->tph, gdraw->ty0p); +} + +/* Returns GL to a neutral state: platform-specific reset, AA texture unit unbound, all color and depth writes enabled, cull/blend/depth/stencil/scissor tests disabled, texture unit 0 active and no program bound. */ +static void clear_renderstate(void) { + clear_renderstate_platform_specific(); + + // deactivate aa_tex + glActiveTexture(GL_TEXTURE0 + AATEX_SAMPLER); + glBindTexture(GL_TEXTURE_2D, 0); + + glColorMask(1, 1, 1, 1); + glDepthMask(GL_TRUE); + + glDisable(GL_CULL_FACE); + glDisable(GL_BLEND); + glDisable(GL_DEPTH_TEST); + glDisable(GL_STENCIL_TEST); + glDisable(GL_SCISSOR_TEST); + glActiveTexture(GL_TEXTURE0); + + glUseProgram(0); + opengl_check(); +} + +/* clear_renderstate() followed by binding gdraw->aa_tex on the AATEX_SAMPLER texture unit; leaves texture unit 0 active. */ +static void set_common_renderstate(void) { + clear_renderstate(); + + // activate aa_tex + glActiveTexture(GL_TEXTURE0 + AATEX_SAMPLER); + glBindTexture(GL_TEXTURE_2D, gdraw->aa_tex); + glActiveTexture(GL_TEXTURE0); +} + +/* Records the viewport origin (x, y) used for the base frame and the framebuffer object treated as the main render target. */ +void 
gdraw_GLx_(SetTileOrigin)(S32 x, S32 y, U32 framebuffer) { + gdraw->vx = x; + gdraw->vy = y; + gdraw->main_framebuffer = framebuffer; +} + +/* Resets the render-target stack (zeroes gdraw->frame and points gdraw->cur at its first entry) and stores the view size (w, h) and the world-to-pixel scale. */ +static void RADLINK gdraw_SetViewSizeAndWorldScale(S32 w, S32 h, F32 scalex, + F32 scaley) { + memset(gdraw->frame, 0, sizeof(gdraw->frame)); + gdraw->cur = gdraw->frame; + gdraw->fw = w; + gdraw->fh = h; + gdraw->world_to_pixel[0] = scalex; + gdraw->world_to_pixel[1] = scaley; +} + +/* NULL disables the 3D transform; otherwise enables it and copies sizeof(gdraw->xform_3d) bytes from `mat`. */ +static void RADLINK gdraw_Set3DTransform(F32* mat) { + if (mat == NULL) + gdraw->use_3d = 0; + else { + gdraw->use_3d = 1; + memcpy(gdraw->xform_3d, mat, sizeof(gdraw->xform_3d)); + } +} + +// must include anything necessary for texture creation/update +static void RADLINK gdraw_RenderingBegin(void) {} +static void RADLINK gdraw_RenderingEnd(void) {} + +/* Begins rendering the tile (x0,y0)-(x1,y1) with `pad` pixels of border. A tile covering the whole view (0,0)-(fw,fh) disables tiling and forces pad to 0. Stores the tile and padded-tile geometry; if the padded tile exceeds the current frame-texture size, the size is grown and all render targets are flushed. Finally sets viewport, projection and common render state and binds the main framebuffer. */ +static void RADLINK gdraw_RenderTileBegin(S32 x0, S32 y0, S32 x1, S32 y1, + S32 pad, GDrawStats* gstats) { + opengl_check(); + + if (x0 == 0 && y0 == 0 && x1 == gdraw->fw && y1 == gdraw->fh) { + pad = 0; + gdraw->tile_enabled = false; + } else { + gdraw->tile_enabled = true; + } + + gdraw->tx0 = x0; + gdraw->ty0 = y0; + gdraw->tw = x1 - x0; + gdraw->th = y1 - y0; + gdraw->tpw = gdraw->tw + pad * 2; + gdraw->tph = gdraw->th + pad * 2; + // origin of padded region + gdraw->tx0p = x0 - pad; + gdraw->ty0p = y0 - pad; + + if (gdraw->tpw > gdraw->frametex_width || + gdraw->tph > gdraw->frametex_height) { + gdraw->frametex_width = RR_MAX(gdraw->tpw, gdraw->frametex_width); + gdraw->frametex_height = RR_MAX(gdraw->tph, gdraw->frametex_height); + + flush_rendertargets(gstats); + } + + set_viewport(); + set_projection(); + set_common_renderstate(); + + glBindFramebuffer(GL_FRAMEBUFFER, gdraw->main_framebuffer); + opengl_check(); +} + +/* Ends a tile by resetting the render state; `stats` is not read. */ +static void RADLINK gdraw_RenderTileEnd(GDrawStats* stats) { + clear_renderstate(); +} + +/* Divisor used by depth_from_id() to turn an ID into a depth value; gdraw_GetInfo reports max_id as this minus 2. */ +#define MAX_DEPTH_VALUE (1 << 13) + +/* Fills `d` with this backend's capabilities: 8 stencil bits, max_id, the GL maximum texture size, the vbib buffer format, no shared depth/stencil, no forced mipmapping, and the conditional non-power-of-two flag from gdraw. */ +static void RADLINK gdraw_GetInfo(GDrawInfo* d) { + GLint maxtex; + + opengl_check(); + glGetIntegerv(GL_MAX_TEXTURE_SIZE, &maxtex); + + 
d->num_stencil_bits = 8; + d->max_id = MAX_DEPTH_VALUE - 2; + // for floating point depth, just use mantissa, e.g. 16-20 bits + d->max_texture_size = maxtex; + d->buffer_format = GDRAW_BFORMAT_vbib; + d->shared_depth_stencil = 0; + d->always_mipmap = 0; + d->conditional_nonpow2 = gdraw->has_conditional_non_power_of_two; + opengl_check(); +} + +//////////////////////////////////////////////////////////////////////// +// +// Enable/disable rendertargets in stack fashion +// + +static void clear_with_rect(gswf_recti* region, rrbool clear_depth, + GDrawStats* stats); + +/* Attaches the buffers of the current stack entry (gdraw->cur) to the framebuffer bound at GL_FRAMEBUFFER. With multisampling the color renderbuffer and the depth/stencil renderbuffer (same name for both attachments) are attached; otherwise the color texture is attached and depth/stencil use tex.gl and tex.gl_renderbuf respectively. A missing buffer results in attaching 0, i.e. detaching. */ +static void set_render_target_state(void) { + GLint h; +#ifdef GDRAW_MULTISAMPLING + if (gdraw->multisampling) { + glGetIntegerv(GL_FRAMEBUFFER_BINDING, &h); + /* NOTE(review): the value just read into h is overwritten by the next statement without being used. */ + h = gdraw->cur->color_buffer + ? gdraw->cur->color_buffer->handle.tex.gl_renderbuf + : 0; + glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, + GL_RENDERBUFFER, h); + h = gdraw->cur->stencil_depth ? gdraw->cur->stencil_depth->handle.tex.gl + : 0; + glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, + GL_RENDERBUFFER, h); + glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, + GL_RENDERBUFFER, h); + } else { +#endif + h = gdraw->cur->color_buffer ? 
gdraw->cur->color_buffer->handle.tex.gl + : 0; + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, + GL_TEXTURE_2D, h, 0); + if (gdraw->cur->stencil_depth) { + glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, + GL_RENDERBUFFER, + gdraw->cur->stencil_depth->handle.tex.gl); + glFramebufferRenderbuffer( + GL_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_RENDERBUFFER, + gdraw->cur->stencil_depth->handle.tex.gl_renderbuf); + } else { + glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, + GL_RENDERBUFFER, 0); + glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, + GL_RENDERBUFFER, 0); + } +#ifdef GDRAW_MULTISAMPLING + } +#endif + opengl_check(); +} + +/* Pushes a new render target onto the render stack and makes it current. With a non-NULL `owner` the target comes from get_rendertarget_texture() sized to `region` and the stack entry is marked cached; otherwise it comes from get_color_rendertarget(). The first nested level obtains the shared depth/stencil buffer, deeper levels reuse their parent's. The target is then cleared (restricted by scissor to the padded, tile-clamped region when `region` is given) and viewport/projection are updated. Returns false, after a warning, for an empty tile, a stack deeper than MAX_RENDER_STACK_DEPTH, or no available target; also returns false without a warning when the region does not intersect the current tile, in which case the stack entry is popped and the target freed. `format` and `flags` are not read in this body. NOTE(review): `region` is dereferenced in the diagnostic arguments and in the owner path before the later `if (region)` test -- confirm callers never pass NULL there. */ +static rrbool RADLINK gdraw_TextureDrawBufferBegin(gswf_recti* region, + gdraw_texture_format format, + U32 flags, void* owner, + GDrawStats* gstats) { + GDrawFramebufferState* n = gdraw->cur + 1; + GDrawHandle* t; + int k; + if (gdraw->tw == 0 || gdraw->th == 0) { + gdraw_ReportHandleCacheDiag( + "rt-empty", &gdraw->rendertargets, GDRAW_RT_DIAG_empty_request, + region->x1 - region->x0, region->y1 - region->y0); + IggyGDrawSendWarning( + NULL, "GDraw[rt-stack] got a request for an empty rendertarget"); + return false; + } + + if (n >= &gdraw->frame[MAX_RENDER_STACK_DEPTH]) { + gdraw_ReportHandleCacheDiag( + "rt-stack-depth", &gdraw->rendertargets, GDRAW_RT_DIAG_stack_depth, + region->x1 - region->x0, region->y1 - region->y0); + IggyGDrawSendWarning(NULL, + "GDraw[rt-stack] rendertarget nesting exceeded " + "MAX_RENDER_STACK_DEPTH"); + return false; + } + + if (owner) { + t = get_rendertarget_texture(region->x1 - region->x0, + region->y1 - region->y0, owner, gstats); + if (!t) { + gdraw_ReportHandleCacheDiag("rt-cacheAsBitmap", gdraw->texturecache, + GDRAW_RT_DIAG_cache_bitmap, + region->x1 - region->x0, + region->y1 - region->y0); + IggyGDrawSendWarning( + NULL, "GDraw[rt-cacheAsBitmap] ran out of rendertargets"); + return false; + } + } else { + t = get_color_rendertarget(gstats); 
+ if (!t) { + IggyGDrawSendWarning(NULL, + "GDraw[rt-color] ran out of rendertargets"); + return false; + } + } + n->color_buffer = t; + assert(n->color_buffer != NULL); + + /* only the first nested level fetches the shared depth/stencil buffer; deeper levels inherit their parent's */ + if (n == gdraw->frame + 1) + n->stencil_depth = get_depthstencil_renderbuffer(gstats); + else + n->stencil_depth = (n - 1)->stencil_depth; + ++gdraw->cur; + + gdraw->cur->cached = owner != NULL; + if (owner) { + gdraw->cur->base_x = region->x0; + gdraw->cur->base_y = region->y0; + gdraw->cur->width = region->x1 - region->x0; + gdraw->cur->height = region->y1 - region->y0; + } + + gstats->nonzero_flags |= GDRAW_STATS_rendtarg; + + glBindFramebuffer(GL_FRAMEBUFFER, gdraw->framebuffer_stack_object); + set_render_target_state(); + + assert(glCheckFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE); + + // viewport for clear (make sure scissor is inside viewport just in case) + glViewport(0, 0, gdraw->frametex_width, gdraw->frametex_height); + + /* k indexes gdraw->rt_valid by the handle's position in the rendertargets cache. NOTE(review): when `owner` is set the handle may have been allocated from gdraw->texturecache instead -- confirm k is still a valid rt_valid index in that case. */ + k = (int)(n->color_buffer - gdraw->rendertargets.handle); + if (region) { + S32 ox, oy; + + // in a perfect world, we'd only need 1 pixel of border on all sides for + // bilinear filtering, which would mean pad = 1. however, texture + // interpolator precision is not that high even on PC parts, and if we + // only use 1 pixel of padding we will often get some un-filled pixels + // "creeping in" from the sides. pad = 2 is fine on recent PC parts, but + // not old PC parts or even fairly new mobile parts, so we play it safe + // and use 3 pixels which so far gives good results everywhere. 
+ S32 pad = 3; + + // region.x0,y0 are the top left of the rectangle in display space + // x,y are the *bottom* left of the rectangle in window space + S32 h = gdraw->tph; + S32 xt0, yt0, xt1, yt1; + S32 x0, y0, x1, y1; + + if (gdraw->in_blur || !gdraw->tile_enabled) + ox = oy = 0; + else + ox = gdraw->tx0, oy = gdraw->ty0; + + // clamp region to tile (in gdraw coords) + xt0 = RR_MAX(region->x0 - ox, 0); + yt0 = RR_MAX(region->y0 - oy, 0); + xt1 = RR_MIN(region->x1 - ox, gdraw->tpw); + yt1 = RR_MIN(region->y1 - oy, gdraw->tph); + + // but the padding gets clamped to framebuffer coords! also transfer to + // window space here. + x0 = RR_MAX(xt0 - pad, 0); + y0 = RR_MAX(h - yt1 - pad, 0); + x1 = RR_MIN(xt1 + pad, gdraw->frametex_width); + y1 = RR_MIN(h - yt0 + pad, gdraw->frametex_height); + + if (x1 <= x0 || + y1 <= y0) { // region doesn't intersect with current tile + --gdraw->cur; + + // remove color and stencil buffers + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, + GL_TEXTURE_2D, 0, 0); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, + GL_TEXTURE_2D, 0, 0); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, + GL_TEXTURE_2D, 0, 0); + + // switch render target back + if (gdraw->cur == gdraw->frame) + glBindFramebuffer(GL_FRAMEBUFFER, gdraw->main_framebuffer); + else + set_render_target_state(); + + set_viewport(); + set_projection(); + opengl_check(); + + // free our render target + gdraw_FreeTexture((GDrawTexture*)n->color_buffer, 0, gstats); + + // note: don't send a warning since this will happen during regular + // tiled rendering + return false; + } + + glEnable(GL_SCISSOR_TEST); + glScissor(x0, y0, x1 - x0, y1 - y0); + gdraw->rt_valid[k].x0 = xt0; + gdraw->rt_valid[k].y0 = yt0; + gdraw->rt_valid[k].x1 = xt1; + gdraw->rt_valid[k].y1 = yt1; + gstats->cleared_pixels += (x1 - x0) * (y1 - y0); + } else { + glDisable(GL_SCISSOR_TEST); + gdraw->rt_valid[k].x0 = 0; + gdraw->rt_valid[k].y0 = 0; + gdraw->rt_valid[k].x1 = 
gdraw->frametex_width; + gdraw->rt_valid[k].y1 = gdraw->frametex_height; + gstats->cleared_pixels += + gdraw->frametex_width * gdraw->frametex_height; + } + + gstats->nonzero_flags |= GDRAW_STATS_clears; + gstats->num_clears += 1; + +#ifdef GDRAW_FEWER_CLEARS + if (region) { + clear_with_rect(region, n == gdraw->frame + 1, gstats); + } else +#endif // GDRAW_FEWER_CLEARS + { + glClearColor(0, 0, 0, 0); // must clear destination alpha + glClearStencil(0); + glClearDepth(1); + glStencilMask(255); + glDepthMask(GL_TRUE); + glColorMask(1, 1, 1, 1); + glDisable(GL_STENCIL_TEST); + /* depth and stencil are cleared only when entering the first nested level; deeper levels clear color only */ + if (n == gdraw->frame + 1) + glClear(GL_COLOR_BUFFER_BIT | GL_STENCIL_BUFFER_BIT | + GL_DEPTH_BUFFER_BIT); + else + glClear(GL_COLOR_BUFFER_BIT); + } + + set_viewport(); + set_projection(); + + opengl_check(); + + return true; +} + +/* Pops the current render target and returns its color buffer as a GDrawTexture. The stack pointer is decremented first, before any early return. Returns 0 when the view size is zero or when the popped entry lies at or beyond MAX_RENDER_STACK_DEPTH. Otherwise detaches the color/depth/stencil attachments, with multisampling blits the multisample renderbuffer into the texture, then rebinds either the main framebuffer (stack back at the base) or the parent's attachments, and refreshes viewport and projection. */ +static GDrawTexture* RADLINK gdraw_TextureDrawBufferEnd(GDrawStats* gstats) { + GDrawFramebufferState* n = gdraw->cur; + GDrawFramebufferState* m = --gdraw->cur; + if (gdraw->fw == 0 || gdraw->fh == 0) return 0; + + if (n >= &gdraw->frame[MAX_RENDER_STACK_DEPTH]) + return 0; // already returned a warning in Start...() + + assert(m >= gdraw->frame); // bug in Iggy -- unbalanced + + if (m != gdraw->frame) assert(m->color_buffer != NULL); + assert(n->color_buffer != NULL); + + // remove color and stencil buffers + glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, + GL_RENDERBUFFER, 0); + glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, + GL_RENDERBUFFER, 0); + glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, + GL_RENDERBUFFER, 0); + +#ifdef GDRAW_MULTISAMPLING + if (gdraw->multisampling) { + // blit from multisample to texture + if (n->color_buffer->handle.tex.gl_renderbuf) { + GLuint res; + glBindFramebuffer(GL_READ_FRAMEBUFFER, + gdraw->framebuffer_copy_to_texture); + glFramebufferRenderbuffer( + GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_RENDERBUFFER, 0); + glFramebufferRenderbuffer( + GL_DRAW_FRAMEBUFFER, 
GL_STENCIL_ATTACHMENT, GL_RENDERBUFFER, 0); + glFramebufferRenderbuffer(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, + GL_RENDERBUFFER, 0); + glFramebufferRenderbuffer(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, + GL_RENDERBUFFER, 0); + glFramebufferRenderbuffer(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, + GL_RENDERBUFFER, + n->color_buffer->handle.tex.gl_renderbuf); + glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, + GL_TEXTURE_2D, + n->color_buffer->handle.tex.gl, 0); + /* NOTE(review): res is assigned here but not examined afterwards in this function. */ + res = glCheckFramebufferStatus(GL_DRAW_FRAMEBUFFER); + /* copies the padded-tile rectangle 1:1 from the multisample renderbuffer into the texture */ + glBlitFramebuffer(0, 0, gdraw->tpw, gdraw->tph, 0, 0, gdraw->tpw, + gdraw->tph, GL_COLOR_BUFFER_BIT, GL_NEAREST); + gstats->nonzero_flags |= GDRAW_STATS_blits; + gstats->num_blits += 1; + gstats->num_blit_pixels += (gdraw->tpw * gdraw->tph); + + glFramebufferRenderbuffer(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, + GL_RENDERBUFFER, 0); + glFramebufferRenderbuffer(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, + GL_RENDERBUFFER, 0); + glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); + } + } +#endif + + gstats->nonzero_flags |= GDRAW_STATS_rendtarg; + gstats->rendertarget_changes += 1; + + // set the new state + if (m == gdraw->frame) // back to initial framebuffer + glBindFramebuffer(GL_FRAMEBUFFER, gdraw->main_framebuffer); + else + set_render_target_state(); + + // reset the viewport if we've reached the root scope + set_viewport(); + set_projection(); + + opengl_check(); + + return (GDrawTexture*)n->color_buffer; +} + +//////////////////////////////////////////////////////////////////////// +// +// Clear stencil/depth buffers +// +// Open question whether we'd be better off finding bounding boxes +// and only clearing those; it depends exactly how fast clearing works. 
+// + +/* Clears to 0 the stencil planes selected by `bits` (used as the stencil write mask) over the whole buffer; the scissor test is disabled first and the write mask is left set to `bits`. */ +static void RADLINK gdraw_ClearStencilBits(U32 bits) { + glDisable(GL_SCISSOR_TEST); + glStencilMask(bits); + glClearStencil(0); + glClear(GL_STENCIL_BUFFER_BIT); + opengl_check(); +} + +// this only happens rarely (hopefully never) if we use the depth buffer, +// so we can just clear the whole thing +static void RADLINK gdraw_ClearID(void) { + glDisable(GL_SCISSOR_TEST); + glClearDepth(1); + glDepthMask(GL_TRUE); + glClear(GL_DEPTH_BUFFER_BIT); + opengl_check(); +} + +//////////////////////////////////////////////////////////////////////// +// +// Set all the render state from GDrawRenderState +// +// This also is responsible for getting the framebuffer into a texture +// if the read-modify-write blend operation can't be expressed with +// the native blend operators. (E.g. "screen") +// + +/* Indices into a program's vertex-shader variable-location table. VVAR_viewproj and VVAR_x3d deliberately share slot 5: set_world_projection uses the former, set_3d_projection the latter. */ +enum { + VVAR_world0 = 0, + VVAR_world1 = 1, + VVAR_xoff = 2, + VVAR_texgen_s = 3, + VVAR_texgen_t = 4, + VVAR_viewproj = 5, + VVAR_x3d = 5, + VVAR_y3d = 6, + VVAR_z3d = 7, +}; + +// convert an ID request to a value suitable for the depth buffer, +// in homogeneous clip space with w=1 (depth from -1..1) +static float depth_from_id(S32 id) { + return 1.0f - 2.0f * (id + 1) / (F32)MAX_DEPTH_VALUE; +} + +/* Makes texture unit `texunit` active and binds `tex`'s GL texture to GL_TEXTURE_2D on it (0 when tex is NULL). The unit is left active on return. */ +static void set_texture(U32 texunit, GDrawTexture* tex) { + glActiveTexture(GL_TEXTURE0 + texunit); + if (tex == NULL) + glBindTexture(GL_TEXTURE_2D, 0); + else + glBindTexture(GL_TEXTURE_2D, ((GDrawHandle*)tex)->handle.tex.gl); +} + +/* Uploads the two 4-float world rows (world[0..3], world[4..7]) and gdraw->projection to the current program. All three uniform locations must be valid (asserted). */ +static void set_world_projection(const int* vvars, const F32 world[2 * 4]) { + assert(vvars[VVAR_world0] >= 0 && vvars[VVAR_world1] >= 0 && + vvars[VVAR_viewproj] >= 0); + glUniform4fv(vvars[VVAR_world0], 1, world + 0); + glUniform4fv(vvars[VVAR_world1], 1, world + 4); + glUniform4fv(vvars[VVAR_viewproj], 1, gdraw->projection); +} + +/* 3D counterpart of set_world_projection: uploads the two world rows, then the three 4-float rows of `xform` to the x3d/y3d/z3d uniforms. All five locations must be valid (asserted). */ +static void set_3d_projection(const int* vvars, const F32 world[2 * 4], + const F32 xform[3][4]) { + assert(vvars[VVAR_world0] >= 0 && vvars[VVAR_world1] >= 0); + glUniform4fv(vvars[VVAR_world0], 1, world + 0); + 
glUniform4fv(vvars[VVAR_world1], 1, world + 4); + + assert(vvars[VVAR_x3d] >= 0 && vvars[VVAR_y3d] >= 0 && + vvars[VVAR_z3d] >= 0); + glUniform4fv(vvars[VVAR_x3d], 1, xform[0]); + glUniform4fv(vvars[VVAR_y3d], 1, xform[1]); + glUniform4fv(vvars[VVAR_z3d], 1, xform[2]); +} + +/* Applies all GL state needed to draw with render state `r` and vertex format `vformat`. In order: blend mode (ihud1 always uses premultiplied-alpha blending and the ihud program; other formats use the `blends` table and pick a program from tex0_mode / additive-color variant / vformat, or the exceptional-blend program for GDRAW_BLEND_special); the program is made current; vertex constants are uploaded (ihud worldview/material/textmode from `p`, or world + projection, edge matrix and texgen); texture 0 is bound with its wrap and magnification filter; fragment constants (color multiply, color add scaled by 1/255, focal point) are uploaded when the program has them; then scissor, stencil, color mask and depth test/write are set. If `ovvars` is non-NULL it receives the program's vertex variable table. Always returns 1. `gstats` is not read in this body. */ +static int set_render_state(GDrawRenderState* r, S32 vformat, + const int** ovvars, GDrawPrimitive* p, + GDrawStats* gstats) { + static struct gdraw_gl_blendspec { + GLboolean enable; + GLenum src; + GLenum dst; + } blends[ASSERT_COUNT(GDRAW_BLEND__count, 6)] = { + {GL_FALSE, GL_ONE, GL_ZERO}, // GDRAW_BLEND_none + {GL_TRUE, GL_ONE, GL_ONE_MINUS_SRC_ALPHA}, // GDRAW_BLEND_alpha + {GL_TRUE, GL_DST_COLOR, + GL_ONE_MINUS_SRC_ALPHA}, // GDRAW_BLEND_multiply + {GL_TRUE, GL_ONE, GL_ONE}, // GDRAW_BLEND_add + + {GL_FALSE, GL_ONE, GL_ZERO}, // GDRAW_BLEND_filter + {GL_FALSE, GL_ONE, GL_ZERO}, // GDRAW_BLEND_special + }; + + F32 world[2 * 4]; + ProgramWithCachedVariableLocations* prg; + int *fvars, *vvars; + int blend_mode; + + opengl_check(); + assert((vformat >= 0 && vformat < GDRAW_vformat__basic_count) || + vformat == GDRAW_vformat_ihud1); + + if (vformat == GDRAW_vformat_ihud1) { + glEnable(GL_BLEND); + glBlendFunc(GL_ONE, + GL_ONE_MINUS_SRC_ALPHA); // premultiplied alpha blend mode + prg = &gdraw->ihud[0]; + } else { + // apply the major blend mode + blend_mode = r->blend_mode; + assert(blend_mode >= 0 && + blend_mode < sizeof(blends) / sizeof(*blends)); + if (blends[blend_mode].enable) { + glEnable(GL_BLEND); + glBlendFunc(blends[blend_mode].src, blends[blend_mode].dst); + } else + glDisable(GL_BLEND); + + // set the fragment program if it wasn't set above + if (r->blend_mode != GDRAW_BLEND_special) { + // make sure data has been initialized + int which = r->tex0_mode, additive = 0; + + /* additive variant: 0 = no color add, 1 = color add present, 2 = color add with non-zero alpha component */ + if (r->cxf_add) { + additive = 1; + if (r->cxf_add[3]) additive = 2; + } + + prg = &gdraw->fprog[which][additive][vformat]; + } else + prg = &gdraw->exceptional_blend[r->special_blend]; + } + + 
use_lazy_shader(prg); + OPENGL_CHECK_SITE("set_render_state:use_lazy_shader"); + fvars = prg->vars[0]; + vvars = prg->vars[1]; + + if (vformat == GDRAW_vformat_ihud1) { + F32 wv[2][4] = {1.0f / 960, 0, 0, -1.0, 0, -1.0f / 540, 0, +1.0}; + glUniform4fv(vvars[VAR_ihudv_worldview], 2, wv[0]); + OPENGL_CHECK_SITE("set_render_state:ihud_worldview"); + glUniform4fv(vvars[VAR_ihudv_material], p->uniform_count, p->uniforms); + OPENGL_CHECK_SITE("set_render_state:ihud_material"); + glUniform1f(vvars[VAR_ihudv_textmode], p->drawprim_mode ? 0.0f : 1.0f); + OPENGL_CHECK_SITE("set_render_state:ihud_textmode"); + } else { + // set vertex shader constants + if (!r->use_world_space) + gdraw_ObjectSpace(world, r->o2w, depth_from_id(r->id), 0.0f); + else + gdraw_WorldSpace(world, gdraw->world_to_pixel, depth_from_id(r->id), + 0.0f); + +#ifdef FLASH_10 + set_3d_projection(vvars, world, gdraw->xform_3d); +#else + set_world_projection(vvars, world); +#endif + + if (vvars[VVAR_xoff] >= 0) + glUniform4fv(vvars[VVAR_xoff], 1, r->edge_matrix); + + if (r->texgen0_enabled) { + assert(vvars[VVAR_texgen_s] >= 0 && vvars[VVAR_texgen_t] >= 0); + glUniform4fv(vvars[VVAR_texgen_s], 1, r->s0_texgen); + glUniform4fv(vvars[VVAR_texgen_t], 1, r->t0_texgen); + } + } + + // texture stuff + set_texture(0, r->tex[0]); + OPENGL_CHECK_SITE("set_render_state:set_texture0"); + + if (r->tex[0] && gdraw->has_conditional_non_power_of_two && + ((GDrawHandle*)r->tex[0])->handle.tex.nonpow2) { + // only wrap mode allowed in conditional nonpow2 is clamp; this should + // have been set when the texture was created, but to be on the safe + // side... 
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + } else + switch (r->wrap0) { + case GDRAW_WRAP_repeat: + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_REPEAT); + break; + case GDRAW_WRAP_clamp: + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, + GL_CLAMP_TO_EDGE); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, + GL_CLAMP_TO_EDGE); + break; + case GDRAW_WRAP_mirror: + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, + GL_MIRRORED_REPEAT); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, + GL_MIRRORED_REPEAT); + break; + } + + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, + r->nearest0 ? GL_NEAREST : GL_LINEAR); + + // fragment shader constants + + if (fvars[VAR_cmul] >= 0) + glUniform4f(fvars[VAR_cmul], r->color[0], r->color[1], r->color[2], + r->color[3]); + if (fvars[VAR_cadd] >= 0) { + if (r->cxf_add) + glUniform4f(fvars[VAR_cadd], r->cxf_add[0] / 255.0f, + r->cxf_add[1] / 255.0f, r->cxf_add[2] / 255.0f, + r->cxf_add[3] / 255.0f); + else + glUniform4f(fvars[VAR_cadd], 0, 0, 0, 0); + } + if (fvars[VAR_focal] >= 0) + glUniform4fv(fvars[VAR_focal], 1, r->focal_point); + + glActiveTexture(GL_TEXTURE0); + + // Set pixel operation states + + if (r->scissor) { + S32 x0, y0, x1, y1; + // scissor.x0,y0 are the top left of the rectangle in display space + // x,y are the *bottom* left of the rectangle in window space + x0 = r->scissor_rect.x0; + y0 = r->scissor_rect.y1; + x1 = r->scissor_rect.x1; + y1 = r->scissor_rect.y0; + // convert into tile-relative coordinates + if (gdraw->tile_enabled) { + x0 -= gdraw->tx0; + y0 -= gdraw->ty0; + x1 -= gdraw->tx0; + y1 -= gdraw->ty0; + } + // convert bottom-most edge to bottom-relative + y0 = (gdraw->th) - y0; + y1 = (gdraw->th) - y1; + if (gdraw->cur == gdraw->frame) { + // move into viewport space + x0 += gdraw->vx; + y0 += gdraw->vy; + x1 += 
gdraw->vx; + y1 += gdraw->vy; + } + glScissor(x0, y0, x1 - x0, y1 - y0); + glEnable(GL_SCISSOR_TEST); + } else + glDisable(GL_SCISSOR_TEST); + + /* stencil: pass only where (stencil & stencil_test) == (255 & stencil_test); on depth pass write the reference into the bits selected by stencil_set */ + glStencilOp(GL_KEEP, GL_KEEP, GL_REPLACE); + glStencilMask(r->stencil_set); + glStencilFunc(GL_EQUAL, 255, r->stencil_test); + if (r->stencil_set | r->stencil_test) + glEnable(GL_STENCIL_TEST); + else + glDisable(GL_STENCIL_TEST); + + /* draws that write stencil bits do not write color */ + if (r->stencil_set) + glColorMask(0, 0, 0, 0); + else + glColorMask(1, 1, 1, 1); + + if (r->test_id) { + glEnable(GL_DEPTH_TEST); + glDepthFunc(GL_LESS); + } else { + glDisable(GL_DEPTH_TEST); + glDepthFunc(GL_LESS); + } + + if (r->set_id) + glDepthMask(GL_TRUE); + else + glDepthMask(GL_FALSE); + + OPENGL_CHECK_SITE("set_render_state:final"); + if (ovvars) *ovvars = vvars; + + return 1; +} + +//////////////////////////////////////////////////////////////////////// +// +// Vertex formats +// + +/* Sets up and enables the vertex attribute arrays for `format`, using `vertices` as the base address/offset; attributes start 0, 8 and 16 bytes from it. v2: attribute 0 = 2 floats, stride 0. v2aa: attribute 0 = 2 floats, attribute 1 = 4 shorts, stride 16. v2tc2: attributes 0 and 1 = 2 floats each, stride 16. ihud1: attributes 0 and 1 = 2 floats each, attribute 2 = 4 normalized unsigned bytes, stride 20. Any other format asserts. */ +static void set_vertex_format(S32 format, F32* vertices) { + const void* vertex_offset_0 = (const void*)(size_t)vertices; + const void* vertex_offset_8 = + (const void*)((size_t)vertices + (2 * sizeof(F32))); + const void* vertex_offset_16 = + (const void*)((size_t)vertices + (4 * sizeof(F32))); + + switch (format) { + case GDRAW_vformat_v2: + glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, 0, vertex_offset_0); + glEnableVertexAttribArray(0); + break; + + case GDRAW_vformat_v2aa: + glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, 16, + vertex_offset_0); + glVertexAttribPointer(1, 4, GL_SHORT, GL_FALSE, 16, + vertex_offset_8); + glEnableVertexAttribArray(0); + glEnableVertexAttribArray(1); + break; + + case GDRAW_vformat_v2tc2: + glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, 16, + vertex_offset_0); + glVertexAttribPointer(1, 2, GL_FLOAT, GL_FALSE, 16, + vertex_offset_8); + glEnableVertexAttribArray(0); + glEnableVertexAttribArray(1); + break; + + case GDRAW_vformat_ihud1: + glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, 20, + vertex_offset_0); + glVertexAttribPointer(1, 2, GL_FLOAT, GL_FALSE, 20, + 
vertex_offset_8); + glVertexAttribPointer(2, 4, GL_UNSIGNED_BYTE, GL_TRUE, 20, + vertex_offset_16); + glEnableVertexAttribArray(0); + glEnableVertexAttribArray(1); + glEnableVertexAttribArray(2); + break; + + default: + assert(0); + } +} + +/* Disables vertex attribute arrays 0, 1 and 2 regardless of `format` (the self-assignment only marks the parameter as used). */ +static void reset_vertex_format(S32 format) { + // we don't use attrib #1 for all formats, but doesn't seem worthwhile to + // check + format = format; + glDisableVertexAttribArray(0); + glDisableVertexAttribArray(1); + glDisableVertexAttribArray(2); +} + +//////////////////////////////////////////////////////////////////////// +// +// Draw triangles with a given renderstate +// + +/* Stamps the fence value of each non-NULL handle with the current gdraw->frame_counter. */ +static void tag_resources(void* r1, void* r2, void* r3) { + U64 now = gdraw->frame_counter; + if (r1) ((GDrawHandle*)r1)->fence.value = now; + if (r2) ((GDrawHandle*)r2)->fence.value = now; + if (r3) ((GDrawHandle*)r3)->fence.value = now; +} + +/* Vertex size in F32 units, indexed by vertex format; the values correspond to 8, 16, 16 and 20 bytes -- presumably in the order v2, v2aa, v2tc2, ihud1 to match set_vertex_format's strides (confirm against the GDRAW_vformat_* enum values). */ +static int vformat_stride[] = {2, 4, 4, 5}; + +/* Draws primitive `p` with render state `r`. Binds `buf`'s vertex/index buffers when given (otherwise unbinds both targets, so p->vertices / p->indices are client pointers). If a buffer or explicit indices exist, issues a single glDrawElements over p->num_indices 16-bit indices. Otherwise the vertices are treated as quads (count must be a multiple of 4) and drawn through the shared quad index buffer in chunks of at most QUAD_IB_COUNT quads, 6 indices per quad. Updates batch/index/vertex statistics and tags the vertex buffer and both textures with the current frame. */ +static void RADLINK gdraw_DrawIndexedTriangles(GDrawRenderState* r, + GDrawPrimitive* p, + GDrawVertexBuffer* buf, + GDrawStats* gstats) { + GDrawHandle* vb = (GDrawHandle*)buf; + if (vb) { + glBindBuffer(GL_ARRAY_BUFFER, vb->handle.vbuf.base); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, vb->handle.vbuf.indices); + } else { + glBindBuffer(GL_ARRAY_BUFFER, 0); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); + } + + if (!set_render_state(r, p->vertex_format, NULL, p, gstats)) return; + gstats->nonzero_flags |= GDRAW_STATS_batches; + gstats->num_batches += 1; + gstats->drawn_indices += p->num_indices; + gstats->drawn_vertices += p->num_vertices; + + if (vb || p->indices) { // regular path + set_vertex_format(p->vertex_format, p->vertices); + glDrawElements(GL_TRIANGLES, p->num_indices, GL_UNSIGNED_SHORT, + p->indices); + } else { // dynamic quads + S32 pos = 0; + U32 stride = + vformat_stride[p->vertex_format]; // in units of sizeof(F32) + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, gdraw->quad_ib); + assert(p->num_vertices % 4 == 0); + + while (pos < p->num_vertices) { + S32 
vert_count = RR_MIN(p->num_vertices - pos, QUAD_IB_COUNT * 4); + set_vertex_format(p->vertex_format, p->vertices + pos * stride); + glDrawElements(GL_TRIANGLES, (vert_count >> 2) * 6, + GL_UNSIGNED_SHORT, NULL); + pos += vert_count; + } + + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); + } + reset_vertex_format(p->vertex_format); + + if (vb) { + glBindBuffer(GL_ARRAY_BUFFER, 0); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); + } + + OPENGL_CHECK_SITE("gdraw_DrawIndexedTriangles:final"); + tag_resources(vb, r->tex[0], r->tex[1]); +} + +/////////////////////////////////////////////////////////////////////// +// +// Flash 8 filter effects +// + +// caller sets up texture coordinates +static void do_screen_quad(gswf_recti* s, F32* tc, const int* vvars, + GDrawStats* gstats, F32 depth) { + F32 px0 = (F32)s->x0, py0 = (F32)s->y0, px1 = (F32)s->x1, py1 = (F32)s->y1; + F32 s0 = tc[0], t0 = tc[1], s1 = tc[2], t1 = tc[3]; + F32 vert[4][4]; + F32 world[2 * 4]; + + OPENGL_CHECK_SITE("do_screen_quad:begin"); + + vert[0][0] = px0; + vert[0][1] = py0; + vert[0][2] = s0; + vert[0][3] = t0; + vert[1][0] = px1; + vert[1][1] = py0; + vert[1][2] = s1; + vert[1][3] = t0; + vert[2][0] = px1; + vert[2][1] = py1; + vert[2][2] = s1; + vert[2][3] = t1; + vert[3][0] = px0; + vert[3][1] = py1; + vert[3][2] = s0; + vert[3][3] = t1; + + OPENGL_CHECK_SITE("do_screen_quad:after_vertices"); + gdraw_PixelSpace(world); + world[2] = depth; + set_world_projection(vvars, world); + OPENGL_CHECK_SITE("do_screen_quad:after_projection"); + + set_vertex_format(GDRAW_vformat_v2tc2, vert[0]); + glBindBuffer(GL_ARRAY_BUFFER, 0); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); + OPENGL_CHECK_SITE("do_screen_quad:before_draw"); + glDrawArrays(GL_TRIANGLE_FAN, 0, 4); + reset_vertex_format(GDRAW_vformat_v2tc2); + OPENGL_CHECK_SITE("do_screen_quad:after_draw"); + + gstats->nonzero_flags |= GDRAW_STATS_batches; + gstats->num_batches += 1; + gstats->drawn_vertices += 4; + gstats->drawn_indices += 6; + + 
OPENGL_CHECK_SITE("do_screen_quad:final"); +} + +#ifdef GDRAW_FEWER_CLEARS +static void clear_with_rect(gswf_recti* region, rrbool clear_depth, + GDrawStats* gstats) { + F32 tc[4] = {0, 0, 0, 0}; + + use_lazy_shader(&gdraw->manual_clear); + glUniform4f(gdraw->manual_clear.vars[0][0], 0.0, 0, 0, 0); + + glDisable(GL_BLEND); + + if (clear_depth) { + glEnable(GL_DEPTH_TEST); + glDepthFunc(GL_ALWAYS); + glDepthMask(GL_TRUE); + + glEnable(GL_STENCIL_TEST); + glStencilMask(255); + glStencilOp(GL_REPLACE, GL_REPLACE, GL_REPLACE); + glStencilFunc(GL_ALWAYS, 0, 255); + } else { + glDisable(GL_DEPTH_TEST); + glDisable(GL_STENCIL_TEST); + } + + glColorMask(1, 1, 1, 1); + glColor4f(0, 0, 0, 0); + + { + // coordinate system doesn't match, so just draw whole screen, rely on + // scissor to clip it properly + gswf_recti foo = {-10000, -10000, 10000, 10000}; + do_screen_quad(&foo, tc, gdraw->manual_clear.vars[1], gstats, 1.0f); + } +} +#endif + +static void gdraw_DriverBlurPass(GDrawRenderState* r, int taps, F32* data, + gswf_recti* s, F32* tc, F32 height_max, + F32* clamp, GDrawStats* gstats) { + ProgramWithCachedVariableLocations* prg = &gdraw->blur_prog[taps]; + F32 clampv[4]; + + // fix OpenGL t values for rendertargets are from bottom, not top + tc[1] = height_max - tc[1]; + tc[3] = height_max - tc[3]; + + clampv[0] = clamp[0]; + clampv[1] = height_max - clamp[3]; + clampv[2] = clamp[2]; + clampv[3] = height_max - clamp[1]; + + use_lazy_shader(prg); + set_texture(0, r->tex[0]); + + glColorMask(1, 1, 1, 1); + glDisable(GL_BLEND); + glDisable(GL_SCISSOR_TEST); + + assert(prg->vars[0][VAR_blur_tap] >= 0); + glUniform4fv(prg->vars[0][VAR_blur_tap], taps, data); + glUniform4fv(prg->vars[0][VAR_blur_clampv], 1, clampv); + + do_screen_quad(s, tc, prg->vars[1], gstats, 0); + tag_resources(r->tex[0], 0, 0); +} + +static void gdraw_Colormatrix(GDrawRenderState* r, gswf_recti* s, float* tc, + GDrawStats* gstats) { + ProgramWithCachedVariableLocations* prg = &gdraw->colormatrix; + if 
(!gdraw_TextureDrawBufferBegin( + s, GDRAW_TEXTURE_FORMAT_rgba32, + GDRAW_TEXTUREDRAWBUFFER_FLAGS_needs_color | + GDRAW_TEXTUREDRAWBUFFER_FLAGS_needs_alpha, + NULL, gstats)) + return; + use_lazy_shader(prg); + set_texture(0, r->tex[0]); + glUniform4fv(prg->vars[0][VAR_colormatrix_data], 5, r->shader_data); + do_screen_quad(s, tc, gdraw->colormatrix.vars[1], gstats, 0); + tag_resources(r->tex[0], 0, 0); + r->tex[0] = gdraw_TextureDrawBufferEnd(gstats); +} + +static gswf_recti* get_valid_rect(GDrawTexture* tex) { + GDrawHandle* h = (GDrawHandle*)tex; + S32 n = (S32)(h - gdraw->rendertargets.handle); + assert(n >= 0 && n <= MAX_RENDER_STACK_DEPTH + 1); + return &gdraw->rt_valid[n]; +} + +static void set_clamp_constant(GLint constant, GDrawTexture* tex) { + gswf_recti* s = get_valid_rect(tex); + // when we make the valid data, we make sure there is an extra empty pixel + // at the border we also have to convert from GDraw coords to GL coords + // here. + glUniform4f(constant, (s->x0 - 0.5f) / gdraw->frametex_width, + (gdraw->tph - s->y1 - 0.5f) / gdraw->frametex_height, + (s->x1 + 0.5f) / gdraw->frametex_width, + (gdraw->tph - s->y0 + 0.5f) / gdraw->frametex_height); +} + +static void gdraw_Filter(GDrawRenderState* r, gswf_recti* s, float* tc, + int isbevel, GDrawStats* gstats) { + ProgramWithCachedVariableLocations* prg = + &gdraw->filter_prog[isbevel][r->filter_mode]; + if (!gdraw_TextureDrawBufferBegin( + s, GDRAW_TEXTURE_FORMAT_rgba32, + GDRAW_TEXTUREDRAWBUFFER_FLAGS_needs_color | + GDRAW_TEXTUREDRAWBUFFER_FLAGS_needs_alpha, + NULL, gstats)) + return; + use_lazy_shader(prg); + set_texture(0, r->tex[0]); + set_texture(1, r->tex[1]); + set_texture(2, r->tex[2]); + glUniform4fv(prg->vars[0][VAR_filter_color], 1, &r->shader_data[0]); + glUniform4f(prg->vars[0][VAR_filter_tc_off], + -r->shader_data[4] / (F32)gdraw->frametex_width, + r->shader_data[5] / (F32)gdraw->frametex_height, + r->shader_data[6], 0); + if (prg->vars[0][VAR_filter_color2] >= 0) + 
glUniform4fv(prg->vars[0][VAR_filter_color2], 1, &r->shader_data[8]); + set_clamp_constant(prg->vars[0][VAR_filter_clamp0], r->tex[0]); + set_clamp_constant(prg->vars[0][VAR_filter_clamp1], r->tex[1]); + do_screen_quad(s, tc, prg->vars[1], gstats, 0); + tag_resources(r->tex[0], 0, 0); + r->tex[0] = gdraw_TextureDrawBufferEnd(gstats); +} + +static void RADLINK gdraw_FilterQuad(GDrawRenderState* r, S32 x0, S32 y0, + S32 x1, S32 y1, GDrawStats* gstats) { + F32 tc[4]; + gswf_recti s; + + // clip to tile boundaries + s.x0 = RR_MAX(x0, gdraw->tx0p); + s.y0 = RR_MAX(y0, gdraw->ty0p); + s.x1 = RR_MIN(x1, gdraw->tx0p + gdraw->tpw); + s.y1 = RR_MIN(y1, gdraw->ty0p + gdraw->tph); + if (s.x1 <= s.x0 || s.y1 <= s.y0) return; + + // if it's a rendertarget, it's inverted from our design because OpenGL is + // bottom-left 0,0 and we have to compensate for scaling + tc[0] = (s.x0 - gdraw->tx0p) / (F32)gdraw->frametex_width; + tc[1] = (gdraw->tph - (s.y0 - gdraw->ty0p)) / (F32)gdraw->frametex_height; + tc[2] = (s.x1 - gdraw->tx0p) / (F32)gdraw->frametex_width; + tc[3] = (gdraw->tph - (s.y1 - gdraw->ty0p)) / (F32)gdraw->frametex_height; + + glUseProgram(0); + set_texture(0, 0); + set_texture(1, 0); + set_texture(2, 0); + + glStencilOp(GL_KEEP, GL_KEEP, GL_KEEP); + glStencilMask(255); + glDisable(GL_STENCIL_TEST); + glColorMask(1, 1, 1, 1); + glDisable(GL_BLEND); + glDisable(GL_DEPTH_TEST); + OPENGL_CHECK_SITE("gdraw_FilterQuad:pre_filter"); + + if (r->blend_mode == GDRAW_BLEND_filter) { + switch (r->filter) { + case GDRAW_FILTER_blur: { + GDrawBlurInfo b; + gswf_recti bounds = *get_valid_rect(r->tex[0]); + gdraw_ShiftRect(&s, &s, -gdraw->tx0p, + -gdraw->ty0p); // blur uses physical + // rendertarget coordinates + + b.BlurPass = gdraw_DriverBlurPass; + b.w = gdraw->tpw; + b.h = gdraw->tph; + b.frametex_width = gdraw->frametex_width; + b.frametex_height = gdraw->frametex_height; + + // blur passes must override the viewport/ortho projection + + gdraw->in_blur = true; // prevent
viewport/projection munging + // in start/end texture + set_viewport(); + set_projection(); + gdraw_Blur(&gdraw_funcs, &b, r, &s, &bounds, gstats); + + gdraw->in_blur = false; + + set_viewport(); + set_projection(); + break; + } + + case GDRAW_FILTER_colormatrix: + gdraw_Colormatrix(r, &s, tc, gstats); + break; + + case GDRAW_FILTER_dropshadow: + gdraw_Filter(r, &s, tc, 0, gstats); + break; + + case GDRAW_FILTER_bevel: + gdraw_Filter(r, &s, tc, 1, gstats); + break; + + default: + assert(0); + } + } else { + GDrawTexture* blend_tex = NULL; + const int* vvars; + + // for crazy blend modes, we need to read back from the framebuffer + // and do the blending in the pixel shader. we do this with + // CopyTexSubImage, rather than trying to render-to-texture-all-along, + // because that's a pain. + // @TODO: propagate the rectangle down and only copy what we need, like + // in 360 + + if (r->blend_mode == GDRAW_BLEND_special) { + blend_tex = (GDrawTexture*)get_color_rendertarget(gstats); + glBindTexture(GL_TEXTURE_2D, + ((GDrawHandle*)blend_tex)->handle.tex.gl); + if (gdraw->cur != gdraw->frame) + glCopyTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 0, 0, gdraw->tpw, + gdraw->tph); + else + glCopyTexSubImage2D(GL_TEXTURE_2D, 0, gdraw->tx0 - gdraw->tx0p, + gdraw->ty0 - gdraw->ty0p, gdraw->vx, + gdraw->vy, gdraw->tw, gdraw->th); + + set_texture(1, blend_tex); + } + + if (!set_render_state(r, GDRAW_vformat_v2tc2, &vvars, NULL, gstats)) + return; + do_screen_quad(&s, tc, vvars, gstats, 0); + tag_resources(r->tex[0], r->tex[1], 0); + if (blend_tex) gdraw_FreeTexture(blend_tex, 0, gstats); + } +} + +void gdraw_GLx_(NoMoreGDrawThisFrame)(void) { + clear_renderstate(); + ++gdraw->frame_counter; +} + +void gdraw_GLx_(BeginCustomDraw)(IggyCustomDrawCallbackRegion* region, + F32* matrix) { + clear_renderstate(); + gdraw_GetObjectSpaceMatrix(matrix, region->o2w, gdraw->projection, + depth_from_id(0), 1); +} + +void gdraw_GLx_(EndCustomDraw)(IggyCustomDrawCallbackRegion* region) { +
set_common_renderstate(); +} + +/////////////////////////////////////////////////////////////////////// +// +// Vertex and Fragment program initialization +// + +#include GDRAW_SHADERS + +static void make_vars(GDrawGLProgram prog, S32 vars[2][8], char** varn) { + if (prog) { + char** varn2 = (varn == pshader_general2_vars ? vshader_vsglihud_vars + : vshader_vsgl_vars); + S32 k; + for (k = 0; varn[k]; ++k) + if (varn[k][0]) + vars[0][k] = glGetUniformLocation(prog, varn[k]); + else + vars[0][k] = -1; + + for (k = 0; varn2[k]; ++k) + if (varn2[k][0]) + vars[1][k] = glGetUniformLocation(prog, varn2[k]); + else + vars[1][k] = -1; + + if (vars[0][0] >= 0) assert(vars[0][0] != vars[0][1]); + } +} + +static void make_fragment_program(ProgramWithCachedVariableLocations* p, + int num_strings, char** strings, + char** varn) { + S32 i; + GLint res; + GDrawGLProgram shad; + opengl_check(); + for (i = 0; i < MAX_VARS; ++i) { + p->vars[0][i] = -1; + p->vars[1][i] = -1; + } + + shad = glCreateShader(GL_FRAGMENT_SHADER); + glShaderSource(shad, num_strings, (const GLchar**)strings, NULL); + glCompileShader(shad); + glGetShaderiv(shad, GL_COMPILE_STATUS, &res); + if (!res) { + char errors[512]; + glGetShaderInfoLog(shad, sizeof(errors) - 2, &res, errors); + compilation_err(errors); + p->program = 0; + } else { + S32 vert = GDRAW_vformat_v2tc2; + ProgramWithCachedVariableLocations* basic_fprog_begin = + &gdraw->fprog[0][0][0]; + ProgramWithCachedVariableLocations* basic_fprog_end = + basic_fprog_begin + + (sizeof(gdraw->fprog) / sizeof(gdraw->fprog[0][0][0])); + if (p >= basic_fprog_begin && p < basic_fprog_end) { + // for basic rendering shaders, we have three versions corresponding + // to the three vertex formats we support. 
+ S32 n = (S32)(p - basic_fprog_begin); + vert = n % 3; + } + + if (p == &gdraw->ihud[0]) vert = GDRAW_vformat_ihud1; + + opengl_check(); + p->program = glCreateProgram(); + glAttachShader(p->program, shad); + glAttachShader(p->program, gdraw->vert[vert]); + opengl_check(); + + if (vert == GDRAW_vformat_ihud1) { + glBindAttribLocation(p->program, 0, "position"); + glBindAttribLocation(p->program, 1, "texcoord"); + glBindAttribLocation(p->program, 2, "material_index"); + } else { + glBindAttribLocation(p->program, 0, "position"); + glBindAttribLocation(p->program, 1, "in_attr"); + } + + glLinkProgram(p->program); + glGetProgramiv(p->program, GL_LINK_STATUS, &res); + if (!res) { + char errors[512]; + glGetProgramiv(p->program, GL_INFO_LOG_LENGTH, &res); + glGetProgramInfoLog(p->program, sizeof(errors) - 2, &res, errors); + compilation_err(errors); + glDeleteShader(shad); + glDeleteProgram(p->program); + p->program = 0; + } else + make_vars(p->program, p->vars, varn); + } + opengl_check(); + glUseProgram(p->program); // now activate the program + opengl_check(); +} + +static void make_vertex_program(GLuint* vprog, int num_strings, + char** strings) { + GLint res; + GDrawGLProgram shad; + opengl_check(); + + if (strings[0]) { + shad = glCreateShader(GL_VERTEX_SHADER); + glShaderSource(shad, num_strings, (const GLchar**)strings, NULL); + glCompileShader(shad); + glGetShaderiv(shad, GL_COMPILE_STATUS, &res); + if (!res) { + char errors[512]; + glGetShaderInfoLog(shad, sizeof(errors) - 2, &res, errors); + compilation_err(errors); + glDeleteShader(shad); + shad = 0; + } + opengl_check(); + *vprog = shad; + } else { + *vprog = 0; + } +} + +static void bind_sampler(ProgramWithCachedVariableLocations* prog, int varidx, + int sampleridx) { + int var = prog->vars[0][varidx]; + if (var >= 0) glUniform1i(var, sampleridx); +} + +static void make_vertex_programs(void) { + int type; + for (type = 0; type < GDRAW_vformat__basic_count; type++) + make_vertex_program(&gdraw->vert[type], 
NUMFRAGMENTS_vshader_vsgl, + vshader_vsgl(type)); + type = GDRAW_vformat_ihud1; + make_vertex_program(&gdraw->vert[type], NUMFRAGMENTS_vshader_vsglihud, + vshader_vsglihud()); +} + +static void lazy_shader(ProgramWithCachedVariableLocations* ptr) { + ProgramWithCachedVariableLocations* basic_fprog_begin = + &gdraw->fprog[0][0][0]; + ProgramWithCachedVariableLocations* basic_fprog_end = + basic_fprog_begin + + (sizeof(gdraw->fprog) / sizeof(gdraw->fprog[0][0][0])); + + if (ptr >= basic_fprog_begin && ptr < basic_fprog_end) { + S32 n = (S32)(ptr - basic_fprog_begin); + n /= 3; + + make_fragment_program(ptr, NUMFRAGMENTS_pshader_basic, + pshader_basic_arr[n], pshader_basic_vars); + bind_sampler(ptr, VAR_tex0, 0); + bind_sampler(ptr, VAR_tex1, AATEX_SAMPLER); + return; + } + + if (ptr >= &gdraw->exceptional_blend[0] && + ptr < &gdraw->exceptional_blend[GDRAW_BLENDSPECIAL__count]) { + S32 n = (S32)(ptr - gdraw->exceptional_blend); + make_fragment_program(ptr, NUMFRAGMENTS_pshader_exceptional_blend, + pshader_exceptional_blend_arr[n], + pshader_exceptional_blend_vars); + bind_sampler(ptr, VAR_tex0, 0); + bind_sampler(ptr, VAR_tex1, 1); + return; + } + + if (ptr >= &gdraw->filter_prog[0][0] && ptr <= &gdraw->filter_prog[1][15]) { + S32 n = (S32)(ptr - gdraw->filter_prog[0]); + make_fragment_program(ptr, NUMFRAGMENTS_pshader_filter, + pshader_filter_arr[n], pshader_filter_vars); + bind_sampler(ptr, VAR_filter_tex0, 0); + bind_sampler(ptr, VAR_filter_tex1, 1); + bind_sampler(ptr, VAR_filter_tex2, 2); + return; + } + + if (ptr >= &gdraw->blur_prog[0] && ptr <= &gdraw->blur_prog[MAX_TAPS]) { + S32 n = (S32)(ptr - gdraw->blur_prog); + make_fragment_program(ptr, NUMFRAGMENTS_pshader_blur, + pshader_blur_arr[n], pshader_blur_vars); + bind_sampler(ptr, VAR_blur_tex0, 0); + return; + } + + if (ptr == &gdraw->colormatrix) { + make_fragment_program(ptr, NUMFRAGMENTS_pshader_color_matrix, + pshader_color_matrix_arr[0], + pshader_color_matrix_vars); + bind_sampler(ptr, 
VAR_colormatrix_tex0, 0); + return; + } + + if (ptr == &gdraw->manual_clear) { + make_fragment_program(ptr, NUMFRAGMENTS_pshader_manual_clear, + pshader_manual_clear_arr[0], + pshader_manual_clear_vars); + return; + } + + if (ptr == &gdraw->ihud[0]) { + make_fragment_program(ptr, NUMFRAGMENTS_pshader_general2, + pshader_general2_arr[0], pshader_general2_vars); + bind_sampler(ptr, VAR_tex0, 0); + return; + } + + RR_BREAK(); +} + +static rrbool make_quad_indices(void) { + int size = QUAD_IB_COUNT * 6 * sizeof(GLushort); + GLushort* inds = IggyGDrawMalloc(size); + int i, e; + + if (!inds) return 0; + + // make quad inds + for (i = 0; i < QUAD_IB_COUNT; i++) { + inds[i * 6 + 0] = (GLushort)(i * 4 + 0); + inds[i * 6 + 1] = (GLushort)(i * 4 + 1); + inds[i * 6 + 2] = (GLushort)(i * 4 + 2); + inds[i * 6 + 3] = (GLushort)(i * 4 + 0); + inds[i * 6 + 4] = (GLushort)(i * 4 + 2); + inds[i * 6 + 5] = (GLushort)(i * 4 + 3); + } + + glGenBuffers(1, &gdraw->quad_ib); + glBindBuffer(GL_ARRAY_BUFFER, gdraw->quad_ib); + glBufferData(GL_ARRAY_BUFFER, size, inds, GL_STATIC_DRAW); + IggyGDrawFree(inds); + e = glGetError(); + if (e != GL_NO_ERROR) { + eat_gl_err(); + return 0; + } + + return 1; +} + +//////////////////////////////////////////////////////////////////////// +// +// Create and tear-down the state +// + +typedef struct { + S32 num_handles; + S32 num_bytes; +} GDrawResourceLimit; + +// These are the default limits used by GDraw unless the user specifies +// something else.
+static GDrawResourceLimit gdraw_limits[GDRAW_GLx_(RESOURCE__count)] = { + MAX_RENDER_STACK_DEPTH + 1, + 16 * 1024 * 1024, // GDRAW_GLx_RESOURCE_rendertarget + 500, + 20 * 1024 * 1024, // GDRAW_GLx_RESOURCE_texture + 1000, + 2 * 1024 * 1024, // GDRAW_GLx_RESOURCE_vertexbuffer +}; + +static GDrawHandleCache* make_handle_cache(gdraw_resourcetype type) { + S32 num_handles = gdraw_limits[type].num_handles; + S32 num_bytes = gdraw_limits[type].num_bytes; + GDrawHandleCache* cache = (GDrawHandleCache*)IggyGDrawMalloc( + sizeof(GDrawHandleCache) + (num_handles - 1) * sizeof(GDrawHandle)); + if (cache) { + gdraw_HandleCacheInit(cache, num_handles, num_bytes); + cache->is_vertex = (type == GDRAW_GLx_(RESOURCE_vertexbuffer)); + } + + return cache; +} + +static void free_gdraw() { + if (!gdraw) return; + if (gdraw->texturecache) IggyGDrawFree(gdraw->texturecache); + if (gdraw->vbufcache) IggyGDrawFree(gdraw->vbufcache); + IggyGDrawFree(gdraw); + gdraw = NULL; +} + +int gdraw_GLx_(SetResourceLimits)(gdraw_resourcetype type, S32 num_handles, + S32 num_bytes) { + GDrawStats stats = {0}; + + if (type == GDRAW_GLx_(RESOURCE_rendertarget)) // RT count is small and + // space is preallocated + num_handles = MAX_RENDER_STACK_DEPTH + 1; + + assert(type >= GDRAW_GLx_(RESOURCE_rendertarget) && + type < GDRAW_GLx_(RESOURCE__count)); + assert(num_handles >= 0); + assert(num_bytes >= 0); + + // nothing to do if the values are unchanged + if (gdraw_limits[type].num_handles == num_handles && + gdraw_limits[type].num_bytes == num_bytes) + return 1; + + gdraw_limits[type].num_handles = num_handles; + gdraw_limits[type].num_bytes = num_bytes; + + // if no gdraw context created, there's nothing to worry about + if (!gdraw) return 1; + + // resize the appropriate pool + switch (type) { + case GDRAW_GLx_(RESOURCE_rendertarget): + flush_rendertargets(&stats); + gdraw_HandleCacheInit(&gdraw->rendertargets, num_handles, + num_bytes); + return 1; + + case GDRAW_GLx_(RESOURCE_texture): + if 
(gdraw->texturecache) { + gdraw_res_flush(gdraw->texturecache, &stats); + IggyGDrawFree(gdraw->texturecache); + } + gdraw->texturecache = + make_handle_cache(GDRAW_GLx_(RESOURCE_texture)); + return gdraw->texturecache != NULL; + + case GDRAW_GLx_(RESOURCE_vertexbuffer): + if (gdraw->vbufcache) { + gdraw_res_flush(gdraw->vbufcache, &stats); + IggyGDrawFree(gdraw->vbufcache); + } + gdraw->vbufcache = + make_handle_cache(GDRAW_GLx_(RESOURCE_vertexbuffer)); + return gdraw->vbufcache != NULL; + + default: + return 0; + } +} + +GDrawTexture* RADLINK gdraw_GLx_(MakeTextureFromResource)( + U8* resource_file, S32 len, IggyFileTextureRaw* texture) { + int i, offset, mips; + const TextureFormatDesc* fmt; + GDrawTexture* tex; + GLuint gl_texture_handle; + + // look up the texture format + fmt = gdraw->tex_formats; + while (fmt->iggyfmt != texture->format && fmt->blkbytes) fmt++; + if (!fmt->blkbytes) // end of list - i.e. format not supported + return NULL; + + // prepare texture + glGenTextures(1, &gl_texture_handle); + if (gl_texture_handle == 0) return NULL; + + opengl_check(); + make_texture(gl_texture_handle); + glPixelStorei(GL_UNPACK_ALIGNMENT, 1); + + offset = texture->file_offset; + mips = RR_MAX(texture->mipmaps, 1); + + // disable mipmaps if non-pow-2 is unsupported + if (gdraw->has_conditional_non_power_of_two) + if (!is_pow2(texture->w) || !is_pow2(texture->h)) mips = 1; + + // disable mipmaps if chain is incomplete and GL_TEXTURE_MAX_LEVEL is + // unsupported + if (!gdraw->has_texture_max_level && mips > 1) { + int lastmip = mips - 1; + if ((texture->w >> lastmip) > 1 || (texture->h >> lastmip) > 1) + mips = 1; + } + + for (i = 0; i < mips; i++) { + U8* data = resource_file + offset; + int w = RR_MAX(texture->w >> i, 1); + int h = RR_MAX(texture->h >> i, 1); + int j; + + if (texture->format == IFT_FORMAT_rgba_4444_LE) { + for (j = 0; j < w * h; ++j) { + unsigned short x = *(unsigned short*)(data + j * 2); + x = ((x >> 12) & 0xf) | ((x << 4) & 0xfff0); + 
*(unsigned short*)(data + j * 2) = x; + } + } + if (texture->format == IFT_FORMAT_rgba_5551_LE) { + for (j = 0; j < w * h; ++j) { + unsigned short x = *(unsigned short*)(data + j * 2); + x = (x >> 15) | (x << 1); + *(unsigned short*)(data + j * 2) = x; + } + } + + if (fmt->fmt != 0) { + glTexImage2D(GL_TEXTURE_2D, i, fmt->intfmt, w, h, 0, fmt->fmt, + fmt->type, data); + offset += w * h * fmt->blkbytes; + } else { + int size = ((w + fmt->blkx - 1) / fmt->blkx) * + ((h + fmt->blky - 1) / fmt->blky) * fmt->blkbytes; + glCompressedTexImage2D(GL_TEXTURE_2D, i, fmt->intfmt, w, h, 0, size, + data); + offset += size; + } + + opengl_check(); + } + + if (gdraw->has_texture_max_level) + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, mips - 1); + + tex = gdraw_GLx_(WrappedTextureCreate)(gl_texture_handle, texture->w, + texture->h, mips > 1); + if (tex == NULL) glDeleteTextures(1, &gl_texture_handle); + opengl_check(); + return tex; +} + +void RADLINK gdraw_GLx_(DestroyTextureFromResource)(GDrawTexture* tex) { + if (tex) gdraw_GLx_(WrappedTextureDestroy)(tex); +} + +static rrbool hasext(const char* exts, const char* which) { + const char* where; + size_t len; + +#ifdef GDRAW_USE_glGetStringi + if (exts == NULL) { + GLint i, num_exts; + glGetIntegerv(GL_NUM_EXTENSIONS, &num_exts); + for (i = 0; i < num_exts; ++i) + if (0 == strcmp(which, (char const*)glGetStringi(GL_EXTENSIONS, i))) + return 1; + return 0; + } +#endif + + where = exts; + len = strlen(which); + + for (;;) { + where = strstr(where, which); + if (where == NULL) return false; + + if ((where == exts || *(where - 1) == ' ') // starts with terminator + && (where[len] == ' ' || where[len] == 0)) // ends with terminator + return true; + where += len; + } +} + +static GDrawFunctions* create_context(S32 w, S32 h) { + gdraw = IggyGDrawMalloc(sizeof(*gdraw)); + if (!gdraw) return NULL; + + memset(gdraw, 0, sizeof(*gdraw)); + + gdraw->texturecache = make_handle_cache(GDRAW_GLx_(RESOURCE_texture)); + gdraw->vbufcache = 
make_handle_cache(GDRAW_GLx_(RESOURCE_vertexbuffer)); + gdraw_HandleCacheInit( + &gdraw->rendertargets, + gdraw_limits[GDRAW_GLx_(RESOURCE_rendertarget)].num_handles, + gdraw_limits[GDRAW_GLx_(RESOURCE_rendertarget)].num_bytes); + + if (!gdraw->texturecache || !gdraw->vbufcache || !make_quad_indices()) { + free_gdraw(); + return NULL; + } + + opengl_check(); + + gdraw->frametex_width = w; + gdraw->frametex_height = h; + gdraw->frame->cached = false; + + // if the globals have already been initialized, this has no effect; + // otherwise it initializes them with no global texture storage and the + // default global rendertarget storage + + glGenFramebuffers(1, &gdraw->framebuffer_stack_object); + glGenFramebuffers(1, &gdraw->framebuffer_copy_to_texture); + opengl_check(); + + make_vertex_programs(); + // fragment shaders are created lazily + + gdraw_funcs.SetViewSizeAndWorldScale = gdraw_SetViewSizeAndWorldScale; + gdraw_funcs.RenderingBegin = gdraw_RenderingBegin; + gdraw_funcs.RenderingEnd = gdraw_RenderingEnd; + gdraw_funcs.RenderTileBegin = gdraw_RenderTileBegin; + gdraw_funcs.RenderTileEnd = gdraw_RenderTileEnd; + gdraw_funcs.GetInfo = gdraw_GetInfo; + gdraw_funcs.DescribeTexture = gdraw_DescribeTexture; + gdraw_funcs.DescribeVertexBuffer = gdraw_DescribeVertexBuffer; + + gdraw_funcs.TextureDrawBufferBegin = gdraw_TextureDrawBufferBegin; + gdraw_funcs.TextureDrawBufferEnd = gdraw_TextureDrawBufferEnd; + + gdraw_funcs.DrawIndexedTriangles = gdraw_DrawIndexedTriangles; + gdraw_funcs.FilterQuad = gdraw_FilterQuad; + + gdraw_funcs.SetAntialiasTexture = gdraw_SetAntialiasTexture; + + gdraw_funcs.ClearStencilBits = gdraw_ClearStencilBits; + gdraw_funcs.ClearID = gdraw_ClearID; + + gdraw_funcs.MakeTextureBegin = gdraw_MakeTextureBegin; + gdraw_funcs.MakeTextureMore = NULL; + gdraw_funcs.MakeTextureEnd = gdraw_MakeTextureEnd; + + gdraw_funcs.UpdateTextureRect = gdraw_UpdateTextureRect; + gdraw_funcs.UpdateTextureBegin = gdraw_UpdateTextureBegin; + 
gdraw_funcs.UpdateTextureEnd = gdraw_UpdateTextureEnd; + gdraw_funcs.FreeTexture = gdraw_FreeTexture; + gdraw_funcs.TryToLockTexture = gdraw_TryToLockTexture; + + gdraw_funcs.MakeVertexBufferBegin = gdraw_MakeVertexBufferBegin; + gdraw_funcs.MakeVertexBufferMore = gdraw_MakeVertexBufferMore; + gdraw_funcs.MakeVertexBufferEnd = gdraw_MakeVertexBufferEnd; + gdraw_funcs.TryToLockVertexBuffer = gdraw_TryToLockVertexBuffer; + gdraw_funcs.FreeVertexBuffer = gdraw_FreeVertexBuffer; + + gdraw_funcs.UnlockHandles = gdraw_UnlockHandles; + gdraw_funcs.SetTextureUniqueID = gdraw_SetTextureUniqueID; + + gdraw_funcs.MakeTextureFromResource = + (gdraw_make_texture_from_resource*)gdraw_GLx_(MakeTextureFromResource); + gdraw_funcs.FreeTextureFromResource = + gdraw_GLx_(DestroyTextureFromResource); + + gdraw_funcs.Set3DTransform = gdraw_Set3DTransform; + + return &gdraw_funcs; +} + +void gdraw_GLx_(DestroyContext)(void) { + if (gdraw) { + GDrawStats stats = {0}; + if (gdraw->texturecache) gdraw_res_flush(gdraw->texturecache, &stats); + if (gdraw->vbufcache) gdraw_res_flush(gdraw->vbufcache, &stats); + flush_rendertargets(&stats); + + if (gdraw->aa_tex) glDeleteTextures(1, &gdraw->aa_tex); + + if (gdraw->quad_ib) glDeleteBuffers(1, &gdraw->quad_ib); + } + + opengl_check(); + free_gdraw(); +} \ No newline at end of file diff --git a/targets/app/common/Iggy/gdraw/gdraw_shared.inl b/targets/app/common/Iggy/gdraw/gdraw_shared.inl new file mode 100644 index 000000000..fac66a23d --- /dev/null +++ b/targets/app/common/Iggy/gdraw/gdraw_shared.inl @@ -0,0 +1,2693 @@ +// gdraw_shared.inl - author: Sean Barrett - copyright 2010 RAD Game Tools +// +// This file implements some common code that can be shared across +// all the sample implementations of GDraw. 
+ +#if defined(IGGY_DISABLE_GDRAW_ASSERT) +#define assert(x) +#else +#include +#endif + +#include + +#if !defined(GDRAW_MAYBE_UNUSED) +#define GDRAW_MAYBE_UNUSED +#endif + +/////////////////////////////////////////////////////////////// +// +// GDrawHandleCache manages resource "handles" used by Iggy +// (i.e. these handles wrap the platform resource handles, +// and this file provides those wrappers and facilities for +// LRU tracking them). Moreover, for console platforms, we +// actually implement our own managed resource pools. +// +// This is the main state machine when GDRAW_MANAGE_MEM is defined: +// (which covers all console platforms) +// +// +------+ +--------+ | +// | Live |<------->| Locked | | +// +------+ +--------+ | +// / \ ^ | +// / \ \ | +// v v \ | +// +------+ +------+ +------+ | | +// | Dead |--->| Free |<---| User | | | +// +------+ +------+ +------+ | | +// ^ ^ ^ ^ | | +// \ / \ | | | +// \ / v | | | +// +--------+ +-------+ / | +// | Pinned |<--------| Alloc |/ | +// +--------+ +-------+ | +// +// "Free" handles are not in use and available for allocation. +// "Alloc" handles have been assigned by GDraw, but do not yet +// have a system resource backing them. Resources stay in +// this state until we know that for sure that we're going +// to be able to successfully complete creation, at which +// point the resource transitions to one of the regular states. +// "Live" handles correspond to resources that may be used +// for rendering. They are kept in LRU order. Old resources +// may be evicted to make space. +// "Locked" handles cover resources that are going to be used +// in the next draw command. Once a resource is marked locked, +// it may not be evicted until it's back to "Live". +// "Dead" handles describe resources that have been freed on the +// CPU side, but are still in use by the GPU. Their memory may +// only be reclaimed once the GPU is done with them, at which +// point they are moved to the "Free" list. 
Items on the "Dead" +// list appear ordered by the last time they were used by the +// GPU - "most stale" first. +// "Pinned" resources can be used in any draw call without getting +// locked first. They can never be LRU-freed, but their memory +// is still managed by GDraw. Currently this is only used for +// the Iggy font cache. +// "User" (user-owned) resources are exactly that. They act much like +// pinned resources, but their memory isn't managed by GDraw. +// When a user-owned resource is freed, we really need to free +// it immediately (instead of marking it as "dead"), which might +// necessitate stalling the CPU until the GPU is finished using +// that resource. Since we don't own the memory, delayed frees +// are not an option. +// +// Without GDRAW_MANAGE_MEM, there's no "Dead" resources, and all +// frees are performed immediately. + +typedef struct GDrawHandleCache GDrawHandleCache; +typedef struct GDrawHandle GDrawHandle; + +typedef struct { + U64 value; +} GDrawFence; + +typedef enum { + GDRAW_HANDLE_STATE_free = 0, + GDRAW_HANDLE_STATE_live, + GDRAW_HANDLE_STATE_locked, + GDRAW_HANDLE_STATE_dead, + GDRAW_HANDLE_STATE_pinned, + GDRAW_HANDLE_STATE_user_owned, + GDRAW_HANDLE_STATE_alloc, + GDRAW_HANDLE_STATE__count, + + // not an actual state! 
+ GDRAW_HANDLE_STATE_sentinel = GDRAW_HANDLE_STATE__count, +} GDrawHandleState; + +struct GDrawHandle { + GDrawNativeHandle handle; // platform handle to a resource (variable size) + void* owner; // 4/8 // opaque handle used to allow freeing resources + // without calling back to owner + + GDrawHandleCache* cache; // 4/8 // which cache this handle came from + + GDrawHandle *next, *prev; // 8/16 // doubly-linked list + +#if defined(GDRAW_MANAGE_MEM) + void* raw_ptr; // 4/8 // pointer to allocation - when you're managing + // memory manually +#if defined(GDRAW_CORRUPTION_CHECK) + U32 cached_raw_value[4]; + rrbool has_check_value; +#endif +#endif + + GDrawFence fence; // 8 // (optional) platform fence for resource + // 4 + U32 bytes : 28; // estimated storage cost to allow setting a loose limit + U32 state : 4; // state the handle is in +}; + +// validate alignment to make sure structure will pack correctly +#if defined(__RAD64__) +RR_COMPILER_ASSERT((sizeof(GDrawHandle) & 7) == 0); +#else +RR_COMPILER_ASSERT((sizeof(GDrawHandle) & 3) == 0); +#endif + +struct GDrawHandleCache { + S32 bytes_free; + S32 total_bytes; + S32 max_handles; + U32 is_vertex : 1; // vertex buffers have different warning codes and + // generate discard callbacks + U32 is_thrashing : 1; + U32 did_defragment : 1; + // 30 unused bits + GDrawHandle state[GDRAW_HANDLE_STATE__count]; // sentinel nodes for all of + // the state lists +#if defined(GDRAW_MANAGE_MEM) + struct gfx_allocator* alloc; +#endif +#if defined(GDRAW_MANAGE_MEM_TWOPOOL) + struct gfx_allocator* alloc_other; +#endif + GDrawFence prev_frame_start, + prev_frame_end; // fence value at start/end of previous frame, for + // thrashing detection + GDrawHandle handle[1]; // the rest of the handles must be stored right + // after this in the containing structure +}; + +#if defined(GDRAW_CORRUPTION_CHECK) +// values for corruption checking +#define GDRAW_CORRUPTIONCHECK_renderbegin 0x10 +#define GDRAW_CORRUPTIONCHECK_renderend 0x20 +#define 
GDRAW_CORRUPTIONCHECK_nomoregdraw 0x30 +#define GDRAW_CORRUPTIONCHECK_maketexbegin 0x40 +#define GDRAW_CORRUPTIONCHECK_maketexend 0x50 + +#define GDRAW_CORRUPTIONCHECK_wrappedcreateend 0x60 +#define GDRAW_CORRUPTIONCHECK_wrappedcreatebegin 0x61 +#define GDRAW_CORRUPTIONCHECK_wrappeddestroyend 0x70 +#define GDRAW_CORRUPTIONCHECK_wrappeddestroybegin 0x71 + +#define GDRAW_CORRUPTIONCHECK_allochandle 0x80 +#define GDRAW_CORRUPTIONCHECK_allochandle_begin 0x81 +#define GDRAW_CORRUPTIONCHECK_allochandle_postreap 0x82 +#define GDRAW_CORRUPTIONCHECK_allochandle_postfree1 0x83 +#define GDRAW_CORRUPTIONCHECK_allochandle_postfree2 0x84 +#define GDRAW_CORRUPTIONCHECK_allochandle_postfree3 0x85 +#define GDRAW_CORRUPTIONCHECK_allochandle_postalloc1 0x86 +#define GDRAW_CORRUPTIONCHECK_allochandle_postalloc2 0x87 +#define GDRAW_CORRUPTIONCHECK_allochandle_postalloc3 0x88 +#define GDRAW_CORRUPTIONCHECK_allochandle_defrag 0x89 + +#define GDRAW_CORRUPTIONCHECK_freetex 0x90 + +static U32* debug_raw_address(GDrawHandle* t, int choice) { + static int offset_table[4] = {0x555555, 0xaaaaaa, 0x333333, 0x6e6e6e}; + U8* base = (U8*)t->raw_ptr; + int offset = offset_table[choice] & (t->bytes - 1) & ~3; + return (U32*)(base + offset); +} + +static void debug_check_overlap_one(GDrawHandle* t, U8* ptr, S32 len) { + assert(len >= 0); + if (t->raw_ptr && t->raw_ptr != ptr) { + assert(t->raw_ptr < ptr || t->raw_ptr >= ptr + len); + } +} + +static void debug_check_overlap(GDrawHandleCache* c, U8* ptr, S32 len) { + GDrawHandle* t = c->head; + while (t) { + debug_check_overlap_one(t, ptr, len); + t = t->next; + } + t = c->active; + while (t) { + debug_check_overlap_one(t, ptr, len); + t = t->next; + } +} + +static void debug_check_raw_values(GDrawHandleCache* c) { + GDrawHandle* t = c->head; + while (t) { + if (t->raw_ptr && t->has_check_value) { + int i; + for (i = 0; i < 4; ++i) { + if (*debug_raw_address(t, i) != t->cached_raw_value[i]) { + // zlog("!Iggy texture corruption found\n"); + // 
zlog("t=%p, t->raw_ptr=%p\n", t, t->raw_ptr); + // zlog("Cached values: %08x %08x %08x %08x\n", + // t->cached_raw_value[0], t->cached_raw_value[1], + // t->cached_raw_value[2], t->cached_raw_value[3]); + // zlog("Current values: %08x %08x %08x %08x\n", + // *debug_raw_address(t,0), *debug_raw_address(t,1), + // *debug_raw_address(t,2), *debug_raw_address(t,3)); + assert(0); + } + } + } + t = t->next; + } + t = c->active; + while (t) { + if (t->raw_ptr && t->has_check_value) { + int i; + for (i = 0; i < 4; ++i) { + if (*debug_raw_address(t, i) != t->cached_raw_value[i]) { + // zlog("!Iggy texture corruption found\n"); + // zlog("t=%p, t->raw_ptr=%p\n", t, t->raw_ptr); + // zlog("Cached values: %08x %08x %08x %08x\n", + // t->cached_raw_value[0], t->cached_raw_value[1], + // t->cached_raw_value[2], t->cached_raw_value[3]); + // zlog("Current values: %08x %08x %08x %08x\n", + // *debug_raw_address(t,0), *debug_raw_address(t,1), + // *debug_raw_address(t,2), *debug_raw_address(t,3)); + assert(0); + } + } + } + t = t->next; + } +} + +#if !defined(GDRAW_CORRUPTION_MASK) +#define GDRAW_CORRUPTION_MASK 0 +#endif +#define debug_check_raw_values_if(c, v) \ + if ((GDRAW_CORRUPTION_CHECK & ~GDRAW_CORRUPTION_MASK) == \ + ((v) & ~GDRAW_CORRUPTION_MASK)) \ + debug_check_raw_values(c); \ + else + +static void debug_set_raw_value(GDrawHandle* t) { + if (t->raw_ptr) { + int i; + for (i = 0; i < 4; ++i) + t->cached_raw_value[i] = *debug_raw_address(t, i); + t->has_check_value = true; + } +} + +static void debug_unset_raw_value(GDrawHandle* t) { + t->has_check_value = false; +} + +static void debug_check_value_is_unreferenced(GDrawHandleCache* c, void* ptr) { + GDrawHandle* t = c->head; + while (t) { + assert(t->raw_ptr != ptr); + t = t->next; + } + t = c->active; + while (t) { + assert(t->raw_ptr != ptr); + t = t->next; + } +} + +#else + +#define debug_check_overlap(c, p, len) +#define debug_set_raw_value(t) +#define debug_check_value_is_unreferenced(c, p) +#define 
debug_unset_raw_value(t) +#define debug_check_raw_values(c) +#define debug_check_raw_values_if(c, v) +#endif + +#if defined(SUPERDEBUG) +static void check_lists(GDrawHandleCache* c) { + GDrawHandle *sentinel, *t; + U32 state; + + // for all lists, verify that they are consistent and + // properly linked + for (state = 0; state < GDRAW_HANDLE_STATE__count; state++) { + S32 count = 0; + sentinel = &c->state[state]; + + assert(!sentinel->cache); + assert(sentinel->state == GDRAW_HANDLE_STATE_sentinel); + for (t = sentinel->next; t != sentinel; t = t->next) { + count++; + assert(t->cache == c); + assert(t->state == state); + assert(t->prev->next == t); + assert(t->next->prev == t); + assert(count < 50000); + } + } + + // for dead list, additionally verify that it's in the right + // order (namely, sorted by ascending fence index) + sentinel = &c->state[GDRAW_HANDLE_STATE_dead]; + for (t = sentinel->next; t != sentinel; t = t->next) { + assert(t->prev == sentinel || t->fence.value >= t->prev->fence.value); + } +} + +#include + +static const char* gdraw_StateName(U32 state) { + switch (state) { + case GDRAW_HANDLE_STATE_free: + return "free"; + case GDRAW_HANDLE_STATE_live: + return "live"; + case GDRAW_HANDLE_STATE_locked: + return "locked"; + case GDRAW_HANDLE_STATE_dead: + return "dead"; + case GDRAW_HANDLE_STATE_pinned: + return "pinned"; + case GDRAW_HANDLE_STATE_user_owned: + return "user-owned"; + case GDRAW_HANDLE_STATE_alloc: + return "alloc"; + case GDRAW_HANDLE_STATE_sentinel: + return ""; + default: + return "???"; + } +} + +#else +static RADINLINE void check_lists(GDrawHandleCache* c) { + RR_UNUSED_VARIABLE(c); +} +#endif + +static void gdraw_HandleTransitionInsertBefore(GDrawHandle* t, + GDrawHandleState new_state, + GDrawHandle* succ) { + check_lists(t->cache); + assert(t->state != + GDRAW_HANDLE_STATE_sentinel); // sentinels should never get here! + assert(t->state != (U32)new_state); // code should never call "transition" + // if it's not transitioning! 
+ // unlink from prev state + t->prev->next = t->next; + t->next->prev = t->prev; + // add to list for new state + t->next = succ; + t->prev = succ->prev; + t->prev->next = t; + t->next->prev = t; +#if defined(SUPERDEBUG) + printf("GD %chandle %p %s->%s\n", t->cache->is_vertex ? 'v' : 't', t, + gdraw_StateName(t->state), gdraw_StateName(new_state)); +#endif + t->state = new_state; + check_lists(t->cache); +} + +static RADINLINE void gdraw_HandleTransitionTo(GDrawHandle* t, + GDrawHandleState new_state) { + gdraw_HandleTransitionInsertBefore(t, new_state, + &t->cache->state[new_state]); +} + +#if defined(GDRAW_MANAGE_MEM_TWOPOOL) +static rrbool gdraw_MigrateResource(GDrawHandle* t, GDrawStats* stats); +static void gdraw_res_free(GDrawHandle* t, GDrawStats* stats); +#endif + +static rrbool gdraw_HandleCacheLockStats(GDrawHandle* t, void* owner, + GDrawStats* stats) { + RR_UNUSED_VARIABLE(stats); + + // if the GPU memory is owned by the user, then we never spontaneously + // free it, and we can always report true. moreover, Iggy doesn't bother + // keeping 'owner' consistent in this case, so we must check this before + // verifying t->owner. + if (t->state == GDRAW_HANDLE_STATE_user_owned) return true; + + // if t->owner has changed, then Iggy is trying to lock an old version + // of this handle from before (the handle has already been recycled to + // point to a new resource) + if (t->owner != owner) return false; + + // otherwise, it's a valid resource and we should lock it until the next + // unlock call + assert(t->state == GDRAW_HANDLE_STATE_live || + t->state == GDRAW_HANDLE_STATE_locked || + t->state == GDRAW_HANDLE_STATE_pinned); + if (t->state == GDRAW_HANDLE_STATE_live) { +#if defined(GDRAW_MANAGE_MEM_TWOPOOL) + // if we defragmented this frame, we can't just make resources live; + // we need to migrate them to their new location. 
(which might fail + // if we don't have enough memory left in the new pool) + if (t->cache->did_defragment) { + if (!gdraw_MigrateResource(t, stats)) { + gdraw_res_free(t, stats); + return false; + } + } +#endif + gdraw_HandleTransitionTo(t, GDRAW_HANDLE_STATE_locked); + } + return true; +} + +static rrbool gdraw_HandleCacheLock(GDrawHandle* t, void* owner) { + return gdraw_HandleCacheLockStats(t, owner, NULL); +} + +static void gdraw_HandleCacheUnlock(GDrawHandle* t) { + assert(t->state == GDRAW_HANDLE_STATE_locked || + t->state == GDRAW_HANDLE_STATE_pinned || + t->state == GDRAW_HANDLE_STATE_user_owned); + if (t->state == GDRAW_HANDLE_STATE_locked) + gdraw_HandleTransitionTo(t, GDRAW_HANDLE_STATE_live); +} + +static void gdraw_HandleCacheUnlockAll(GDrawHandleCache* c) { + GDrawHandle* sentinel = &c->state[GDRAW_HANDLE_STATE_locked]; + while (sentinel->next != sentinel) + gdraw_HandleTransitionTo(sentinel->next, GDRAW_HANDLE_STATE_live); +} + +static void gdraw_HandleCacheInit(GDrawHandleCache* c, S32 num_handles, + S32 bytes) { + S32 i; + assert(num_handles > 0); + c->max_handles = num_handles; + c->total_bytes = bytes; + c->bytes_free = c->total_bytes; + c->is_vertex = false; + c->is_thrashing = false; + c->did_defragment = false; + for (i = 0; i < GDRAW_HANDLE_STATE__count; i++) { + c->state[i].owner = NULL; + c->state[i].cache = + NULL; // should never follow cache link from sentinels! + c->state[i].next = c->state[i].prev = &c->state[i]; +#if defined(GDRAW_MANAGE_MEM) + c->state[i].raw_ptr = NULL; +#endif + c->state[i].fence.value = 0; + c->state[i].bytes = 0; + c->state[i].state = GDRAW_HANDLE_STATE_sentinel; + } + for (i = 0; i < num_handles; ++i) { + c->handle[i].cache = c; + c->handle[i].prev = + (i == 0) ? &c->state[GDRAW_HANDLE_STATE_free] : &c->handle[i - 1]; + c->handle[i].next = (i == num_handles - 1) + ? 
&c->state[GDRAW_HANDLE_STATE_free] + : &c->handle[i + 1]; + c->handle[i].bytes = 0; + c->handle[i].state = GDRAW_HANDLE_STATE_free; +#if defined(GDRAW_MANAGE_MEM) + c->handle[i].raw_ptr = NULL; +#endif + } + c->state[GDRAW_HANDLE_STATE_free].next = &c->handle[0]; + c->state[GDRAW_HANDLE_STATE_free].prev = &c->handle[num_handles - 1]; + c->prev_frame_start.value = 0; + c->prev_frame_end.value = 0; +#if defined(GDRAW_MANAGE_MEM) + c->alloc = NULL; +#endif +#if defined(GDRAW_MANAGE_MEM_TWOPOOL) + c->alloc_other = NULL; +#endif + check_lists(c); +} + +static GDrawHandle* gdraw_HandleCacheAllocateBegin(GDrawHandleCache* c) { + GDrawHandle* free_list = &c->state[GDRAW_HANDLE_STATE_free]; + GDrawHandle* t = NULL; + if (free_list->next != free_list) { + t = free_list->next; + gdraw_HandleTransitionTo(t, GDRAW_HANDLE_STATE_alloc); + t->bytes = 0; + t->owner = 0; +#if defined(GDRAW_MANAGE_MEM) + t->raw_ptr = NULL; +#endif +#if defined(GDRAW_CORRUPTION_CHECK) + t->has_check_value = false; +#endif + } + return t; +} + +static void gdraw_HandleCacheAllocateEnd(GDrawHandle* t, S32 bytes, void* owner, + GDrawHandleState new_state) { + assert(t->cache); + assert(t->bytes == 0); + assert(t->owner == 0); + assert(t->state == GDRAW_HANDLE_STATE_alloc); + if (bytes == 0) + assert(new_state == GDRAW_HANDLE_STATE_user_owned); + else + assert(new_state == GDRAW_HANDLE_STATE_locked || + new_state == GDRAW_HANDLE_STATE_pinned); + t->bytes = bytes; + t->owner = owner; + t->cache->bytes_free -= bytes; + + gdraw_HandleTransitionTo(t, new_state); +} + +static void gdraw_HandleCacheFree(GDrawHandle* t) { + GDrawHandleCache* c = t->cache; + assert(t->state != GDRAW_HANDLE_STATE_alloc && + t->state != GDRAW_HANDLE_STATE_sentinel); + c->bytes_free += t->bytes; + t->bytes = 0; + t->owner = 0; +#if defined(GDRAW_MANAGE_MEM) + t->raw_ptr = 0; +#endif +#if defined(GDRAW_CORRUPTION_CHECK) + t->has_check_value = false; +#endif + gdraw_HandleTransitionTo(t, GDRAW_HANDLE_STATE_free); +} + +static void 
gdraw_HandleCacheAllocateFail(GDrawHandle* t) { + assert(t->state == GDRAW_HANDLE_STATE_alloc); + gdraw_HandleTransitionTo(t, GDRAW_HANDLE_STATE_free); +} + +static GDrawHandle* gdraw_HandleCacheGetLRU(GDrawHandleCache* c) { + // TransitionTo always inserts at the end, which means that the resources + // at the front of the LRU list are the oldest ones, since in-use resources + // will get appended on every transition from "locked" to "live". + GDrawHandle* sentinel = &c->state[GDRAW_HANDLE_STATE_live]; + return (sentinel->next != sentinel) ? sentinel->next : NULL; +} + +static void gdraw_HandleCacheTick(GDrawHandleCache* c, GDrawFence now) { + c->prev_frame_start = c->prev_frame_end; + c->prev_frame_end = now; + + // reset these flags every frame + c->is_thrashing = false; + c->did_defragment = false; +} + +#if defined(GDRAW_MANAGE_MEM) + +static void gdraw_HandleCacheInsertDead(GDrawHandle* t) { + GDrawHandle *s, *sentinel; + + assert(t->state == GDRAW_HANDLE_STATE_live || + t->state == GDRAW_HANDLE_STATE_locked || + t->state == GDRAW_HANDLE_STATE_pinned); + + // figure out where t belongs in the dead list in "chronological order" + // do this by finding its (chronological) successor s + sentinel = &t->cache->state[GDRAW_HANDLE_STATE_dead]; + s = sentinel->next; + while (s != sentinel && s->fence.value <= t->fence.value) s = s->next; + + // and then insert it there + gdraw_HandleTransitionInsertBefore(t, GDRAW_HANDLE_STATE_dead, s); +} + +#endif + +//////////////////////////////////////////////////////////////////////// +// +// Set transformation matrices +// + +// Our vertex shaders use this convention: +// world: our world matrices always look like this +// m00 m01 0 t0 +// m10 m11 0 t1 +// 0 0 0 d +// 0 0 0 1 +// +// we just store the first two rows and insert d +// in the first row, third column. our input position vectors are +// always (x,y,0,1) or (x,y,0,0), so we can still just use dp4 to +// compute final x/y. 
after that it's a single move to set the +// correct depth value. +// +// viewproj: our view-projection matrix is always just a 2D scale+translate, +// i.e. the matrix looks like this: +// +// p[0] 0 0 p[2] +// 0 p[1] 0 p[3] +// 0 0 1 0 +// 0 0 0 1 +// +// just store (p[0],p[1],p[2],p[3]) in a 4-component vector and the +// projection transform is a single multiply-add. +// +// The output is volatile since it's often in Write-Combined memory where we +// really don't want compiler reordering. + +static RADINLINE void gdraw_PixelSpace(volatile F32* RADRESTRICT vvec) { + // 1:1 pixel mapping - just identity since our "view space" is pixels + vvec[0] = 1.0f; + vvec[1] = 0.0f; + vvec[2] = 0.0f; + vvec[3] = 0.0f; + vvec[4] = 0.0f; + vvec[5] = 1.0f; + vvec[6] = 0.0f; + vvec[7] = 0.0f; +} + +static RADINLINE void gdraw_WorldSpace(volatile F32* RADRESTRICT vvec, + F32* RADRESTRICT world_to_pixel, + F32 depth, F32 misc) { + // World->pixel space transform is just a scale + vvec[0] = world_to_pixel[0]; + vvec[1] = 0.0f; + vvec[2] = depth; + vvec[3] = 0.0f; + vvec[4] = 0.0f; + vvec[5] = world_to_pixel[1]; + vvec[6] = misc; + vvec[7] = 0.0f; +} + +static RADINLINE void gdraw_ObjectSpace(volatile F32* RADRESTRICT vvec, + gswf_matrix* RADRESTRICT xform, + F32 depth, F32 misc) { + // Object->pixel transform is a 2D homogeneous matrix transform + F32 m00 = xform->m00; + F32 m01 = xform->m01; + F32 m10 = xform->m10; + F32 m11 = xform->m11; + F32 trans0 = xform->trans[0]; + F32 trans1 = xform->trans[1]; + + vvec[0] = m00; + vvec[1] = m01; + vvec[2] = depth; + vvec[3] = trans0; + vvec[4] = m10; + vvec[5] = m11; + vvec[6] = misc; + vvec[7] = trans1; +} + +static void gdraw_GetObjectSpaceMatrix(F32* RADRESTRICT mat, + gswf_matrix* RADRESTRICT xform, + F32* RADRESTRICT proj, F32 depth, + int out_col_major) { + int row = out_col_major ? 1 : 4; + int col = out_col_major ? 
4 : 1; + + F32 xs = proj[0]; + F32 ys = proj[1]; + + mat[0 * row + 0 * col] = xform->m00 * xs; + mat[0 * row + 1 * col] = xform->m01 * xs; + mat[0 * row + 2 * col] = 0.0f; + mat[0 * row + 3 * col] = xform->trans[0] * xs + proj[2]; + + mat[1 * row + 0 * col] = xform->m10 * ys; + mat[1 * row + 1 * col] = xform->m11 * ys; + mat[1 * row + 2 * col] = 0.0f; + mat[1 * row + 3 * col] = xform->trans[1] * ys + proj[3]; + + mat[2 * row + 0 * col] = 0.0f; + mat[2 * row + 1 * col] = 0.0f; + mat[2 * row + 2 * col] = 0.0f; + mat[2 * row + 3 * col] = depth; + + mat[3 * row + 0 * col] = 0.0f; + mat[3 * row + 1 * col] = 0.0f; + mat[3 * row + 2 * col] = 0.0f; + mat[3 * row + 3 * col] = 1.0f; +} + +//////////////////////////////////////////////////////////////////////// +// +// Blurs +// +// symmetrically expand a rectangle by ex/ey pixels on both sides, then clamp to +// tile bounds +static void gdraw_ExpandRect(gswf_recti* out, gswf_recti const* in, S32 ex, + S32 ey, S32 w, S32 h) { + out->x0 = RR_MAX(in->x0 - ex, 0); + out->y0 = RR_MAX(in->y0 - ey, 0); + out->x1 = RR_MIN(in->x1 + ex, w); + out->y1 = RR_MIN(in->y1 + ey, h); +} + +static void gdraw_ShiftRect(gswf_recti* out, gswf_recti const* in, S32 dx, + S32 dy) { + out->x0 = in->x0 + dx; + out->y0 = in->y0 + dy; + out->x1 = in->x1 + dx; + out->y1 = in->y1 + dy; +} + +#define MAX_TAPS 9 // max # of bilinear samples in one 'convolution' step + +enum { + // basic shader family + VAR_tex0 = 0, + VAR_tex1, + VAR_cmul, + VAR_cadd, + VAR_focal, + + // filter family + VAR_filter_tex0 = 0, + VAR_filter_tex1, + VAR_filter_color, + VAR_filter_tc_off, + VAR_filter_tex2, + VAR_filter_clamp0, + VAR_filter_clamp1, + VAR_filter_color2, + MAX_VARS, + + // blur family + VAR_blur_tex0 = 0, + VAR_blur_tap, + VAR_blur_clampv, + + // color matrix family + VAR_colormatrix_tex0 = 0, + VAR_colormatrix_data, + + // ihud family + VAR_ihudv_worldview = 0, + VAR_ihudv_material, + VAR_ihudv_textmode, +}; + +typedef struct { + S32 w, h, frametex_width, 
frametex_height; + void (*BlurPass)(GDrawRenderState* r, int taps, float* data, gswf_recti* s, + float* tc, float height_max, float* clampv, + GDrawStats* gstats); +} GDrawBlurInfo; + +static GDrawTexture* gdraw_BlurPass(GDrawFunctions* g, GDrawBlurInfo* c, + GDrawRenderState* r, int taps, float* data, + gswf_recti* draw_bounds, + gswf_recti* sample_bounds, + GDrawStats* gstats) { + F32 tc[4]; + F32 clamp[4]; + F32 t = 0; + F32 texel_scale_s = 1.0f / c->frametex_width; + F32 texel_scale_t = 1.0f / c->frametex_height; + S32 i; + for (i = 0; i < taps; ++i) t += data[4 * i + 2]; + assert(t >= 0.99f && t <= 1.01f); + + tc[0] = texel_scale_s * draw_bounds->x0; + tc[1] = texel_scale_t * draw_bounds->y0; + tc[2] = texel_scale_s * draw_bounds->x1; + tc[3] = texel_scale_t * draw_bounds->y1; + + // sample_bounds is (x0,y0) inclusive, (x1,y1) exclusive + // texel centers are offset by 0.5 from integer coordinates and we don't + // want to sample outside sample_bounds + clamp[0] = texel_scale_s * (sample_bounds->x0 + 0.5f); + clamp[1] = texel_scale_t * (sample_bounds->y0 + 0.5f); + clamp[2] = texel_scale_s * (sample_bounds->x1 - 0.5f); + clamp[3] = texel_scale_t * (sample_bounds->y1 - 0.5f); + + if (!g->TextureDrawBufferBegin( + draw_bounds, GDRAW_TEXTURE_FORMAT_rgba32, + GDRAW_TEXTUREDRAWBUFFER_FLAGS_needs_color | + GDRAW_TEXTUREDRAWBUFFER_FLAGS_needs_alpha, + 0, gstats)) + return r->tex[0]; + + c->BlurPass(r, taps, data, draw_bounds, tc, (F32)c->h / c->frametex_height, + clamp, gstats); + return g->TextureDrawBufferEnd(gstats); +} + +static GDrawTexture* gdraw_BlurPassDownsample( + GDrawFunctions* g, GDrawBlurInfo* c, GDrawRenderState* r, int taps, + float* data, gswf_recti* draw_bounds, int axis, int divisor, int tex_w, + int tex_h, gswf_recti* sample_bounds, GDrawStats* gstats) { + S32 i; + F32 t = 0; + F32 tc[4]; + F32 clamp[4]; + F32 texel_scale_s = 1.0f / tex_w; + F32 texel_scale_t = 1.0f / tex_h; + gswf_recti z; + + for (i = 0; i < taps; ++i) t += data[4 * i + 2]; + 
assert(t >= 0.99f && t <= 1.01f);  // tap weights must sum to (about) one

    // following must be integer divides!
    // z is the destination rectangle: draw_bounds shrunk by 'divisor' along
    // 'axis'; tc holds the matching source texture coordinates.
    if (axis == 0) {
        z.x0 = draw_bounds->x0 / divisor;
        z.x1 = (draw_bounds->x1 - 1) / divisor + 1;
        z.y0 = draw_bounds->y0;
        z.y1 = draw_bounds->y1;

        tc[0] = ((z.x0 - 0.5f) * divisor + 0.5f) * texel_scale_s;
        tc[2] = ((z.x1 - 0.5f) * divisor + 0.5f) * texel_scale_s;
        tc[1] = z.y0 * texel_scale_t;
        tc[3] = z.y1 * texel_scale_t;
    } else {
        z.x0 = draw_bounds->x0;
        z.x1 = draw_bounds->x1;
        z.y0 = draw_bounds->y0 / divisor;
        z.y1 = (draw_bounds->y1 - 1) / divisor + 1;

        tc[0] = z.x0 * texel_scale_s;
        tc[2] = z.x1 * texel_scale_s;
        tc[1] = ((z.y0 - 0.5f) * divisor + 0.5f) * texel_scale_t;
        tc[3] = ((z.y1 - 0.5f) * divisor + 0.5f) * texel_scale_t;
    }

    // if no draw buffer is available, hand back the unblurred source texture
    if (!g->TextureDrawBufferBegin(
            &z, GDRAW_TEXTURE_FORMAT_rgba32,
            GDRAW_TEXTUREDRAWBUFFER_FLAGS_needs_color |
                GDRAW_TEXTUREDRAWBUFFER_FLAGS_needs_alpha,
            0, gstats))
        return r->tex[0];

    // clamp sampling to texel centers inside sample_bounds
    // (x0,y0 inclusive; x1,y1 exclusive)
    clamp[0] = texel_scale_s * (sample_bounds->x0 + 0.5f);
    clamp[1] = texel_scale_t * (sample_bounds->y0 + 0.5f);
    clamp[2] = texel_scale_s * (sample_bounds->x1 - 0.5f);
    clamp[3] = texel_scale_t * (sample_bounds->y1 - 0.5f);

    assert(clamp[0] <= clamp[2]);
    assert(clamp[1] <= clamp[3]);

    c->BlurPass(r, taps, data, &z, tc, (F32)c->h / c->frametex_height, clamp,
                gstats);
    return g->TextureDrawBufferEnd(gstats);
}

// unmap: position of t within [a,b] as a fraction; linear_remap: maps t from
// the range [a,b] onto the range [c,d].
#define unmap(t, a, b) (((t) - (a)) / (F32)((b) - (a)))
#define linear_remap(t, a, b, c, d) ((c) + unmap(t, a, b) * ((d) - (c)))

// Blurs r->tex[0] along one axis (0 = x, 1 = y) with a filter 'blur_width'
// pixels wide, replacing r->tex[0] with the result.
//   texel          size of one texel along 'axis' in texture coordinates
//   draw_bounds    in/out: grown by the filter reach (clamped to c->w/c->h)
//   sample_bounds  in/out: updated to the region valid for the next pass
//   protect        texture that must not be freed when it is replaced
// Filters needing at most MAX_TAPS bilinear taps are done in a single pass;
// wider ones use a downsample pass followed by a second filter pass.
// Does nothing if r->tex[0] is NULL or the filter reduces to a single tap.
static void gdraw_BlurAxis(S32 axis, GDrawFunctions* g, GDrawBlurInfo* c,
                           GDrawRenderState* r, F32 blur_width, F32 texel,
                           gswf_recti* draw_bounds, gswf_recti* sample_bounds,
                           GDrawTexture* protect, GDrawStats* gstats) {
    GDrawTexture* t;
    F32 data[MAX_TAPS][4];  // per tap: offset on 'axis', offset off-axis,
                            // weight, unused (always 0)
    S32 off_axis = 1 - axis;
    S32 w = ((S32)ceil((blur_width - 1) / 2)) * 2 +
            1;  // 1.2 => 3, 2.8 => 3, 3.2 => 5
    F32 edge_weight =
        1 - (w - blur_width) / 2;  // 3 => 0 => 1; 1.2 => 1.8 => 0.9 => 0.1
    F32 inverse_weight = 1.0f / blur_width;

    w = ((w - 1) >> 1) +
        1;  // 3 => 2, 5 => 3, 7 => 4 (number of texture samples)

    if (!r->tex[0]) return;

    // horizontal filter
    if (w > 1) {
        if (w <= MAX_TAPS) {
            // we have enough taps to just do it
            // use 'w' taps
            S32 i, expand;

            // just go through and place all the taps in the right place

            // if w is 2 (sample from -1,0,1)
            //   0 => -0.5
            //   1 =>  1

            // if w is 3:
            //   0 => -1.5  samples from -2,-1
            //   1 =>  0.5  samples from 0,1
            //   2 =>  2    samples from 2

            // if w is 4:
            //   0 => -2.5  samples from -3,-2
            //   1 => -0.5  samples from -1,0
            //   2 =>  1.5  samples from 1,2
            //   3 =>  3    samples from 3

            for (i = 0; i < w; ++i) {
                // first texsample samples from -w+1 and -w+2, e.g. w=2 =>
                // -1,0,1
                data[i][axis] = (-w + 1.5f + i * 2) * texel;
                data[i][off_axis] = 0;
                data[i][2] = 2 * inverse_weight;  // 2 full-weight samples
                data[i][3] = 0;
            }
            // now reweight the last one
            data[i - 1][axis] = (w - 1) * texel;
            data[i - 1][2] = edge_weight * inverse_weight;
            // now reweight the first one
            //   (ew*0 + 1*1)/(1+ew) = 1/(1+ew)
            data[0][axis] = (-w + 1.0f + 1 / (edge_weight + 1)) * texel;
            data[0][2] = (edge_weight + 1) * inverse_weight;

            expand = w - 1;
            gdraw_ExpandRect(draw_bounds, draw_bounds, axis ? 0 : expand,
                             axis ? expand : 0, c->w, c->h);

            t = gdraw_BlurPass(g, c, r, w, data[0], draw_bounds, sample_bounds,
                               gstats);
            // release the previous source unless it is the protected input
            // or the pass failed and handed the same texture back
            if (r->tex[0] != protect && r->tex[0] != t)
                g->FreeTexture(r->tex[0], 0, gstats);
            r->tex[0] = t;
            gdraw_ExpandRect(sample_bounds, draw_bounds, 1, 1, c->w,
                             c->h);  // for next pass
        } else {
            // @OPTIMIZE: for symmetrical blurs we can get a 2-wide blur in the
            // *off* axis at the same time we get N-wide in the on axis, which
            // could double our max width
            S32 i, expand;
            // @HACK: this is really a dumb way to do it, i kind of had a brain
            // fart, you could get the exact same result by just doing the
            // downsample the naive way and then the final sample uses texture
            // samples spaced by a texel rather than spaced by two texels -- the
            // current method is just as inefficient, it just puts the
            // inefficiency in the way the downsampled texture is
            // self-overlapping, so the downsampled texture is twice as larger
            // as it should be.

            // we COULD be exact by generating a mipmap, then sampling some
            // number of samples from the mipmap and some from the original, but
            // that would require being polyphase. instead we just are
            // approximate. the mipmap weights the edge pixels by one half and
            // overlaps them by one sample, so then in phase two we sample N
            // slightly-overlapping mipmap samples
            //
            // instead we do the following.
            //    divide the source data up into clusters that are K samples
            //    long.
            //       ...K0... ...K1... ...K2... ...K3...
            //
            //    Suppose K[i] is the average of all the items in cluster i.
            //
            //    We compute a downsampled texture where T[i] = K[i] + K[i+1].
            //
            //    Now, we sample N taps from adjacent elements of T, allowing the
            //    texture unit to bilerp. Suppose a given sample falls at
            //    coordinate i with sub-position p. Then tap #j will compute:
            //          T[i+j]*(1-p) + T[i+j+1]*p
            //    But tap #j+1 will compute:
            //          T[i+j+1]*(1-p) + T[i+j+2]*p
            //    so we end up computing:
            //          sum(T[i+j]) except for the end samples.
            //
            //    So, how do we create these initial clusters? That's easy, we use
            //    K taps to sample 2K texels.
            //
            //    What value of k do we use? Well, we're constrained to using
            //    MAX_TAPS on each pass. So at the high end, we're bounded by:
            //         K = MAX_TAPS
            //         S = MAX_TAPS (S is number of samples in second pass)
            //    S addresses S*2-1 texels of T, and each texel adds K more
            //    samples, so (ignoring the edges) we basically have w = K*S

            // if w == MAX_TAPS*MAX_TAPS, then k = MAX_TAPS
            // if w == MAX_TAPS+1, then k = 2
            //
            // suppose we have 3 taps, then we can sample 5 samples in one pass,
            // so then our max coverage is 25 samples, or a filter width of 13.
            // with 7 taps, we sample 13 samples in one pass, max coverage is
            // 13*13 samples or (13*13-1)/2 width, which is ((2T-1)*(2T-1)-1)/2
            // or (4T^2 - 4T + 1 -1)/2 or 2T^2 - 2T or 2T*(T-1)
            S32 w_mip = (S32)ceil(linear_remap(
                w, MAX_TAPS + 1, MAX_TAPS * MAX_TAPS, 2, MAX_TAPS));
            S32 downsample = w_mip;
            F32 sample_spacing = texel;
            if (downsample < 2) downsample = 2;
            if (w_mip > MAX_TAPS) {
                // if w_mip > MAX_TAPS, then we ought to use more than one
                // mipmap pass, but since that's a huge filter ( > 80 pixels)
                // let's just try subsampling and see if it's good enough.
                // NOTE(review): w_mip and MAX_TAPS are both integers, so this
                // quotient truncates (e.g. 10/9 == 1) - confirm whether a
                // fractional spacing factor was intended.
                sample_spacing *= w_mip / MAX_TAPS;
                w_mip = MAX_TAPS;
            } else {
                assert(w / downsample <= MAX_TAPS);
            }
            // first pass: w_mip evenly weighted bilinear taps
            inverse_weight = 1.0f / (2 * w_mip);
            for (i = 0; i < w_mip; ++i) {
                data[i][axis] = (-w_mip + 1 + i * 2 + 0.5f) * sample_spacing;
                data[i][off_axis] = 0;
                data[i][2] = 2 * inverse_weight;
                data[i][3] = 0;
            }
            w = w * 2 / w_mip;

            // @TODO: compute the correct bboxes for this size
            // the downsampled texture samples from -w_mip+1 to w_mip
            // the sample from within that samples w spots within that,
            // or w/2 of those, but they're overlapping by 50%.
            // so if a sample is a point i, it samples from the original
            // from -w_mip+1 to w_mip + i*w_mip.
            // So then the minimum is: -w_mip+1 + (w/2)*w_mip, and
            // the maximum is w_mip + (w/2)*w_mip
            expand = (((w + 1) >> 1) + 1) * w_mip + 1;
            gdraw_ExpandRect(draw_bounds, draw_bounds, axis ? 0 : expand,
                             axis ? expand : 0, c->w, c->h);

            t = gdraw_BlurPassDownsample(
                g, c, r, w_mip, data[0], draw_bounds, axis, downsample,
                c->frametex_width, c->frametex_height, sample_bounds, gstats);
            if (r->tex[0] != protect && r->tex[0] != t)
                g->FreeTexture(r->tex[0], 0, gstats);
            r->tex[0] = t;
            gdraw_ExpandRect(sample_bounds, draw_bounds, 1, 1, c->w, c->h);
            if (!r->tex[0]) return;

            // now do a regular blur pass sampling from that
            // the raw texture now contains 'downsample' samples per texel
            if (w > 2 * MAX_TAPS) {
                // too many samples for one pass: spread 2*MAX_TAPS of them
                // over the same extent
                sample_spacing = texel * (w - 1) / (2 * MAX_TAPS - 1);
                w = 2 * MAX_TAPS;
            } else {
                sample_spacing = texel;
            }
            // sample_spacing *= 1.0f/2;
            assert(w >= 2 && w <= 2 * MAX_TAPS);

            if (w & 1) {
                // we just want to evenly weight even-spaced samples
                inverse_weight = 1.0f / w;

                // just go through and place all the taps in the right place

                w = (w + 1) >> 1;
                for (i = 0; i < w; ++i) {
                    data[i][axis] = (-w + 1.0f + 0.5f + i * 2) * sample_spacing;
                    data[i][off_axis] = 0;
                    data[i][2] = 2 * inverse_weight;  // 2 full-weight samples
                    data[i][3] = 0;
                }

                // fix up the last tap

                // the following test is always true, but we're testing it here
                // explicitly so as to make VS2012's static analyzer not
                // complain
                if (i > 0) {
                    data[i - 1][axis] =
                        (-w + 1.0f + (i - 1) * 2) * sample_spacing;
                    data[i - 1][2] = inverse_weight;
                }
            } else {
                // we just want to evenly weight even-spaced samples
                inverse_weight = 1.0f / w;

                // just go through and place all the taps in the right place
                w >>= 1;
                for (i = 0; i < w; ++i) {
                    data[i][axis] = (-w + 1.0f + i * 2) * sample_spacing;
                    data[i][off_axis] = 0;
                    data[i][2] = 2 * inverse_weight;  // 2 full-weight samples
                    data[i][3] = 0;
                }
            }

            // second pass: divisor 1, with the texel scale of the blurred
            // axis stretched by 'downsample'
            t = gdraw_BlurPassDownsample(
                g, c, r, w, data[0], draw_bounds, axis, 1,
                axis == 0 ? c->frametex_width * downsample : c->frametex_width,
                axis == 1 ? c->frametex_height * downsample
                          : c->frametex_height,
                sample_bounds, gstats);
            if (r->tex[0] != protect && r->tex[0] != t)
                g->FreeTexture(r->tex[0], 0, gstats);
            r->tex[0] = t;
            gdraw_ExpandRect(sample_bounds, draw_bounds, 1, 1, c->w, c->h);
        }
    }
}

// Applies the separable blur described by r (blur_x, blur_y, blur_passes) to
// r->tex[0]: each pass blurs along x and then along y. draw_bounds is
// updated in place by the axis passes; sample_bounds is only read. The
// texture that was in r->tex[0] on entry is never freed by the blur.
static void gdraw_Blur(GDrawFunctions* g, GDrawBlurInfo* c, GDrawRenderState* r,
                       gswf_recti* draw_bounds, gswf_recti* sample_bounds,
                       GDrawStats* gstats) {
    S32 p;
    GDrawTexture* protect = r->tex[0];
    gswf_recti sbounds;

    // compute texel offset size
    F32 dx = 1.0f / c->frametex_width;
    F32 dy = 1.0f / c->frametex_height;

    // blur = 1   => 1 tap
    // blur = 1.2 => 3 taps (0.1, 1, 0.1)
    // blur = 2.2 => 3 taps (0.6, 1, 0.6)
    // blur = 2.8 => 3 taps (0.9, 1, 0.9)
    // blur = 3   => 3 taps (1  , 1, 1  )
    // blur = 3.2 => 5 taps (0.1, 1, 1, 1, 0.1)

    // S32 w = ((S32) ceil((r->blur_x-1)/2))*2+1;
    //     1.2 => (1.2-1)/2 => 0.1 => 1.0 => 1 => 2 => 3
    // S32 h = ((S32) ceil((r->blur_y-1)/2))*2+1;
    //     3 => (3-1)/2 => 1.0 => 1 => 2 => 3

    // gdraw puts 1 border pixel around everything when producing rendertargets
    // and we use this so expand the input sample bounds accordingly
    gdraw_ExpandRect(&sbounds, sample_bounds, 1, 1, c->w, c->h);

    for (p = 0; p < r->blur_passes; ++p) {
        {
            // do the filter separably
            gdraw_BlurAxis(0, g, c, r, r->blur_x, dx, draw_bounds, &sbounds,
                           protect, gstats);
            gdraw_BlurAxis(1, g, c, r, r->blur_y, dy, draw_bounds, &sbounds,
                           protect, gstats);
        }
    }
}

#if defined(GDRAW_MANAGE_MEM)

// Rounds *start up to a multiple of 'alignment' and shrinks *num_bytes by
// the bytes skipped. If the pool is smaller than the skipped amount, the
// pool is reported as empty (*start = NULL, *num_bytes = 0).
// The mask arithmetic assumes 'alignment' is a power of two.
static void make_pool_aligned(void** start, S32* num_bytes, U32 alignment) {
    UINTa addr_orig = (UINTa)*start;
    UINTa addr_aligned = (addr_orig + alignment - 1) & ~((UINTa)alignment - 1);

    if (addr_aligned != addr_orig) {
        S32 diff = (S32)(addr_aligned - addr_orig);
        if (*num_bytes < diff) {
            *start = NULL;
            *num_bytes = 0;
            return;
        } else {
            *start = (void*)addr_aligned;
            *num_bytes -= diff;
        }
    }
}

// Very simple arena allocator
typedef struct {
    U8* begin;    // first byte of the arena
    U8* current;  // next byte to hand out
    U8* end;      // one past the last byte of the arena
} GDrawArena;

// Points the arena at the 'size' bytes starting at 'start'; nothing is
// allocated yet.
static void gdraw_arena_init(GDrawArena* arena, void* start, U32 size) {
    arena->begin = (U8*)start;
    arena->current = (U8*)start;
    arena->end = (U8*)start + size;
}

// Discards every allocation made from the arena.
static GDRAW_MAYBE_UNUSED void gdraw_arena_reset(GDrawArena* arena) {
    arena->current = arena->begin;
}

// Allocates 'size' bytes aligned to 'align' from the arena.
// Returns NULL (leaving the arena unchanged) when the padded request does
// not fit. The mask arithmetic assumes 'align' is a power of two.
static void* gdraw_arena_alloc(GDrawArena* arena, U32 size, U32 align) {
    UINTa start_addr =
        ((UINTa)arena->current + align - 1) & ~((UINTa)align - 1);
    U8* ptr = (U8*)start_addr;
    UINTa remaining = arena->end - arena->current;
    UINTa total_size = (ptr - arena->current) + size;  // padding + payload
    if (remaining < total_size)  // doesn't fit
        return NULL;

    arena->current = ptr + size;
    return ptr;
}

// Allocator for graphics memory.
// Graphics memory is assumed to be write-combined and slow to read for the
// CPU, so we keep all heap management information separately in main memory.
//
// There's a constant management of about 1k (2k for 64bit) to create a heap,
// plus a per-block overhead.
The maximum number of blocks the allocator can +// ever use is bounded by 2*max_allocs+1; since GDraw manages a limited +// amount of handles, max_allocs is a known value at heap creation time. +// +// The allocator uses a best-fit heuristic to minimize fragmentation. +// Currently, there are no size classes or other auxiliary data structures to +// speed up this process, since the number of free blocks at any point in time +// is assumed to be fairly low. +// +// The allocator maintains a number of invariants: +// - The free list and physical block list are proper double-linked lists. +// (i.e. block->next->prev == block->prev->next == block) +// - All allocated blocks are also kept in a hash table, indexed by their +// pointer (to allow free to locate the corresponding block_info quickly). +// There's a single-linked, NULL-terminated list of elements in each hash +// bucket. +// - The physical block list is ordered. It always contains all currently +// active blocks and spans the whole managed memory range. There are no +// gaps between blocks, and all blocks have nonzero size. +// - There are no two adjacent free blocks; if two such blocks would be created, +// they are coalesced immediately. +// - The maximum number of blocks that could ever be necessary is allocated +// on initialization. All block_infos not currently in use are kept in a +// single-linked, NULL-terminated list of unused blocks. Every block is either +// in the physical block list or the unused list, and the total number of +// blocks is constant. +// These invariants always hold before and after an allocation/free. + +#if !defined(GFXALLOC_ASSERT) +#define GFXALLOC_ASSERT(x) +#endif + +typedef struct gfx_block_info { + U8* ptr; + gfx_block_info *prev, + *next; // for free blocks this is the free list, for allocated blocks + // it's a (single-linked!) 
list of elements in the corresponding + // hash bucket + gfx_block_info *prev_phys, *next_phys; + U32 is_free : 1; + U32 is_unused : 1; + U32 size : 30; +} gfx_block_info; +// 24 bytes/block on 32bit, 48 bytes/block on 64bit. + +#define GFXALLOC_HASH_SIZE 256 + +typedef struct gfx_allocator { + U8* mem_base; + U8* mem_end; + U32 max_allocs; + U32 block_align; + U32 block_shift; + S32 actual_bytes_free; + +#if defined(GFXALLOC_CHECK) + int num_blocks; + int num_unused; + int num_alloc; + int num_free; +#endif + + GDrawHandleCache* cache; + + gfx_block_info* unused_list; // next unused block_info (single-linked list) + gfx_block_info* hash[GFXALLOC_HASH_SIZE]; // allocated blocks + gfx_block_info blocks[1]; // first block is head of free list AND head of + // physical block list (sentinel) +} gfx_allocator; +// about 1k (32bit), 2k (64bit) with 256 hash buckets (the default). dominated +// by hash table. + +#if defined(GFXALLOC_CHECK) +#define GFXALLOC_IF_CHECK(x) x +#else +#define GFXALLOC_IF_CHECK(x) +#endif + +static U32 gfxalloc_get_hash_code(gfx_allocator* alloc, void* ptr) { + U32 a = (U32)(((U8*)ptr - alloc->mem_base) >> alloc->block_shift); + + // integer hash function by Bob Jenkins + // (http://burtleburtle.net/bob/hash/integer.html) I use this function + // because integer mults are slow on PPC and large literal constants take + // multiple instrs to set up on all RISC CPUs. 
+ a -= (a << 6); + a ^= (a >> 17); + a -= (a << 9); + a ^= (a << 4); + a -= (a << 3); + a ^= (a << 10); + a ^= (a >> 15); + + return a & (GFXALLOC_HASH_SIZE - 1); +} + +#if defined(SUPERDEBUG) || defined(COMPLETE_DEBUG) +#include +#define MAX_REGIONS 8192 +typedef struct { + U32 begin, end; +} gfx_region; +static gfx_region region[MAX_REGIONS]; + +static int region_sort(const void* p, const void* q) { + U32 a = *(U32*)p; + U32 b = *(U32*)q; + if (a < b) return -1; + if (a > b) return 1; + return 0; +} + +static void gfxalloc_check1(gfx_allocator* alloc) { + assert(alloc->max_allocs * 2 + 1 < MAX_REGIONS); + int i, n = 0; + for (i = 0; i < GFXALLOC_HASH_SIZE; ++i) { + gfx_block_info* b = alloc->hash[i]; + while (b) { + region[n].begin = (UINTa)b->ptr; + region[n].end = region[n].begin + b->size; + ++n; + b = b->next; + } + } + gfx_block_info* b = alloc->blocks[0].next; + while (b != &alloc->blocks[0]) { + region[n].begin = (UINTa)b->ptr; + region[n].end = region[n].begin + b->size; + ++n; + b = b->next; + } + qsort(region, n, sizeof(region[0]), region_sort); + for (i = 0; i + 1 < n; ++i) { + assert(region[i].end == region[i + 1].begin); + } +} +#else +#define gfxalloc_check1(a) +#endif + +#if defined(COMPLETE_DEBUG) +static void verify_against_blocks(int num_regions, void* vptr, S32 len) { + U32* ptr = (U32*)vptr; + // binary search for ptr amongst regions + S32 s = 0, e = num_regions - 1; + assert(len != 0); + while (s < e) { + S32 i = (s + e + 1) >> 1; + // invariant: b[s] <= ptr <= b[e] + if (region[i].begin <= (UINTa)ptr) + s = i; + else + e = i - 1; + + // consider cases: + // s=0,e=1: i = 0, how do we get i to be 1? 
+ } + // at this point, s >= e + assert(s < num_regions && region[s].begin == (UINTa)ptr && + (UINTa)ptr + len <= region[s].end); +} + +static void debug_complete_check(gfx_allocator* alloc, void* ptr, S32 len, + void* skip) { + GDrawHandleCache* c = alloc->cache; + assert(alloc->max_allocs * 2 + 1 < MAX_REGIONS); + int i, n = 0; + for (i = 0; i < GFXALLOC_HASH_SIZE; ++i) { + gfx_block_info* b = alloc->hash[i]; + while (b) { + region[n].begin = (UINTa)b->ptr; + region[n].end = region[n].begin + b->size; + ++n; + b = b->next; + } + } + gfx_block_info* b = alloc->blocks[0].next; + while (b != &alloc->blocks[0]) { + region[n].begin = (UINTa)b->ptr; + region[n].end = region[n].begin + b->size; + ++n; + b = b->next; + } + for (i = 0; i < n; ++i) assert(region[i].end > region[i].begin); + qsort(region, n, sizeof(region[0]), region_sort); + for (i = 0; i + 1 < n; ++i) { + assert(region[i].end == region[i + 1].begin); + } + + if (ptr) verify_against_blocks(n, ptr, len); + + if (c) { + GDrawHandle* t = c->head; + while (t) { + if (t->raw_ptr && t->raw_ptr != skip) + verify_against_blocks(n, t->raw_ptr, t->bytes); + t = t->next; + } + t = c->active; + while (t) { + if (t->raw_ptr && t->raw_ptr != skip) + verify_against_blocks(n, t->raw_ptr, t->bytes); + t = t->next; + } + } +} +#else +#define debug_complete_check(a, p, len, s) +#endif + +#if defined(GFXALLOC_CHECK) +static void gfxalloc_check2(gfx_allocator* alloc) { + int n = 0; + gfx_block_info* b = alloc->unused_list; + while (b) { + ++n; + b = b->next; + } + GFXALLOC_ASSERT(n == alloc->num_unused); + b = alloc->blocks->next; + n = 0; + while (b != alloc->blocks) { + ++n; + b = b->next; + } + GFXALLOC_ASSERT(n == alloc->num_free); + GFXALLOC_ASSERT(alloc->num_blocks == + alloc->num_unused + alloc->num_free + alloc->num_alloc); +} +#define gfxalloc_check(a) \ + do { \ + gfxalloc_check1(a); \ + gfxalloc_check2(a); \ + } while (0) +#else +#define gfxalloc_check2(a) +#define gfxalloc_check(a) +#endif + +static gfx_block_info* 
gfxalloc_pop_unused(gfx_allocator* alloc) { + GFXALLOC_ASSERT(alloc->unused_list != NULL); + GFXALLOC_ASSERT(alloc->unused_list->is_unused); + GFXALLOC_IF_CHECK(GFXALLOC_ASSERT(alloc->num_unused);) + + gfx_block_info* b = alloc->unused_list; + alloc->unused_list = b->next; + GFXALLOC_ASSERT(alloc->unused_list); + b->is_unused = 0; + GFXALLOC_IF_CHECK(--alloc->num_unused;) + return b; +} + +static void gfxalloc_push_unused(gfx_allocator* alloc, gfx_block_info* b) { + GFXALLOC_ASSERT(!b->is_unused); + b->is_unused = 1; + b->next = alloc->unused_list; + alloc->unused_list = b; + GFXALLOC_IF_CHECK(++alloc->num_unused); +} + +static void gfxalloc_add_free(gfx_allocator* alloc, gfx_block_info* b) { + gfx_block_info* head = alloc->blocks; + + b->is_free = 1; + b->next = head->next; + b->prev = head; + head->next->prev = b; + head->next = b; + GFXALLOC_IF_CHECK(++alloc->num_free;) +} + +static void gfxalloc_rem_free(gfx_allocator* alloc, gfx_block_info* b) { + RR_UNUSED_VARIABLE(alloc); + b->is_free = 0; + b->prev->next = b->next; + b->next->prev = b->prev; + GFXALLOC_IF_CHECK(--alloc->num_free;) +} + +static void gfxalloc_split_free(gfx_allocator* alloc, gfx_block_info* b, + U32 pos) { + gfx_block_info* n = gfxalloc_pop_unused(alloc); + + GFXALLOC_ASSERT(b->is_free); + GFXALLOC_ASSERT(pos > 0 && pos < b->size); + + // set up new free block + n->ptr = b->ptr + pos; + n->prev_phys = b; + n->next_phys = b->next_phys; + n->next_phys->prev_phys = n; + n->size = b->size - pos; + assert(n->size != 0); + gfxalloc_add_free(alloc, n); + + // fix original block + b->next_phys = n; + b->size = pos; + assert(b->size != 0); + + debug_complete_check(alloc, n->ptr, n->size, 0); + debug_complete_check(alloc, b->ptr, b->size, 0); +} + +static gfx_allocator* gfxalloc_create(void* mem, U32 mem_size, U32 align, + U32 max_allocs) { + gfx_allocator* a; + U32 i, max_blocks, size; + + if (!align || + (align & (align - 1)) != 0) // align must be >0 and a power of 2 + return NULL; + + // for <= 
max_allocs live allocs, there's <= 2*max_allocs+1 blocks. worst + // case: [free][used][free] .... [free][used][free] + max_blocks = max_allocs * 2 + 1; + size = sizeof(gfx_allocator) + max_blocks * sizeof(gfx_block_info); + a = (gfx_allocator*)IggyGDrawMalloc(size); + if (!a) return NULL; + + memset(a, 0, size); + + GFXALLOC_IF_CHECK(a->num_blocks = max_blocks;) + GFXALLOC_IF_CHECK(a->num_alloc = 0;) + GFXALLOC_IF_CHECK(a->num_free = 1;) + GFXALLOC_IF_CHECK(a->num_unused = max_blocks - 1;) + + GFXALLOC_IF_CHECK( + GFXALLOC_ASSERT(a->num_blocks == + a->num_alloc + a->num_free + a->num_unused);) + GFXALLOC_IF_CHECK(GFXALLOC_ASSERT(a->num_free <= a->num_blocks + 1);) + + a->actual_bytes_free = mem_size; + a->mem_base = (U8*)mem; + a->mem_end = a->mem_base + mem_size; + a->max_allocs = max_allocs; + a->block_align = align; + a->block_shift = 0; + while ((1u << a->block_shift) < a->block_align) a->block_shift++; + + // init sentinel block + a->blocks[0].prev = a->blocks[0].next = + &a->blocks[1]; // point to free block + a->blocks[0].prev_phys = a->blocks[0].next_phys = &a->blocks[1]; // same + + // init first free block + a->blocks[1].ptr = a->mem_base; + a->blocks[1].prev = a->blocks[1].next = &a->blocks[0]; + a->blocks[1].prev_phys = a->blocks[1].next_phys = &a->blocks[0]; + a->blocks[1].is_free = 1; + a->blocks[1].size = mem_size; + + // init "unused" list + a->unused_list = a->blocks + 2; + for (i = 2; i < max_blocks; i++) { + a->blocks[i].is_unused = 1; + a->blocks[i].next = a->blocks + (i + 1); + } + a->blocks[i].is_unused = 1; + + gfxalloc_check(a); + debug_complete_check(a, NULL, 0, 0); + return a; +} + +static void* gfxalloc_alloc(gfx_allocator* alloc, U32 size_in_bytes) { + gfx_block_info *cur, *best = NULL; + U32 i, best_wasted = ~0u; + U32 size = size_in_bytes; + debug_complete_check(alloc, NULL, 0, 0); + gfxalloc_check(alloc); + GFXALLOC_IF_CHECK(GFXALLOC_ASSERT(alloc->num_blocks == + alloc->num_alloc + alloc->num_free + + alloc->num_unused);) + 
GFXALLOC_IF_CHECK( + GFXALLOC_ASSERT(alloc->num_free <= alloc->num_blocks + 1);) + + // round up to multiple of our block alignment + size = (size + alloc->block_align - 1) & ~(alloc->block_align - 1); + assert(size >= size_in_bytes); + assert(size != 0); + + // find best fit among all free blocks. this is O(N)! + for (cur = alloc->blocks[0].next; cur != alloc->blocks; cur = cur->next) { + if (cur->size >= size) { + U32 wasted = cur->size - size; + if (wasted < best_wasted) { + best_wasted = wasted; + best = cur; + if (!wasted) break; // can't get better than perfect + } + } + } + + // return the best fit, if we found any suitable block + if (best) { + debug_check_overlap(alloc->cache, best->ptr, best->size); + // split off allocated part + if (size != best->size) gfxalloc_split_free(alloc, best, size); + debug_complete_check(alloc, best->ptr, best->size, 0); + + // remove from free list and add to allocated hash table + GFXALLOC_ASSERT(best->size == size); + gfxalloc_rem_free(alloc, best); + + i = gfxalloc_get_hash_code(alloc, best->ptr); + best->next = alloc->hash[i]; + alloc->hash[i] = best; + alloc->actual_bytes_free -= size; + GFXALLOC_ASSERT(alloc->actual_bytes_free >= 0); + + GFXALLOC_IF_CHECK(++alloc->num_alloc;) + GFXALLOC_IF_CHECK(GFXALLOC_ASSERT(alloc->num_blocks == + alloc->num_alloc + alloc->num_free + + alloc->num_unused);) + GFXALLOC_IF_CHECK( + GFXALLOC_ASSERT(alloc->num_free <= alloc->num_blocks + 1);) + + debug_complete_check(alloc, best->ptr, best->size, 0); + gfxalloc_check(alloc); + debug_check_overlap(alloc->cache, best->ptr, best->size); + return best->ptr; + } else + return NULL; // not enough space! 
+} + +static void gfxalloc_free(gfx_allocator* alloc, void* ptr) { + GFXALLOC_IF_CHECK(GFXALLOC_ASSERT(alloc->num_blocks == + alloc->num_alloc + alloc->num_free + + alloc->num_unused);) + GFXALLOC_IF_CHECK( + GFXALLOC_ASSERT(alloc->num_free <= alloc->num_blocks + 1);) + + // find the block in the hash table + gfx_block_info *b, *t, **prevnext; + U32 i = gfxalloc_get_hash_code(alloc, ptr); + + prevnext = &alloc->hash[i]; + b = alloc->hash[i]; + + while (b) { + if (b->ptr == ptr) break; + prevnext = &b->next; + b = b->next; + } + + if (!b) { + GFXALLOC_ASSERT(0); // trying to free a non-allocated block + return; + } + + debug_complete_check(alloc, b->ptr, b->size, 0); + GFXALLOC_IF_CHECK(--alloc->num_alloc;) + + // remove it from the hash table + *prevnext = b->next; + + alloc->actual_bytes_free += b->size; + + // merge with previous block if it's free, else add it to free list + t = b->prev_phys; + if (t->is_free) { + t->size += b->size; + t->next_phys = b->next_phys; + t->next_phys->prev_phys = t; + gfxalloc_push_unused(alloc, b); + b = t; + } else + gfxalloc_add_free(alloc, b); + + // try to merge with next block + t = b->next_phys; + if (t->is_free) { + b->size += t->size; + b->next_phys = t->next_phys; + t->next_phys->prev_phys = b; + gfxalloc_rem_free(alloc, t); + gfxalloc_push_unused(alloc, t); + } + debug_complete_check(alloc, 0, 0, ptr); + gfxalloc_check(alloc); + GFXALLOC_IF_CHECK(GFXALLOC_ASSERT(alloc->num_blocks == + alloc->num_alloc + alloc->num_free + + alloc->num_unused);) + GFXALLOC_IF_CHECK( + GFXALLOC_ASSERT(alloc->num_free <= alloc->num_blocks + 1);) +} + +#if defined(GDRAW_MANAGE_MEM_TWOPOOL) + +static rrbool gfxalloc_is_empty(gfx_allocator* alloc) { + gfx_block_info* first_free = alloc->blocks[0].next; + + // we want to check whether there's exactly one free block that + // covers the entire pool. 
+ if (first_free == alloc->blocks) // 0 free blocks + return false; + + if (first_free->next != alloc->blocks) // >1 free block + return false; + + return first_free->ptr == alloc->mem_base && + first_free->ptr + first_free->size == alloc->mem_end; +} + +static rrbool gfxalloc_mem_contains(gfx_allocator* alloc, void* ptr) { + return alloc->mem_base <= (U8*)ptr && (U8*)ptr < alloc->mem_end; +} + +#endif + +#if defined(GDRAW_DEBUG) + +static void gfxalloc_dump(gfx_allocator* alloc) { + static const char* type[] = { + "allocated", + "free", + }; + + for (gfx_block_info* b = alloc->blocks[0].next_phys; b != alloc->blocks; + b = b->next_phys) { + U8* start = b->ptr; + U8* end = b->ptr + b->size; + printf("%p-%p: %s (%d bytes)\n", start, end, type[b->is_free], b->size); + } +} + +#endif + +#endif + +#if defined(GDRAW_DEFRAGMENT) + +#define GDRAW_DEFRAGMENT_may_overlap \ + 1 // self-overlap for individual copies is OK + +// Defragmentation code for graphics memory. +// The platform implementation must provide a GPU memcpy function and handle all +// necessary synchronization. It must also adjust its resource descriptors to +// match the new addresses after defragmentation. 
+ +static void gdraw_gpu_memcpy(GDrawHandleCache* c, void* dst, void* src, + U32 num_bytes); + +static void gdraw_Defragment_memmove(GDrawHandleCache* c, U8* dst, U8* src, + U32 num_bytes, U32 flags, + GDrawStats* stats) { + if (dst == src) return; + + assert(num_bytes != 0); + + stats->nonzero_flags |= GDRAW_STATS_defrag; + stats->defrag_objects += 1; + stats->defrag_bytes += num_bytes; + + if ((flags & GDRAW_DEFRAGMENT_may_overlap) || dst + num_bytes <= src || + src + num_bytes <= dst) // no problematic overlap + gdraw_gpu_memcpy(c, dst, src, num_bytes); + else { + // need to copy in multiple chunks + U32 chunk_size, pos = 0; + if (dst < src) + chunk_size = (U32)(src - dst); + else + chunk_size = (U32)(dst - src); + + while (pos < num_bytes) { + U32 amount = num_bytes - pos; + if (amount > chunk_size) amount = chunk_size; + gdraw_gpu_memcpy(c, dst + pos, src + pos, amount); + pos += amount; + } + } +} + +static rrbool gdraw_CanDefragment(GDrawHandleCache* c) { + // we can defragment (and extract some gain from it) if and only if there's + // more than one free block. since gfxalloc coalesces free blocks + // immediately and keeps them in a circular linked list, this is very easy + // to detect: just check if the "next" pointer of the first free block + // points to the sentinel. 
(this is only the case if there are 0 or 1 free + // blocks) + gfx_allocator* alloc = c->alloc; + return alloc->blocks[0].next->next != alloc->blocks; +} + +static void gdraw_DefragmentMain(GDrawHandleCache* c, U32 flags, + GDrawStats* stats) { + gfx_allocator* alloc = c->alloc; + gfx_block_info *b, *n; + U8* p; + S32 i; + + GFXALLOC_IF_CHECK(GFXALLOC_ASSERT(alloc->num_blocks == + alloc->num_alloc + alloc->num_free + + alloc->num_unused);) + GFXALLOC_IF_CHECK( + GFXALLOC_ASSERT(alloc->num_free <= alloc->num_blocks + 1);) + + // go over all allocated memory blocks and clear the "prev" pointer + // (unused for allocated blocks, we'll use it to store a back-pointer to the + // corresponding handle) + for (b = alloc->blocks[0].next_phys; b != alloc->blocks; b = b->next_phys) + if (!b->is_free) b->prev = NULL; + + // go through all handles and store a pointer to the handle in the + // corresponding memory block + for (i = 0; i < c->max_handles; i++) + if (c->handle[i].raw_ptr) { + assert(c->handle[i].bytes != 0); + for (b = alloc->hash[gfxalloc_get_hash_code(alloc, + c->handle[i].raw_ptr)]; + b; b = b->next) + if (b->ptr == c->handle[i].raw_ptr) { + void* block = &c->handle[i]; + b->prev = (gfx_block_info*)block; + break; + } + + GFXALLOC_ASSERT(b != NULL); // didn't find this block anywhere! + } + + // clear alloc hash table (we rebuild it during defrag) + memset(alloc->hash, 0, sizeof(alloc->hash)); + + // defragmentation proper: go over all blocks again, remove all free blocks + // from the physical block list and compact the remaining blocks together. 
+ p = alloc->mem_base; + for (b = alloc->blocks[0].next_phys; b != alloc->blocks; b = n) { + n = b->next_phys; + + if (!b->is_free) { + U32 h; + + // move block if necessary + if (p != b->ptr) { + assert(b->size != 0); + gdraw_Defragment_memmove(c, p, b->ptr, b->size, flags, stats); + b->ptr = p; + assert(b->prev); + if (b->prev) ((GDrawHandle*)b->prev)->raw_ptr = p; + } + + // re-insert into hash table + h = gfxalloc_get_hash_code(alloc, p); + b->next = alloc->hash[h]; + alloc->hash[h] = b; + + p += b->size; + } else { + // free block: remove it from the physical block list + b->prev_phys->next_phys = b->next_phys; + b->next_phys->prev_phys = b->prev_phys; + gfxalloc_rem_free(alloc, b); + gfxalloc_push_unused(alloc, b); + } + } + // the free list should be empty now + assert(alloc->blocks[0].next == &alloc->blocks[0]); + + // unless all memory is allocated, we now need to add a new block for the + // free space at the end + if (p != alloc->mem_end) { + b = gfxalloc_pop_unused(alloc); + + b->ptr = p; + b->prev_phys = alloc->blocks[0].prev_phys; + b->next_phys = &alloc->blocks[0]; + b->prev_phys->next_phys = b; + b->next_phys->prev_phys = b; + b->size = alloc->mem_end - p; + gfxalloc_add_free(alloc, b); + } + + GFXALLOC_IF_CHECK(GFXALLOC_ASSERT(alloc->num_blocks == + alloc->num_alloc + alloc->num_free + + alloc->num_unused);) + GFXALLOC_IF_CHECK( + GFXALLOC_ASSERT(alloc->num_free <= alloc->num_blocks + 1);) +} + +#endif + +#if defined(GDRAW_MANAGE_MEM_TWOPOOL) + +// Defragmentation code for graphics memory, using two-pool strategy. +// +// The platform implementation must provide a GPU memcpy function and handle +// all necessary synchronization. It must also adjust its resource descriptors +// to match the new addresses after defragmentation. 
//
// The high concept for two-pool is that we can't update the resource pools
// mid-frame; instead, while preparing for a frame, we need to produce a memory
// configuration that is suitable for rendering a whole frame at once (in
// contrast to our normal incremental strategy, where we can decide to
// defragment mid-frame if things are getting desperate). This is for tiled
// renderers.
//
// Two-pool works like this:
// - As the name suggests, each handle cache has two memory pools and
//   corresponding backing allocators: the currently used allocator, "alloc",
//   and a second allocator, "alloc_other".
// - Any resource used in a command buffer gets locked and *stays locked* until
//   we're done preparing that command buffer (i.e. no unlocking after every
//   draw as in the normal incremental memory management).
// - All allocations happen from "alloc", always. We mostly do our normal LRU
//   cache freeing to make space when required.
// - We can still run out of space (no surprise) and get into a configuration
//   where we have to defragment. This is the only tricky part, and where the
//   second pool comes in. To defragment, we switch the roles of "alloc" and
//   "alloc_other", and allocate new backing storage for all currently "locked"
//   and "pinned" resources (i.e. everything we've used in the currently
//   pending frame).
// - In general, we have the invariant that all resources we're using for
//   batches we're working on must be in the "alloc" (fresh) pool, not in the
//   "other" (stale) pool. Therefore, after a defragment/pool switch, any
//   "live" resource (which means it's present in the stale pool) has to be
//   copied to the "fresh" pool as it's getting locked to maintain this
//   invariant.
//
// What this does is give us a guarantee that any given frame either only
// references resources in one pool (the common case), or does a defragment, in
// which case it looks like this:
//
//   +------------------------------+
//   |                              |
//   |                              |  pool A is fresh (=alloc), pool B is stale (=alloc_other)
//   |                              |  all resources referenced in here are in pool A
//   |                              |
//   |                              |
//   |                              |
//   +------------------------------+ <-- defragment! pools flip roles here
//   |                              |
//   |                              |
//   |                              |  pool B is fresh (=alloc), pool A is stale (=alloc_other)
//   |                              |  all resources referenced in here are in pool B
//   |                              |
//   +------------------------------+
//
// Now, at the end of the frame, we need to decide what to do with the
// resources that remain "live" (i.e. they're in the old pool but weren't
// referenced in the current frame so they didn't get copied). As of this
// writing, we simply free them, to maximize the amount of free memory in the
// new pool (and hopefully minimize the chance that we'll have to defragment
// again soon). It would also be possible to copy some of them though, assuming
// there's enough space.
//
// Freeing resources is an interesting case. When the CPU side of GDraw does a
// "free", we can't immediately reclaim the resource memory, since the GPU will
// generally still have outstanding commands that reference that resource. So
// our freed resources first enter the "Dead" state and only actually get freed
// once the GPU is done with them. What this means is that the list of
// resources in the "dead" state can end up holding references to both the
// fresh and the stale pool; the free implementation needs to be aware of this
// and return the memory to the right allocator.
//
// When we defragment, it's important to make sure that the pool we're flipping
// to is actually empty. What this means is that right before a defragment, we
// need to wait for all stale "dead" resources to actually become free.
If the +// last defragment was several frames ago, this is fast - we haven't generated +// any new commands referencing the stale resources in several frames, so most +// likely they're all immediately free-able. By contrast, if we just +// defragmented last frame, this will be a slow operation since we need to wait +// for the GPU pipeline to drain - but if you're triggering defragments in +// several consecutive frames, you're thrashing the resource pools badly and +// are getting really bad performance anyway. + +static void gdraw_gpu_memcpy(GDrawHandleCache* c, void* dst, void* src, + U32 num_bytes); +static void gdraw_gpu_wait_for_transfer_completion(); +static void gdraw_resource_moved(GDrawHandle* t); + +static rrbool gdraw_CanDefragment(GDrawHandleCache* c) { + // we can defragment (and extract some gain from it) if and only if there's + // more than one free block. since gfxalloc coalesces free blocks + // immediately and keeps them in a circular linked list, this is very easy + // to detect: just check if the "next" pointer of the first free block + // points to the sentinel. (this is only the case if there are 0 or 1 free + // blocks) + gfx_allocator* alloc = c->alloc; + if (!c->alloc_other) // if we don't have a second pool, we can't defrag at + // all. 
+ return false; + return alloc->blocks[0].next->next != alloc->blocks; +} + +static rrbool gdraw_MigrateResource(GDrawHandle* t, GDrawStats* stats) { + GDrawHandleCache* c = t->cache; + void* ptr = NULL; + + assert(t->state == GDRAW_HANDLE_STATE_live || + t->state == GDRAW_HANDLE_STATE_locked || + t->state == GDRAW_HANDLE_STATE_pinned); + // anything we migrate should be in the "other" (old) pool + assert(gfxalloc_mem_contains(c->alloc_other, t->raw_ptr)); + + ptr = gfxalloc_alloc(c->alloc, t->bytes); + if (ptr) { + // update stats + stats->nonzero_flags |= GDRAW_STATS_defrag; + stats->defrag_objects += 1; + stats->defrag_bytes += t->bytes; + + // copy contents to new storage + gdraw_gpu_memcpy(c, ptr, t->raw_ptr, t->bytes); + + // free old storage + gfxalloc_free(c->alloc_other, t->raw_ptr); + + // adjust pointers to point to new location + t->raw_ptr = ptr; + gdraw_resource_moved(t); + + return true; + } else + return false; +} + +static rrbool gdraw_MigrateAllResources(GDrawHandle* sentinel, + GDrawStats* stats) { + GDrawHandle* h; + for (h = sentinel->next; h != sentinel; h = h->next) { + if (!gdraw_MigrateResource(h, stats)) return false; + } + return true; +} + +static rrbool gdraw_TwoPoolDefragmentMain(GDrawHandleCache* c, + GDrawStats* stats) { + gfx_allocator* t; + + // swap allocators + t = c->alloc; + c->alloc = c->alloc_other; + c->alloc_other = t; + + // immediately migrate all currently pinned and locked resources + rrbool ok = true; + ok = ok && + gdraw_MigrateAllResources(&c->state[GDRAW_HANDLE_STATE_pinned], stats); + ok = ok && + gdraw_MigrateAllResources(&c->state[GDRAW_HANDLE_STATE_locked], stats); + + return ok; +} + +static rrbool gdraw_StateListIsEmpty(GDrawHandle* head) { + // a list is empty when the head sentinel is the only node + return head->next == head; +} + +static void gdraw_CheckAllPointersUpdated(GDrawHandle* head) { +#if defined(GDRAW_DEBUG) + GDrawHandle* h; + for (h = head->next; h != head; h = h->next) { + 
assert(gfxalloc_mem_contains(h->cache->alloc, h->raw_ptr)); + } +#endif +} + +static void gdraw_PostDefragmentCleanup(GDrawHandleCache* c, + GDrawStats* stats) { + // if we defragmented during this scene, this is the spot where + // we need to nuke all references to resources that weren't + // carried over into the new pool. + if (c->did_defragment) { + GDrawHandle* h; + + // alloc list should be empty at this point + assert(gdraw_StateListIsEmpty(&c->state[GDRAW_HANDLE_STATE_alloc])); + + // free all remaining live resources (these are the resources we didn't + // touch this frame, hence stale) + h = &c->state[GDRAW_HANDLE_STATE_live]; + while (!gdraw_StateListIsEmpty(h)) gdraw_res_free(h->next, stats); + + // "live" is now empty, and we already checked that "alloc" was empty + // earlier. "dead" may hold objects on the old heap still (that were + // freed before we swapped allocators). "user owned" is not managed by + // us. that leaves "locked" and "pinned" resources, both of which better + // be only pointing into the new heap now! + gdraw_CheckAllPointersUpdated(&c->state[GDRAW_HANDLE_STATE_locked]); + gdraw_CheckAllPointersUpdated(&c->state[GDRAW_HANDLE_STATE_pinned]); + + gdraw_gpu_wait_for_transfer_completion(); + } +} + +#endif + +// Image processing code + +// Compute average of 4 RGBA8888 pixels passed as U32. +// Variables are named assuming the values are stored as big-endian, but all +// bytes are treated equally, so this code will work just fine on little-endian +// data. 
+static U32 gdraw_Avg4_rgba8888(U32 p0, U32 p1, U32 p2, U32 p3) { + U32 mask = 0x00ff00ff; + U32 bias = 0x00020002; + + U32 gasum = ((p0 >> 0) & mask) + ((p1 >> 0) & mask) + ((p2 >> 0) & mask) + + ((p3 >> 0) & mask) + bias; + U32 rbsum = ((p0 >> 8) & mask) + ((p1 >> 8) & mask) + ((p2 >> 8) & mask) + + ((p3 >> 8) & mask) + bias; + + return ((gasum >> 2) & mask) | ((rbsum << 6) & ~mask); +} + +// Compute average of 2 RGBA8888 pixels passed as U32 +static U32 gdraw_Avg2_rgba8888(U32 p0, U32 p1) { + return (p0 | p1) - (((p0 ^ p1) >> 1) & 0x7f7f7f7f); +} + +// 2:1 downsample in both horizontal and vertical direction, for one line. +// width is width of destination line. +static void gdraw_Downsample_2x2_line(U8* dst, U8* line0, U8* line1, U32 width, + U32 bpp) { + U32 x; + if (bpp == 4) { + U32* in0 = (U32*)line0; + U32* in1 = (U32*)line1; + U32* out = (U32*)dst; + for (x = 0; x < width; x++, in0 += 2, in1 += 2) + *out++ = gdraw_Avg4_rgba8888(in0[0], in0[1], in1[0], in1[1]); + } else if (bpp == 1) { + for (x = 0; x < width; x++, line0 += 2, line1 += 2) + *dst++ = (line0[0] + line0[1] + line1[0] + line1[1] + 2) / 4; + } else + RR_BREAK(); +} + +// 2:1 downsample in horizontal but not vertical direction. +static void gdraw_Downsample_2x1_line(U8* dst, U8* src, U32 width, U32 bpp) { + U32 x; + if (bpp == 4) { + U32* in = (U32*)src; + U32* out = (U32*)dst; + for (x = 0; x < width; x++, in += 2) + *out++ = gdraw_Avg2_rgba8888(in[0], in[1]); + } else if (bpp == 1) { + for (x = 0; x < width; x++, src += 2) + *dst++ = (src[0] + src[1] + 1) / 2; + } else + RR_BREAK(); +} + +// 2:1 downsample in vertical but not horizontal direction. 
+static void gdraw_Downsample_1x2(U8* dst, S32 dstpitch, U8* src, S32 srcpitch, + U32 height, U32 bpp) { + U32 y; + if (bpp == 4) { + for (y = 0; y < height; y++, dst += dstpitch, src += 2 * srcpitch) + *((U32*)dst) = + gdraw_Avg2_rgba8888(*((U32*)src), *((U32*)(src + srcpitch))); + } else if (bpp == 1) { + for (y = 0; y < height; y++, dst += dstpitch, src += 2 * srcpitch) + *dst = (src[0] + src[srcpitch] + 1) / 2; + } else + RR_BREAK(); +} + +// 2:1 downsample (for mipmaps) +// dst: Pointer to destination buffer +// dstpitch: Pitch for destination buffer +// width: Width of *destination* image (i.e. downsampled version) +// height: Height of *destination* image (i.e. downsampled version) +// src: Pointer to source buffer +// srcpitch: Pitch of source buffer +// bpp: Bytes per pixel for image data +// +// can be used for in-place resizing if src==dst and dstpitch <= srcpitch! +static GDRAW_MAYBE_UNUSED void gdraw_Downsample(U8* dst, S32 dstpitch, + U32 width, U32 height, U8* src, + S32 srcpitch, U32 bpp) { + U32 y; + assert(bpp == 1 || bpp == 4); + + // @TODO gamma? + if (!height) // non-square texture, height was reduced to 1 in a previous + // step + gdraw_Downsample_2x1_line(dst, src, width, bpp); + else if (!width) // non-square texture, width was reduced to 1 in a + // previous step + gdraw_Downsample_1x2(dst, dstpitch, src, srcpitch, height, bpp); + else { + for (y = 0; y < height; y++) { + gdraw_Downsample_2x2_line(dst, src, src + srcpitch, width, bpp); + dst += dstpitch; + src += 2 * srcpitch; + } + } +} + +#if !defined(GDRAW_NO_STREAMING_MIPGEN) + +#define GDRAW_MAXMIPS 16 // maximum number of mipmaps supported. + +typedef struct GDrawMipmapContext { + U32 width; // width of the texture being mipmapped + U32 height; // height of the texture being mipmapped + U32 mipmaps; // number of mipmaps + U32 bpp; // bytes per pixel + + U32 partial_row; // bit N: is mipmap N currently storing a partial row? 
+ U32 bheight; // height of the buffer at miplevel 0 + U8* pixels[GDRAW_MAXMIPS]; + U32 pitch[GDRAW_MAXMIPS]; +} GDrawMipmapContext; + +static rrbool gdraw_MipmapBegin(GDrawMipmapContext* c, U32 width, U32 height, + U32 mipmaps, U32 bpp, U8* buffer, + U32 buffer_size) { + U32 i; + U8* p; + + if (mipmaps > GDRAW_MAXMIPS) return false; + + c->width = width; + c->height = height; + c->mipmaps = mipmaps; + c->bpp = bpp; + c->partial_row = 0; + + // determine how many lines to buffer + // we try to use roughly 2/3rds of the buffer for the first miplevel (less + // than 3/4 since with our partial line buffers, we have extra buffer space + // for lower mip levels). + c->bheight = (2 * buffer_size) / (3 * width * bpp); + + // round down to next-smaller power of 2 (in case we need to swizzle; + // swizzling works on pow2-sized blocks) + while (c->bheight & (c->bheight - 1)) // while not a power of 2... + c->bheight &= c->bheight - 1; // clear least significant bit set + + // then keep lowering the number of buffered lines until they fit (or we + // reach zero, i.e. it doesn't fit) + while (c->bheight) { + p = buffer; + for (i = 0; i < c->mipmaps; i++) { + U32 mw = c->width >> i; + U32 bh = c->bheight >> i; + if (!mw) mw++; + if (!bh) mw *= 2, bh++; // need space for line of previous miplevel + + c->pixels[i] = p; + c->pitch[i] = mw * bpp; + p += c->pitch[i] * bh; + } + + // if it fits, we're done + if (p <= buffer + buffer_size) { + if (c->bheight > + height) // buffer doesn't need to be larger than the image! + c->bheight = height; + return true; + } + + // need to try a smaller line buffer... + c->bheight >>= 1; + } + + // can't fit even one line into our buffer. ouch! + return false; +} + +// returns true if there was data generated for this miplevel, false otherwise. 
+// Produces the next output for mip `level` from the data held for
+// level - 1 in the context. `level` must be > 0 and < c->mipmaps; otherwise
+// nothing is generated and false is returned.
+static rrbool gdraw_MipmapAddLines(GDrawMipmapContext* c, U32 level) {
+    U32 bw, bh;
+
+    assert(level > 0);  // doesn't make sense to call this on level 0
+    if (level == 0 || level >= c->mipmaps)
+        return false;  // this level doesn't exist
+
+    bw = c->width >> level;    // buffer width at this level
+    bh = c->bheight >> level;  // buffer height at this level
+
+    if (bh) {  // we can still do regular downsampling
+        gdraw_Downsample(c->pixels[level], c->pitch[level], bw, bh,
+                         c->pixels[level - 1], c->pitch[level - 1], c->bpp);
+        return true;
+    } else if (c->height >> level) {  // need to buffer partial lines, but still
+                                      // doing vertical 2:1 downsampling
+        // the XOR toggles this level's "partial row" bit; the test then reads
+        // the bit's new value.
+        if ((c->partial_row ^= (1 << level)) &
+            (1 << level)) {  // no buffered partial row for this miplevel yet,
+                             // make one
+            memcpy(c->pixels[level], c->pixels[level - 1], bw * 2 * c->bpp);
+            return false;
+        } else {  // have one buffered row, can generate output pixels
+            gdraw_Downsample_2x2_line(c->pixels[level], c->pixels[level],
+                                      c->pixels[level - 1], bw, c->bpp);
+            return true;
+        }
+    } else {  // finish off with a chain of Nx1 miplevels
+        gdraw_Downsample_2x1_line(c->pixels[level], c->pixels[level - 1], bw,
+                                  c->bpp);
+        return true;
+    }
+}
+
+#endif
+
+#if defined(GDRAW_CHECK_BLOCK)
+// Debug consistency check: counts how often `ptr` appears in the allocator's
+// hash chains (n) and in the list rooted at alloc->blocks[0] (m). Asserts
+// n == 1 && m == 0 when `allocated` is set, and n == 0 && m == 1 otherwise.
+static void check_block_alloc(gfx_allocator* alloc, void* ptr,
+                              rrbool allocated) {
+    int i, n = 0, m = 0;
+    for (i = 0; i < GFXALLOC_HASH_SIZE; ++i) {
+        gfx_block_info* b = alloc->hash[i];
+        while (b) {
+            if (b->ptr == ptr) ++n;
+            b = b->next;
+        }
+    }
+    gfx_block_info* b = alloc->blocks[0].next;
+    while (b != &alloc->blocks[0]) {
+        if (b->ptr == ptr) ++m;
+        b = b->next;
+    }
+    if (allocated)
+        assert(n == 1 && m == 0);
+    else
+        assert(n == 0 && m == 1);
+}
+#else
+#define check_block_alloc(a, p, f)
+#endif
+
+#if defined(GDRAW_BUFFER_RING)
+
+////////////////////////////////////////////////////////////////////////
+//
+// Buffer ring
+//
+
+// Implements a dynamic buffer backed by multiple physical buffers, with
+// the usual append-only, DISCARD/NOOVERWRITE semantics.
+//
+// This can be used for dynamic vertex buffers, constant buffers, etc.
+#define GDRAW_BUFRING_MAXSEGS 4  // max number of backing segments
+
+typedef struct gdraw_bufring_seg {
+    struct gdraw_bufring_seg* next;  // next segment in ring
+    U8* data;                        // pointer to the allocation
+    GDrawFence fence;                // fence for this segment
+    U32 used;                        // number of bytes used
+} gdraw_bufring_seg;
+
+typedef struct gdraw_bufring {
+    gdraw_bufring_seg* cur;  // active ring segment
+    U32 seg_size;            // size of one segment
+    U32 align;               // alignment of segment allocations
+    gdraw_bufring_seg all_segs[GDRAW_BUFRING_MAXSEGS];
+} gdraw_bufring;
+
+// forwards
+static GDrawFence put_fence();
+static void wait_on_fence(GDrawFence fence);
+
+// Splits the memory at `ptr` (`size` bytes) into `nsegs` segments (clamped to
+// GDRAW_BUFRING_MAXSEGS) of equal size, rounded down to a multiple of
+// `align`, and links them into a ring starting at all_segs[0].
+// If the arguments are rejected, ring->seg_size is left at 0, so
+// gdraw_bufring_alloc returns NULL for every non-zero request.
+static void gdraw_bufring_init(gdraw_bufring* RADRESTRICT ring, void* ptr,
+                               U32 size, U32 nsegs, U32 align) {
+    U32 i, seg_size;
+
+    ring->seg_size = 0;
+    if (!ptr || nsegs < 1 ||
+        size < nsegs * align)  // bail if no ring buffer memory or too small
+        return;
+
+    if (nsegs > GDRAW_BUFRING_MAXSEGS) nsegs = GDRAW_BUFRING_MAXSEGS;
+
+    // align needs to be a positive power of two
+    assert(align >= 1 && (align & (align - 1)) == 0);
+
+    // buffer really needs to be properly aligned
+    assert(((UINTa)ptr & (align - 1)) == 0);
+
+    seg_size = (size / nsegs) & ~(align - 1);
+    for (i = 0; i < nsegs; ++i) {
+        ring->all_segs[i].next = &ring->all_segs[(i + 1) % nsegs];
+        ring->all_segs[i].data = (U8*)ptr + i * seg_size;
+        ring->all_segs[i].fence.value = 0;
+        ring->all_segs[i].used = 0;
+    }
+
+    ring->cur = ring->all_segs;
+    ring->seg_size = seg_size;
+    ring->align = align;
+}
+
+// Marks the ring as unusable: no active segment and a segment size of 0.
+static void gdraw_bufring_shutdown(gdraw_bufring* RADRESTRICT ring) {
+    ring->cur = NULL;
+    ring->seg_size = 0;
+}
+
+// Sub-allocates `size` bytes with the given alignment from the ring.
+// Returns NULL if `size` exceeds one segment. If the request does not fit in
+// the active segment, that segment is fenced, the ring advances to the next
+// segment, waits on that segment's fence and hands out its start.
+static void* gdraw_bufring_alloc(gdraw_bufring* RADRESTRICT ring, U32 size,
+                                 U32 align) {
+    U32 align_up;
+    gdraw_bufring_seg* seg;
+
+    if (size > ring->seg_size) return NULL;  // nope, won't fit
+
+    assert(align <= ring->align);
+
+    // check if it fits in the active segment first
+    seg = ring->cur;
+    align_up = (seg->used + align - 1) & -align;
+
+    if ((align_up + size) <= ring->seg_size) {
+        void* ptr = seg->data + align_up;
+        seg->used = align_up + size;
+        return ptr;
+    }
+
+    // doesn't fit, we have to start a new ring segment.
+    seg->fence = put_fence();
+
+    // switch to the next segment, wait till GPU is done with it
+    seg = ring->cur = seg->next;
+    wait_on_fence(seg->fence);
+
+    // allocate from the new segment. we assume that segment offsets
+    // satisfy the highest alignment requirements we ever ask for!
+    seg->used = size;
+    return seg->data;
+}
+
+#endif
+
+////////////////////////////////////////////////////////////////////////
+//
+// General resource manager
+//
+
+#if !defined(GDRAW_FENCE_FLUSH)
+#define GDRAW_FENCE_FLUSH()
+#endif
+
+#if defined(GDRAW_MANAGE_MEM)
+// functions the platform must implement
+#if !defined(GDRAW_BUFFER_RING)  // avoid "redundant redeclaration" warning
+static void wait_on_fence(GDrawFence fence);
+#endif
+static rrbool is_fence_pending(GDrawFence fence);
+static void gdraw_defragment_cache(GDrawHandleCache* c, GDrawStats* stats);
+
+// functions we implement
+static void gdraw_res_reap(GDrawHandleCache* c, GDrawStats* stats);
+#endif
+
+// If GDRAW_MANAGE_MEM is not #defined, this needs to perform the
+// actual free using whatever API we're targeting.
+//
+// If GDRAW_MANAGE_MEM is #defined, the shared code handles the
+// memory management part, but you might still need to update
+// your state caching.
+static void api_free_resource(GDrawHandle* r);
+
+// Actually frees a resource and releases all allocated resources.
+// With GDRAW_MANAGE_MEM this first waits on the resource's fence and returns
+// r->raw_ptr (if set) to the allocator that contains it. In all builds it
+// then calls api_free_resource, records the free in `stats` and releases the
+// handle through gdraw_HandleCacheFree.
+static void gdraw_res_free(GDrawHandle* r, GDrawStats* stats) {
+    assert(r->state == GDRAW_HANDLE_STATE_live ||
+           r->state == GDRAW_HANDLE_STATE_locked ||
+           r->state == GDRAW_HANDLE_STATE_dead ||
+           r->state == GDRAW_HANDLE_STATE_pinned ||
+           r->state == GDRAW_HANDLE_STATE_user_owned);
+
+#if defined(GDRAW_MANAGE_MEM)
+    GDRAW_FENCE_FLUSH();
+
+    // make sure resource isn't in use before we actually free the memory
+    wait_on_fence(r->fence);
+    if (r->raw_ptr) {
+#if !defined(GDRAW_MANAGE_MEM_TWOPOOL)
+        gfxalloc_free(r->cache->alloc, r->raw_ptr);
+#else
+        // two-pool build: free from whichever allocator contains the pointer
+        GDrawHandleCache* c = r->cache;
+        if (gfxalloc_mem_contains(c->alloc, r->raw_ptr))
+            gfxalloc_free(c->alloc, r->raw_ptr);
+        else {
+            assert(gfxalloc_mem_contains(c->alloc_other, r->raw_ptr));
+            gfxalloc_free(c->alloc_other, r->raw_ptr);
+        }
+#endif
+    }
+#endif
+
+    api_free_resource(r);
+
+    stats->nonzero_flags |= GDRAW_STATS_frees;
+    stats->freed_objects += 1;
+    stats->freed_bytes += r->bytes;
+
+    gdraw_HandleCacheFree(r);
+}
+
+// Frees the LRU resource in the given cache.
+// Returns false if gdraw_HandleCacheGetLRU yields no handle, true after one
+// resource has been freed.
+static rrbool gdraw_res_free_lru(GDrawHandleCache* c, GDrawStats* stats) {
+    GDrawHandle* r = gdraw_HandleCacheGetLRU(c);
+    if (!r) return false;
+
+    if (c->is_vertex && r->owner)  // check for r->owner since it may already be
+                                   // killed (if player destroyed first)
+        IggyDiscardVertexBufferCallback(r->owner, r);
+
+    // was it referenced since end of previous frame (=in this frame)?
+    // if so, we're thrashing; report it to the user, but only once per frame.
+    if (c->prev_frame_end.value < r->fence.value && !c->is_thrashing) {
+        IggyGDrawSendWarning(NULL, c->is_vertex
+                                       ? "GDraw Thrashing vertex memory"
+                                       : "GDraw Thrashing texture memory");
+        c->is_thrashing = true;
+    }
+
+    gdraw_res_free(r, stats);
+    return true;
+}
+
+// Unlocks every handle in the cache and then frees LRU resources until
+// gdraw_res_free_lru reports that nothing is left to free.
+static void gdraw_res_flush(GDrawHandleCache* c, GDrawStats* stats) {
+    c->is_thrashing = true;  // prevents warnings being generated from free_lru
+    gdraw_HandleCacheUnlockAll(c);
+    while (gdraw_res_free_lru(c, stats));
+}
+
+// Reports an out-of-memory warning that names `failed_type`. If `t` is
+// non-NULL it is first handed to gdraw_HandleCacheAllocateFail.
+// Always returns NULL, so callers can return the call's result directly.
+static GDrawHandle* gdraw_res_alloc_outofmem(GDrawHandleCache* c,
+                                             GDrawHandle* t,
+                                             char const* failed_type) {
+    if (t) gdraw_HandleCacheAllocateFail(t);
+    IggyGDrawSendWarning(NULL,
+                         c->is_vertex ? "GDraw Out of static vertex buffer %s"
+                                      : "GDraw Out of texture %s",
+                         failed_type);
+    return NULL;
+}
+
+#if !defined(GDRAW_MANAGE_MEM)
+
+// Variant used when GDRAW_MANAGE_MEM is not defined: evicts LRU resources
+// until c->bytes_free covers `size`, then allocates a handle.
+// Returns the handle, or NULL if none could be obtained.
+// NOTE(review): an oversized request or a failed eviction only emits a
+// warning here; the function still goes on to allocate a handle - confirm
+// that this is intended.
+static GDrawHandle* gdraw_res_alloc_begin(GDrawHandleCache* c, S32 size,
+                                          GDrawStats* stats) {
+    GDrawHandle* t;
+    if (size > c->total_bytes)
+        gdraw_res_alloc_outofmem(
+            c, NULL, "memory (single resource larger than entire pool)");
+    else {
+        // given how much data we're going to allocate, throw out
+        // data until there's "room" (this basically lets us use
+        // managed memory and just bound our usage, without actually
+        // packing it and being exact)
+        while (c->bytes_free < size) {
+            if (!gdraw_res_free_lru(c, stats)) {
+                gdraw_res_alloc_outofmem(c, NULL, "memory");
+                break;
+            }
+        }
+    }
+
+    // now try to allocate a handle
+    t = gdraw_HandleCacheAllocateBegin(c);
+    if (!t) {
+        // it's possible we have no free handles, because all handles
+        // are in use without exceeding the max storage above--in that
+        // case, just free one texture to give us a free handle (ideally
+        // we'd trade off cost of regenerating)
+        if (gdraw_res_free_lru(c, stats)) {
+            t = gdraw_HandleCacheAllocateBegin(c);
+            if (t == NULL) {
+                gdraw_res_alloc_outofmem(c, NULL, "handles");
+            }
+        }
+    }
+    return t;
+}
+
+#else
+
+// Returns whether this resource holds pointers to one of the GDraw-managed
+// pools.
+static rrbool gdraw_res_is_managed(GDrawHandle* r) {
+    // true for the live, locked, dead and pinned states only
+    return r->state == GDRAW_HANDLE_STATE_live ||
+           r->state == GDRAW_HANDLE_STATE_locked ||
+           r->state == GDRAW_HANDLE_STATE_dead ||
+           r->state == GDRAW_HANDLE_STATE_pinned;
+}
+
+// "Reaps" dead resources. Even if the user requests that a
+// resource be freed, it might still be in use in a pending
+// command buffer. So we can't free the associated memory
+// immediately; instead, we flag the resource as "dead" and
+// periodically check whether we can actually free the
+// pending memory of dead resources ("reap" them).
+static void gdraw_res_reap(GDrawHandleCache* c, GDrawStats* stats) {
+    GDrawHandle* sentinel = &c->state[GDRAW_HANDLE_STATE_dead];
+    GDrawHandle* t;
+    GDRAW_FENCE_FLUSH();
+
+    // reap all dead resources that aren't in use anymore
+    // (stops at the first entry whose fence is still pending)
+    while ((t = sentinel->next) != sentinel && !is_fence_pending(t->fence))
+        gdraw_res_free(t, stats);
+}
+
+// "Kills" a resource. This means GDraw won't use it anymore
+// (it's dead), but there might still be outstanding references
+// to it in a pending command buffer, so we can't physically
+// free the associated memory until that's all processed.
+static void gdraw_res_kill(GDrawHandle* r, GDrawStats* stats) {
+    GDRAW_FENCE_FLUSH();  // dead list is sorted by fence index - make sure all
+                          // fence values are current.
+
+    r->owner = NULL;
+    gdraw_HandleCacheInsertDead(r);
+    gdraw_res_reap(r->cache, stats);
+}
+
+// Variant used when GDRAW_MANAGE_MEM is defined.
+// Reaps dead resources, obtains a handle (evicting one LRU resource if no
+// handle is free) and then allocates `size` bytes from c->alloc, evicting
+// LRU resources and defragmenting as needed.
+// On success returns the handle with fence.value reset to 0 and raw_ptr set
+// to the allocation (NULL when size == 0). On failure reports the reason via
+// gdraw_res_alloc_outofmem and returns NULL.
+static GDrawHandle* gdraw_res_alloc_begin(GDrawHandleCache* c, S32 size,
+                                          GDrawStats* stats) {
+    GDrawHandle* t;
+    void* ptr = NULL;
+
+    gdraw_res_reap(c, stats);  // NB this also does GDRAW_FENCE_FLUSH();
+    if (size > c->total_bytes)
+        return gdraw_res_alloc_outofmem(
+            c, NULL, "memory (single resource larger than entire pool)");
+
+    // now try to allocate a handle
+    t = gdraw_HandleCacheAllocateBegin(c);
+    if (!t) {
+        // it's possible we have no free handles, because all handles
+        // are in use without exceeding the max storage above--in that
+        // case, just free one texture to give us a free handle (ideally
+        // we'd trade off cost of regenerating)
+        gdraw_res_free_lru(c, stats);
+        t = gdraw_HandleCacheAllocateBegin(c);
+        if (!t) return gdraw_res_alloc_outofmem(c, NULL, "handles");
+    }
+
+    // try to allocate first
+    if (size) {
+        ptr = gfxalloc_alloc(c->alloc, size);
+        if (!ptr) {
+            // doesn't currently fit. try to free some allocations to get space
+            // to breathe.
+            S32 want_free = RR_MAX(size + (size / 2), GDRAW_MIN_FREE_AMOUNT);
+            if (want_free > c->total_bytes)
+                want_free = size;  // okay, *really* big resource, just try to
+                                   // allocate its real size
+
+            // always keep freeing textures until want_free bytes are free.
+            while (c->alloc->actual_bytes_free < want_free) {
+                if (!gdraw_res_free_lru(c, stats))
+                    return gdraw_res_alloc_outofmem(c, t, "memory");
+            }
+
+            // now, keep trying to allocate and free some more memory when it
+            // still doesn't fit
+            while (!(ptr = gfxalloc_alloc(c->alloc, size))) {
+                if (c->alloc->actual_bytes_free >=
+                        3 * size ||  // if we should have enough free bytes to
+                                     // satisfy the request by now
+                    (c->alloc->actual_bytes_free >= size &&
+                     size * 2 >=
+                         c->total_bytes))  // or the resource is very big and
+                                           // the alloc doesn't fit
+                {
+                    // before we actually consider defragmenting, we want to
+                    // free all stale resources (not referenced in the previous
+                    // 2 frames). and if that frees up enough memory so we don't
+                    // have to defragment, all the better! also, never
+                    // defragment twice in a frame, just assume we're thrashing
+                    // when we get in that situation and free up as much as
+                    // possible.
+                    if (!c->did_defragment &&
+                        c->prev_frame_start.value <= c->handle->fence.value) {
+                    // defragment.
+                    // (the label below is also the target of the
+                    // "goto defrag" further down in this loop)
+                    defrag:
+                        if (gdraw_CanDefragment(
+                                c)) {  // only try defrag if it has a chance of
+                                       // helping.
+                            gdraw_defragment_cache(c, stats);
+                            c->did_defragment = true;
+                        }
+                        ptr = gfxalloc_alloc(c->alloc, size);
+                        if (!ptr)
+                            return gdraw_res_alloc_outofmem(
+                                c, t, "memory (fragmentation)");
+                        break;
+                    }
+                }
+
+                // keep trying to free some more
+                if (!gdraw_res_free_lru(c, stats)) {
+                    if (c->alloc->actual_bytes_free >=
+                        size)  // nothing left to free but we should be good -
+                               // defrag again, even if it's the second time in
+                               // a frame
+                        goto defrag;
+
+                    return gdraw_res_alloc_outofmem(c, t, "memory");
+                }
+            }
+        }
+    }
+
+    t->fence.value = 0;  // hasn't been used yet
+    t->raw_ptr = ptr;
+    return t;
+}
+
+#endif
\ No newline at end of file diff --git a/targets/app/common/Iggy/include/iggy.h b/targets/app/common/Iggy/include/iggy.h index 188064dac..21f839c8f 100644 --- a/targets/app/common/Iggy/include/iggy.h +++ b/targets/app/common/Iggy/include/iggy.h @@ -126,6 +126,7 @@ typedef enum IggyDatatype { #include IDOCN typedef char IggyUTF16; #else +#include typedef const char16_t IggyUTF16; #endif diff --git a/targets/app/common/UI/ConsoleUIController.cpp b/targets/app/common/UI/ConsoleUIController.cpp index 030342608..a72817a6c 100644 --- a/targets/app/common/UI/ConsoleUIController.cpp +++ b/targets/app/common/UI/ConsoleUIController.cpp @@ -12,7 +12,7 @@ #endif #include "app/common/Iggy/include/rrCore.h" #include "app/common/Game.h" -#include "app/windows/Iggy/include/gdraw.h" +#include "app/common/Iggy/include/gdraw.h" ConsoleUIController ui; diff --git a/targets/app/common/UI/UIController.cpp b/targets/app/common/UI/UIController.cpp index d4cfc9edf..6a8c6b5b9 100644 --- a/targets/app/common/UI/UIController.cpp +++ b/targets/app/common/UI/UIController.cpp @@ -58,19 +58,12 @@ class Tutorial; // #define EXCLUDE_IGGY_ALLOCATIONS_FROM_HEAP_INSPECTOR // #define ENABLE_IGGY_EXPLORER -#if defined(ENABLE_IGGY_EXPLORER) -#include "app/windows/Iggy/include/iggyexpruntime.h" -#endif // #define ENABLE_IGGY_PERFMON #if defined(ENABLE_IGGY_PERFMON) #define PM_ORIGIN_X 24 -#define
PM_ORIGIN_Y 34 - -#if defined(__WINDOWS64) -#include "app/windows/Iggy/include/iggyperfmon.h" -#endif +#define PM_ORIGIN_ #endif