ReactOS Fundraising Campaign 2012
 
€ 4,410 / € 30,000

Information | Donate

Home | Info | Community | Development | myReactOS | Contact Us

  1. Home
  2. Community
  3. Development
  4. myReactOS
  5. Fundraiser 2012

  1. Main Page
  2. Alphabetical List
  3. Data Structures
  4. Directories
  5. File List
  6. Data Fields
  7. Globals
  8. Related Pages

ReactOS Development > Doxygen

arb_program_shader.c
Go to the documentation of this file.
00001 /*
00002  * Pixel and vertex shaders implementation using ARB_vertex_program
00003  * and ARB_fragment_program GL extensions.
00004  *
00005  * Copyright 2002-2003 Jason Edmeades
00006  * Copyright 2002-2003 Raphael Junqueira
00007  * Copyright 2004 Christian Costa
00008  * Copyright 2005 Oliver Stieber
00009  * Copyright 2006 Ivan Gyurdiev
00010  * Copyright 2006 Jason Green
00011  * Copyright 2006 Henri Verbeet
00012  * Copyright 2007-2008 Stefan Dösinger for CodeWeavers
00013  * Copyright 2009 Henri Verbeet for CodeWeavers
00014  *
00015  * This library is free software; you can redistribute it and/or
00016  * modify it under the terms of the GNU Lesser General Public
00017  * License as published by the Free Software Foundation; either
00018  * version 2.1 of the License, or (at your option) any later version.
00019  *
00020  * This library is distributed in the hope that it will be useful,
00021  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00022  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00023  * Lesser General Public License for more details.
00024  *
00025  * You should have received a copy of the GNU Lesser General Public
00026  * License along with this library; if not, write to the Free Software
00027  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
00028  */
00029 
00030 #include "config.h"
00031 
00032 #include <math.h>
00033 #include <stdio.h>
00034 
00035 #include "wined3d_private.h"
00036 
00037 WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader);
00038 WINE_DECLARE_DEBUG_CHANNEL(d3d_constants);
00039 WINE_DECLARE_DEBUG_CHANNEL(d3d_caps);
00040 WINE_DECLARE_DEBUG_CHANNEL(d3d);
00041 
/* Split the next line off *ptr and return it.
 *
 * The returned pointer is the position *ptr had on entry. When a '\n' is
 * found it is overwritten with '\0' (the source string is modified) and *ptr
 * is moved just behind it. Without a '\n' the remaining text is returned and
 * *ptr is moved onto the terminating '\0'. Returns NULL once nothing is left. */
static char *get_line(char **ptr)
{
    char *line = *ptr;
    char *eol = strchr(line, '\n');

    if (eol)
    {
        *eol = '\0';
        *ptr = eol + 1;
        return line;
    }

    /* No newline left: either the final unterminated line, or the end. */
    if (*line == '\0')
        return NULL;

    *ptr = line + strlen(line);
    return line;
}
00059 
00060 static void shader_arb_dump_program_source(const char *source)
00061 {
00062     ULONG source_size;
00063     char *ptr, *line, *tmp;
00064 
00065     source_size = strlen(source) + 1;
00066     tmp = HeapAlloc(GetProcessHeap(), 0, source_size);
00067     if (!tmp)
00068     {
00069         ERR("Failed to allocate %u bytes for shader source.\n", source_size);
00070         return;
00071     }
00072     memcpy(tmp, source, source_size);
00073 
00074     ptr = tmp;
00075     while ((line = get_line(&ptr))) FIXME("    %s\n", line);
00076     FIXME("\n");
00077 
00078     HeapFree(GetProcessHeap(), 0, tmp);
00079 }
00080 
/* Helper values that can be requested from arb_get_helper_value(). */
enum arb_helper_value
{
    ARB_ZERO = 0,           /* 0.0 */
    ARB_ONE = 1,            /* 1.0 */
    ARB_TWO = 2,            /* 2.0 */
    ARB_0001 = 3,           /* {0.0, 0.0, 0.0, 1.0} */
    ARB_EPS = 4,            /* small epsilon */

    ARB_VS_REL_OFFSET = 5   /* only mapped for non-pixel shaders */
};
00091 
00092 static const char *arb_get_helper_value(enum wined3d_shader_type shader, enum arb_helper_value value)
00093 {
00094     if (shader == WINED3D_SHADER_TYPE_GEOMETRY)
00095     {
00096         ERR("Geometry shaders are unsupported\n");
00097         return "bad";
00098     }
00099 
00100     if (shader == WINED3D_SHADER_TYPE_PIXEL)
00101     {
00102         switch (value)
00103         {
00104             case ARB_ZERO: return "ps_helper_const.x";
00105             case ARB_ONE: return "ps_helper_const.y";
00106             case ARB_TWO: return "coefmul.x";
00107             case ARB_0001: return "ps_helper_const.xxxy";
00108             case ARB_EPS: return "ps_helper_const.z";
00109             default: break;
00110         }
00111     }
00112     else
00113     {
00114         switch (value)
00115         {
00116             case ARB_ZERO: return "helper_const.x";
00117             case ARB_ONE: return "helper_const.y";
00118             case ARB_TWO: return "helper_const.z";
00119             case ARB_EPS: return "helper_const.w";
00120             case ARB_0001: return "helper_const.xxxy";
00121             case ARB_VS_REL_OFFSET: return "rel_addr_const.y";
00122         }
00123     }
00124     FIXME("Unmanaged %s shader helper constant requested: %u\n",
00125           shader == WINED3D_SHADER_TYPE_PIXEL ? "pixel" : "vertex", value);
00126     switch (value)
00127     {
00128         case ARB_ZERO: return "0.0";
00129         case ARB_ONE: return "1.0";
00130         case ARB_TWO: return "2.0";
00131         case ARB_0001: return "{0.0, 0.0, 0.0, 1.0}";
00132         case ARB_EPS: return "1e-8";
00133         default: return "bad";
00134     }
00135 }
00136 
00137 static inline BOOL ffp_clip_emul(const struct wined3d_state *state)
00138 {
00139     return state->lowest_disabled_stage < 7;
00140 }
00141 
00142 /* ARB_program_shader private data */
00143 
00144 struct control_frame
00145 {
00146     struct                          list entry;
00147     enum
00148     {
00149         IF,
00150         IFC,
00151         LOOP,
00152         REP
00153     } type;
00154     BOOL                            muting;
00155     BOOL                            outer_loop;
00156     union
00157     {
00158         unsigned int                loop;
00159         unsigned int                ifc;
00160     } no;
00161     struct wined3d_shader_loop_control loop_control;
00162     BOOL                            had_else;
00163 };
00164 
00165 struct arb_ps_np2fixup_info
00166 {
00167     struct ps_np2fixup_info         super;
00168     /* For ARB we need a offset value:
00169      * With both GLSL and ARB mode the NP2 fixup information (the texture dimensions) are stored in a
00170      * consecutive way (GLSL uses a uniform array). Since ARB doesn't know the notion of a "standalone"
00171      * array we need an offset to the index inside the program local parameter array. */
00172     UINT                            offset;
00173 };
00174 
00175 struct arb_ps_compile_args
00176 {
00177     struct ps_compile_args          super;
00178     WORD                            bools;
00179     WORD                            clip;  /* only a boolean, use a WORD for alignment */
00180     unsigned char                   loop_ctrl[MAX_CONST_I][3];
00181 };
00182 
00183 struct stb_const_desc
00184 {
00185     unsigned char           texunit;
00186     UINT                    const_num;
00187 };
00188 
00189 struct arb_ps_compiled_shader
00190 {
00191     struct arb_ps_compile_args      args;
00192     struct arb_ps_np2fixup_info     np2fixup_info;
00193     struct stb_const_desc           bumpenvmatconst[MAX_TEXTURES];
00194     struct stb_const_desc           luminanceconst[MAX_TEXTURES];
00195     UINT                            int_consts[MAX_CONST_I];
00196     GLuint                          prgId;
00197     UINT                            ycorrection;
00198     unsigned char                   numbumpenvmatconsts;
00199     char                            num_int_consts;
00200 };
00201 
00202 struct arb_vs_compile_args
00203 {
00204     struct vs_compile_args          super;
00205     union
00206     {
00207         struct
00208         {
00209             WORD                    bools;
00210             unsigned char           clip_texcoord;
00211             unsigned char           clipplane_mask;
00212         }                           boolclip;
00213         DWORD                       boolclip_compare;
00214     } clip;
00215     DWORD                           ps_signature;
00216     union
00217     {
00218         unsigned char               samplers[4];
00219         DWORD                       samplers_compare;
00220     } vertex;
00221     unsigned char                   loop_ctrl[MAX_CONST_I][3];
00222 };
00223 
00224 struct arb_vs_compiled_shader
00225 {
00226     struct arb_vs_compile_args      args;
00227     GLuint                          prgId;
00228     UINT                            int_consts[MAX_CONST_I];
00229     char                            num_int_consts;
00230     char                            need_color_unclamp;
00231     UINT                            pos_fixup;
00232 };
00233 
00234 struct recorded_instruction
00235 {
00236     struct wined3d_shader_instruction ins;
00237     struct list entry;
00238 };
00239 
00240 struct shader_arb_ctx_priv
00241 {
00242     char addr_reg[20];
00243     enum
00244     {
00245         /* plain GL_ARB_vertex_program or GL_ARB_fragment_program */
00246         ARB,
00247         /* GL_NV_vertex_progam2_option or GL_NV_fragment_program_option */
00248         NV2,
00249         /* GL_NV_vertex_program3 or GL_NV_fragment_program2 */
00250         NV3
00251     } target_version;
00252 
00253     const struct arb_vs_compile_args    *cur_vs_args;
00254     const struct arb_ps_compile_args    *cur_ps_args;
00255     const struct arb_ps_compiled_shader *compiled_fprog;
00256     const struct arb_vs_compiled_shader *compiled_vprog;
00257     struct arb_ps_np2fixup_info         *cur_np2fixup_info;
00258     struct list                         control_frames;
00259     struct list                         record;
00260     BOOL                                recording;
00261     BOOL                                muted;
00262     unsigned int                        num_loops, loop_depth, num_ifcs;
00263     int                                 aL;
00264 
00265     unsigned int                        vs_clipplanes;
00266     BOOL                                footer_written;
00267     BOOL                                in_main_func;
00268 
00269     /* For 3.0 vertex shaders */
00270     const char                          *vs_output[MAX_REG_OUTPUT];
00271     /* For 2.x and earlier vertex shaders */
00272     const char                          *texcrd_output[8], *color_output[2], *fog_output;
00273 
00274     /* 3.0 pshader input for compatibility with fixed function */
00275     const char                          *ps_input[MAX_REG_INPUT];
00276 };
00277 
00278 struct ps_signature
00279 {
00280     struct wined3d_shader_signature_element *sig;
00281     DWORD                               idx;
00282     struct wine_rb_entry                entry;
00283 };
00284 
00285 struct arb_pshader_private {
00286     struct arb_ps_compiled_shader   *gl_shaders;
00287     UINT                            num_gl_shaders, shader_array_size;
00288     DWORD                           input_signature_idx;
00289     DWORD                           clipplane_emulation;
00290     BOOL                            clamp_consts;
00291 };
00292 
00293 struct arb_vshader_private {
00294     struct arb_vs_compiled_shader   *gl_shaders;
00295     UINT                            num_gl_shaders, shader_array_size;
00296     UINT rel_offset;
00297 };
00298 
00299 struct shader_arb_priv
00300 {
00301     GLuint                  current_vprogram_id;
00302     GLuint                  current_fprogram_id;
00303     const struct arb_ps_compiled_shader *compiled_fprog;
00304     const struct arb_vs_compiled_shader *compiled_vprog;
00305     GLuint                  depth_blt_vprogram_id;
00306     GLuint                  depth_blt_fprogram_id_full[tex_type_count];
00307     GLuint                  depth_blt_fprogram_id_masked[tex_type_count];
00308     BOOL                    use_arbfp_fixed_func;
00309     struct wine_rb_tree     fragment_shaders;
00310     BOOL                    last_ps_const_clamped;
00311     BOOL                    last_vs_color_unclamp;
00312 
00313     struct wine_rb_tree     signature_tree;
00314     DWORD ps_sig_number;
00315 
00316     unsigned int highest_dirty_ps_const, highest_dirty_vs_const;
00317     char *vshader_const_dirty, *pshader_const_dirty;
00318     const struct wined3d_context *last_context;
00319 };
00320 
00321 /* GL locking for state handlers is done by the caller. */
00322 static BOOL need_rel_addr_const(const struct arb_vshader_private *shader_data,
00323         const struct wined3d_shader_reg_maps *reg_maps, const struct wined3d_gl_info *gl_info)
00324 {
00325     if (shader_data->rel_offset) return TRUE;
00326     if (!reg_maps->usesmova) return FALSE;
00327     return !gl_info->supported[NV_VERTEX_PROGRAM2_OPTION];
00328 }
00329 
00330 /* Returns TRUE if result.clip from GL_NV_vertex_program2 should be used and FALSE otherwise */
00331 static inline BOOL use_nv_clip(const struct wined3d_gl_info *gl_info)
00332 {
00333     return gl_info->supported[NV_VERTEX_PROGRAM2_OPTION]
00334             && !(gl_info->quirks & WINED3D_QUIRK_NV_CLIP_BROKEN);
00335 }
00336 
00337 static BOOL need_helper_const(const struct arb_vshader_private *shader_data,
00338         const struct wined3d_shader_reg_maps *reg_maps, const struct wined3d_gl_info *gl_info)
00339 {
00340     if (need_rel_addr_const(shader_data, reg_maps, gl_info)) return TRUE;
00341     if (!gl_info->supported[NV_VERTEX_PROGRAM]) return TRUE; /* Need to init colors. */
00342     if (gl_info->quirks & WINED3D_QUIRK_ARB_VS_OFFSET_LIMIT) return TRUE; /* Load the immval offset. */
00343     if (gl_info->quirks & WINED3D_QUIRK_SET_TEXCOORD_W) return TRUE; /* Have to init texcoords. */
00344     if (!use_nv_clip(gl_info)) return TRUE; /* Init the clip texcoord */
00345     if (reg_maps->usesnrm) return TRUE; /* 0.0 */
00346     if (reg_maps->usespow) return TRUE; /* EPS, 0.0 and 1.0 */
00347     return FALSE;
00348 }
00349 
/* Counts the constants the backend reserves in a vertex shader: always one
 * PARAM for the position fixup, one more when the helper constant with
 * immediate values is needed, and one more for the relative addressing
 * constant. */
static unsigned int reserved_vs_const(const struct arb_vshader_private *shader_data,
        const struct wined3d_shader_reg_maps *reg_maps, const struct wined3d_gl_info *gl_info)
{
    unsigned int count = 1; /* position fixup */

    count += need_helper_const(shader_data, reg_maps, gl_info) ? 1 : 0;
    count += need_rel_addr_const(shader_data, reg_maps, gl_info) ? 1 : 0;

    return count;
}
00360 
00361 /* Loads floating point constants into the currently set ARB_vertex/fragment_program.
00362  * When constant_list == NULL, it will load all the constants.
00363  *
00364  * @target_type should be either GL_VERTEX_PROGRAM_ARB (for vertex shaders)
00365  *  or GL_FRAGMENT_PROGRAM_ARB (for pixel shaders)
00366  */
00367 /* GL locking is done by the caller */
00368 static unsigned int shader_arb_load_constantsF(const struct wined3d_shader *shader,
00369         const struct wined3d_gl_info *gl_info, GLuint target_type, unsigned int max_constants,
00370         const float *constants, char *dirty_consts)
00371 {
00372     struct wined3d_shader_lconst *lconst;
00373     DWORD i, j;
00374     unsigned int ret;
00375 
00376     if (TRACE_ON(d3d_constants))
00377     {
00378         for(i = 0; i < max_constants; i++) {
00379             if(!dirty_consts[i]) continue;
00380             TRACE_(d3d_constants)("Loading constants %i: %f, %f, %f, %f\n", i,
00381                         constants[i * 4 + 0], constants[i * 4 + 1],
00382                         constants[i * 4 + 2], constants[i * 4 + 3]);
00383         }
00384     }
00385 
00386     i = 0;
00387 
00388     /* In 1.X pixel shaders constants are implicitly clamped in the range [-1;1] */
00389     if (target_type == GL_FRAGMENT_PROGRAM_ARB && shader->reg_maps.shader_version.major == 1)
00390     {
00391         float lcl_const[4];
00392         /* ps 1.x supports only 8 constants, clamp only those. When switching between 1.x and higher
00393          * shaders, the first 8 constants are marked dirty for reload
00394          */
00395         for(; i < min(8, max_constants); i++) {
00396             if(!dirty_consts[i]) continue;
00397             dirty_consts[i] = 0;
00398 
00399             j = 4 * i;
00400             if (constants[j + 0] > 1.0f) lcl_const[0] = 1.0f;
00401             else if (constants[j + 0] < -1.0f) lcl_const[0] = -1.0f;
00402             else lcl_const[0] = constants[j + 0];
00403 
00404             if (constants[j + 1] > 1.0f) lcl_const[1] = 1.0f;
00405             else if (constants[j + 1] < -1.0f) lcl_const[1] = -1.0f;
00406             else lcl_const[1] = constants[j + 1];
00407 
00408             if (constants[j + 2] > 1.0f) lcl_const[2] = 1.0f;
00409             else if (constants[j + 2] < -1.0f) lcl_const[2] = -1.0f;
00410             else lcl_const[2] = constants[j + 2];
00411 
00412             if (constants[j + 3] > 1.0f) lcl_const[3] = 1.0f;
00413             else if (constants[j + 3] < -1.0f) lcl_const[3] = -1.0f;
00414             else lcl_const[3] = constants[j + 3];
00415 
00416             GL_EXTCALL(glProgramEnvParameter4fvARB(target_type, i, lcl_const));
00417         }
00418 
00419         /* If further constants are dirty, reload them without clamping.
00420          *
00421          * The alternative is not to touch them, but then we cannot reset the dirty constant count
00422          * to zero. That's bad for apps that only use PS 1.x shaders, because in that case the code
00423          * above would always re-check the first 8 constants since max_constant remains at the init
00424          * value
00425          */
00426     }
00427 
00428     if (gl_info->supported[EXT_GPU_PROGRAM_PARAMETERS])
00429     {
00430         /* TODO: Benchmark if we're better of with finding the dirty constants ourselves,
00431          * or just reloading *all* constants at once
00432          *
00433         GL_EXTCALL(glProgramEnvParameters4fvEXT(target_type, i, max_constants, constants + (i * 4)));
00434          */
00435         for(; i < max_constants; i++) {
00436             if(!dirty_consts[i]) continue;
00437 
00438             /* Find the next block of dirty constants */
00439             dirty_consts[i] = 0;
00440             j = i;
00441             for(i++; (i < max_constants) && dirty_consts[i]; i++) {
00442                 dirty_consts[i] = 0;
00443             }
00444 
00445             GL_EXTCALL(glProgramEnvParameters4fvEXT(target_type, j, i - j, constants + (j * 4)));
00446         }
00447     } else {
00448         for(; i < max_constants; i++) {
00449             if(dirty_consts[i]) {
00450                 dirty_consts[i] = 0;
00451                 GL_EXTCALL(glProgramEnvParameter4fvARB(target_type, i, constants + (i * 4)));
00452             }
00453         }
00454     }
00455     checkGLcall("glProgramEnvParameter4fvARB()");
00456 
00457     /* Load immediate constants */
00458     if (shader->load_local_constsF)
00459     {
00460         if (TRACE_ON(d3d_shader))
00461         {
00462             LIST_FOR_EACH_ENTRY(lconst, &shader->constantsF, struct wined3d_shader_lconst, entry)
00463             {
00464                 GLfloat* values = (GLfloat*)lconst->value;
00465                 TRACE_(d3d_constants)("Loading local constants %i: %f, %f, %f, %f\n", lconst->idx,
00466                         values[0], values[1], values[2], values[3]);
00467             }
00468         }
00469         /* Immediate constants are clamped for 1.X shaders at loading times */
00470         ret = 0;
00471         LIST_FOR_EACH_ENTRY(lconst, &shader->constantsF, struct wined3d_shader_lconst, entry)
00472         {
00473             dirty_consts[lconst->idx] = 1; /* Dirtify so the non-immediate constant overwrites it next time */
00474             ret = max(ret, lconst->idx + 1);
00475             GL_EXTCALL(glProgramEnvParameter4fvARB(target_type, lconst->idx, (GLfloat*)lconst->value));
00476         }
00477         checkGLcall("glProgramEnvParameter4fvARB()");
00478         return ret; /* The loaded immediate constants need reloading for the next shader */
00479     } else {
00480         return 0; /* No constants are dirty now */
00481     }
00482 }
00483 
00487 static void shader_arb_load_np2fixup_constants(void *shader_priv,
00488         const struct wined3d_gl_info *gl_info, const struct wined3d_state *state)
00489 {
00490     const struct shader_arb_priv * priv = shader_priv;
00491 
00492     /* NP2 texcoord fixup is (currently) only done for pixelshaders. */
00493     if (!use_ps(state)) return;
00494 
00495     if (priv->compiled_fprog && priv->compiled_fprog->np2fixup_info.super.active) {
00496         const struct arb_ps_np2fixup_info* const fixup = &priv->compiled_fprog->np2fixup_info;
00497         UINT i;
00498         WORD active = fixup->super.active;
00499         GLfloat np2fixup_constants[4 * MAX_FRAGMENT_SAMPLERS];
00500 
00501         for (i = 0; active; active >>= 1, ++i)
00502         {
00503             const struct wined3d_texture *tex = state->textures[i];
00504             const unsigned char idx = fixup->super.idx[i];
00505             GLfloat *tex_dim = &np2fixup_constants[(idx >> 1) * 4];
00506 
00507             if (!(active & 1)) continue;
00508 
00509             if (!tex) {
00510                 FIXME("Nonexistent texture is flagged for NP2 texcoord fixup\n");
00511                 continue;
00512             }
00513 
00514             if (idx % 2)
00515             {
00516                 tex_dim[2] = tex->pow2_matrix[0];
00517                 tex_dim[3] = tex->pow2_matrix[5];
00518             }
00519             else
00520             {
00521                 tex_dim[0] = tex->pow2_matrix[0];
00522                 tex_dim[1] = tex->pow2_matrix[5];
00523             }
00524         }
00525 
00526         for (i = 0; i < fixup->super.num_consts; ++i) {
00527             GL_EXTCALL(glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB,
00528                                                    fixup->offset + i, &np2fixup_constants[i * 4]));
00529         }
00530     }
00531 }
00532 
00533 /* GL locking is done by the caller. */
00534 static void shader_arb_ps_local_constants(const struct arb_ps_compiled_shader *gl_shader,
00535         const struct wined3d_context *context, const struct wined3d_state *state, UINT rt_height)
00536 {
00537     const struct wined3d_gl_info *gl_info = context->gl_info;
00538     unsigned char i;
00539 
00540     for(i = 0; i < gl_shader->numbumpenvmatconsts; i++)
00541     {
00542         int texunit = gl_shader->bumpenvmatconst[i].texunit;
00543 
00544         /* The state manager takes care that this function is always called if the bump env matrix changes */
00545         const float *data = (const float *)&state->texture_states[texunit][WINED3D_TSS_BUMPENV_MAT00];
00546         GL_EXTCALL(glProgramLocalParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB,
00547                 gl_shader->bumpenvmatconst[i].const_num, data));
00548 
00549         if (gl_shader->luminanceconst[i].const_num != WINED3D_CONST_NUM_UNUSED)
00550         {
00551             /* WINED3D_TSS_BUMPENVLSCALE and WINED3D_TSS_BUMPENVLOFFSET are next to each other.
00552              * point gl to the scale, and load 4 floats. x = scale, y = offset, z and w are junk, we
00553              * don't care about them. The pointers are valid for sure because the stateblock is bigger.
00554              * (they're WINED3D_TSS_TEXTURETRANSFORMFLAGS and WINED3D_TSS_ADDRESSW, so most likely 0 or NaN
00555             */
00556             const float *scale = (const float *)&state->texture_states[texunit][WINED3D_TSS_BUMPENV_LSCALE];
00557             GL_EXTCALL(glProgramLocalParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB,
00558                     gl_shader->luminanceconst[i].const_num, scale));
00559         }
00560     }
00561     checkGLcall("Load bumpmap consts");
00562 
00563     if(gl_shader->ycorrection != WINED3D_CONST_NUM_UNUSED)
00564     {
00565         /* ycorrection.x: Backbuffer height(onscreen) or 0(offscreen).
00566         * ycorrection.y: -1.0(onscreen), 1.0(offscreen)
00567         * ycorrection.z: 1.0
00568         * ycorrection.w: 0.0
00569         */
00570         float val[4];
00571         val[0] = context->render_offscreen ? 0.0f : (float) rt_height;
00572         val[1] = context->render_offscreen ? 1.0f : -1.0f;
00573         val[2] = 1.0f;
00574         val[3] = 0.0f;
00575         GL_EXTCALL(glProgramLocalParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, gl_shader->ycorrection, val));
00576         checkGLcall("y correction loading");
00577     }
00578 
00579     if (!gl_shader->num_int_consts) return;
00580 
00581     for(i = 0; i < MAX_CONST_I; i++)
00582     {
00583         if(gl_shader->int_consts[i] != WINED3D_CONST_NUM_UNUSED)
00584         {
00585             float val[4];
00586             val[0] = (float)state->ps_consts_i[4 * i];
00587             val[1] = (float)state->ps_consts_i[4 * i + 1];
00588             val[2] = (float)state->ps_consts_i[4 * i + 2];
00589             val[3] = -1.0f;
00590 
00591             GL_EXTCALL(glProgramLocalParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, gl_shader->int_consts[i], val));
00592         }
00593     }
00594     checkGLcall("Load ps int consts");
00595 }
00596 
00597 /* GL locking is done by the caller. */
00598 static void shader_arb_vs_local_constants(const struct arb_vs_compiled_shader *gl_shader,
00599         const struct wined3d_context *context, const struct wined3d_state *state)
00600 {
00601     const struct wined3d_gl_info *gl_info = context->gl_info;
00602     float position_fixup[4];
00603     unsigned char i;
00604 
00605     /* Upload the position fixup */
00606     shader_get_position_fixup(context, state, position_fixup);
00607     GL_EXTCALL(glProgramLocalParameter4fvARB(GL_VERTEX_PROGRAM_ARB, gl_shader->pos_fixup, position_fixup));
00608 
00609     if (!gl_shader->num_int_consts) return;
00610 
00611     for(i = 0; i < MAX_CONST_I; i++)
00612     {
00613         if(gl_shader->int_consts[i] != WINED3D_CONST_NUM_UNUSED)
00614         {
00615             float val[4];
00616             val[0] = (float)state->vs_consts_i[4 * i];
00617             val[1] = (float)state->vs_consts_i[4 * i + 1];
00618             val[2] = (float)state->vs_consts_i[4 * i + 2];
00619             val[3] = -1.0f;
00620 
00621             GL_EXTCALL(glProgramLocalParameter4fvARB(GL_VERTEX_PROGRAM_ARB, gl_shader->int_consts[i], val));
00622         }
00623     }
00624     checkGLcall("Load vs int consts");
00625 }
00626 
00633 /* GL locking is done by the caller (state handler) */
00634 static void shader_arb_load_constants(const struct wined3d_context *context, char usePixelShader, char useVertexShader)
00635 {
00636     struct wined3d_device *device = context->swapchain->device;
00637     const struct wined3d_state *state = &device->stateBlock->state;
00638     const struct wined3d_gl_info *gl_info = context->gl_info;
00639     struct shader_arb_priv *priv = device->shader_priv;
00640 
00641     if (context != priv->last_context)
00642     {
00643         memset(priv->vshader_const_dirty, 1,
00644                 sizeof(*priv->vshader_const_dirty) * device->d3d_vshader_constantF);
00645         priv->highest_dirty_vs_const = device->d3d_vshader_constantF;
00646 
00647         memset(priv->pshader_const_dirty, 1,
00648                 sizeof(*priv->pshader_const_dirty) * device->d3d_pshader_constantF);
00649         priv->highest_dirty_ps_const = device->d3d_pshader_constantF;
00650 
00651         priv->last_context = context;
00652     }
00653 
00654     if (useVertexShader)
00655     {
00656         struct wined3d_shader *vshader = state->vertex_shader;
00657         const struct arb_vs_compiled_shader *gl_shader = priv->compiled_vprog;
00658 
00659         /* Load DirectX 9 float constants for vertex shader */
00660         priv->highest_dirty_vs_const = shader_arb_load_constantsF(vshader, gl_info, GL_VERTEX_PROGRAM_ARB,
00661                 priv->highest_dirty_vs_const, state->vs_consts_f, priv->vshader_const_dirty);
00662         shader_arb_vs_local_constants(gl_shader, context, state);
00663     }
00664 
00665     if (usePixelShader)
00666     {
00667         struct wined3d_shader *pshader = state->pixel_shader;
00668         const struct arb_ps_compiled_shader *gl_shader = priv->compiled_fprog;
00669         UINT rt_height = state->fb->render_targets[0]->resource.height;
00670 
00671         /* Load DirectX 9 float constants for pixel shader */
00672         priv->highest_dirty_ps_const = shader_arb_load_constantsF(pshader, gl_info, GL_FRAGMENT_PROGRAM_ARB,
00673                 priv->highest_dirty_ps_const, state->ps_consts_f, priv->pshader_const_dirty);
00674         shader_arb_ps_local_constants(gl_shader, context, state, rt_height);
00675     }
00676 }
00677 
00678 static void shader_arb_update_float_vertex_constants(struct wined3d_device *device, UINT start, UINT count)
00679 {
00680     struct wined3d_context *context = context_get_current();
00681     struct shader_arb_priv *priv = device->shader_priv;
00682 
00683     /* We don't want shader constant dirtification to be an O(contexts), so just dirtify the active
00684      * context. On a context switch the old context will be fully dirtified */
00685     if (!context || context->swapchain->device != device) return;
00686 
00687     memset(priv->vshader_const_dirty + start, 1, sizeof(*priv->vshader_const_dirty) * count);
00688     priv->highest_dirty_vs_const = max(priv->highest_dirty_vs_const, start + count);
00689 }
00690 
00691 static void shader_arb_update_float_pixel_constants(struct wined3d_device *device, UINT start, UINT count)
00692 {
00693     struct wined3d_context *context = context_get_current();
00694     struct shader_arb_priv *priv = device->shader_priv;
00695 
00696     /* We don't want shader constant dirtification to be an O(contexts), so just dirtify the active
00697      * context. On a context switch the old context will be fully dirtified */
00698     if (!context || context->swapchain->device != device) return;
00699 
00700     memset(priv->pshader_const_dirty + start, 1, sizeof(*priv->pshader_const_dirty) * count);
00701     priv->highest_dirty_ps_const = max(priv->highest_dirty_ps_const, start + count);
00702 }
00703 
00704 static DWORD *local_const_mapping(const struct wined3d_shader *shader)
00705 {
00706     const struct wined3d_shader_lconst *lconst;
00707     DWORD *ret;
00708     DWORD idx = 0;
00709 
00710     if (shader->load_local_constsF || list_empty(&shader->constantsF))
00711         return NULL;
00712 
00713     ret = HeapAlloc(GetProcessHeap(), 0, sizeof(DWORD) * shader->limits.constant_float);
00714     if (!ret)
00715     {
00716         ERR("Out of memory\n");
00717         return NULL;
00718     }
00719 
00720     LIST_FOR_EACH_ENTRY(lconst, &shader->constantsF, struct wined3d_shader_lconst, entry)
00721     {
00722         ret[lconst->idx] = idx++;
00723     }
00724     return ret;
00725 }
00726 
00727 /* Generate the variable & register declarations for the ARB_vertex_program output target */
00728 static DWORD shader_generate_arb_declarations(const struct wined3d_shader *shader,
00729         const struct wined3d_shader_reg_maps *reg_maps, struct wined3d_shader_buffer *buffer,
00730         const struct wined3d_gl_info *gl_info, const DWORD *lconst_map,
00731         DWORD *num_clipplanes, const struct shader_arb_ctx_priv *ctx)
00732 {
00733     DWORD i, next_local = 0;
00734     char pshader = shader_is_pshader_version(reg_maps->shader_version.type);
00735     const struct wined3d_shader_lconst *lconst;
00736     unsigned max_constantsF;
00737     DWORD map;
00738 
00739     /* In pixel shaders, all private constants are program local, we don't need anything
00740      * from program.env. Thus we can advertise the full set of constants in pixel shaders.
00741      * If we need a private constant the GL implementation will squeeze it in somewhere
00742      *
00743      * With vertex shaders we need the posFixup and on some GL implementations 4 helper
00744      * immediate values. The posFixup is loaded using program.env for now, so always
00745      * subtract one from the number of constants. If the shader uses indirect addressing,
00746      * account for the helper const too because we have to declare all available d3d constants
00747      * and don't know which are actually used.
00748      */
00749     if (pshader)
00750     {
00751         max_constantsF = gl_info->limits.arb_ps_native_constants;
00752         /* 24 is the minimum MAX_PROGRAM_ENV_PARAMETERS_ARB value. */
00753         if (max_constantsF < 24)
00754             max_constantsF = gl_info->limits.arb_ps_float_constants;
00755     }
00756     else
00757     {
00758         const struct arb_vshader_private *shader_data = shader->backend_data;
00759         max_constantsF = gl_info->limits.arb_vs_native_constants;
00760         /* 96 is the minimum MAX_PROGRAM_ENV_PARAMETERS_ARB value.
00761          * Also prevents max_constantsF from becoming less than 0 and
00762          * wrapping . */
00763         if (max_constantsF < 96)
00764             max_constantsF = gl_info->limits.arb_vs_float_constants;
00765 
00766         if (reg_maps->usesrelconstF)
00767         {
00768             DWORD highest_constf = 0, clip_limit;
00769 
00770             max_constantsF -= reserved_vs_const(shader_data, reg_maps, gl_info);
00771             max_constantsF -= count_bits(reg_maps->integer_constants);
00772 
00773             for (i = 0; i < shader->limits.constant_float; ++i)
00774             {
00775                 DWORD idx = i >> 5;
00776                 DWORD shift = i & 0x1f;
00777                 if(reg_maps->constf[idx] & (1 << shift)) highest_constf = i;
00778             }
00779 
00780             if(use_nv_clip(gl_info) && ctx->target_version >= NV2)
00781             {
00782                 if(ctx->cur_vs_args->super.clip_enabled)
00783                     clip_limit = gl_info->limits.clipplanes;
00784                 else
00785                     clip_limit = 0;
00786             }
00787             else
00788             {
00789                 unsigned int mask = ctx->cur_vs_args->clip.boolclip.clipplane_mask;
00790                 clip_limit = min(count_bits(mask), 4);
00791             }
00792             *num_clipplanes = min(clip_limit, max_constantsF - highest_constf - 1);
00793             max_constantsF -= *num_clipplanes;
00794             if(*num_clipplanes < clip_limit)
00795             {
00796                 WARN("Only %u clipplanes out of %u enabled\n", *num_clipplanes, gl_info->limits.clipplanes);
00797             }
00798         }
00799         else
00800         {
00801             if (ctx->target_version >= NV2) *num_clipplanes = gl_info->limits.clipplanes;
00802             else *num_clipplanes = min(gl_info->limits.clipplanes, 4);
00803         }
00804     }
00805 
00806     for (i = 0, map = reg_maps->temporary; map; map >>= 1, ++i)
00807     {
00808         if (map & 1) shader_addline(buffer, "TEMP R%u;\n", i);
00809     }
00810 
00811     for (i = 0, map = reg_maps->address; map; map >>= 1, ++i)
00812     {
00813         if (map & 1) shader_addline(buffer, "ADDRESS A%u;\n", i);
00814     }
00815 
00816     if (pshader && reg_maps->shader_version.major == 1 && reg_maps->shader_version.minor <= 3)
00817     {
00818         for (i = 0, map = reg_maps->texcoord; map; map >>= 1, ++i)
00819         {
00820             if (map & 1) shader_addline(buffer, "TEMP T%u;\n", i);
00821         }
00822     }
00823 
00824     /* Load local constants using the program-local space,
00825      * this avoids reloading them each time the shader is used
00826      */
00827     if (lconst_map)
00828     {
00829         LIST_FOR_EACH_ENTRY(lconst, &shader->constantsF, struct wined3d_shader_lconst, entry)
00830         {
00831             shader_addline(buffer, "PARAM C%u = program.local[%u];\n", lconst->idx,
00832                            lconst_map[lconst->idx]);
00833             next_local = max(next_local, lconst_map[lconst->idx] + 1);
00834         }
00835     }
00836 
00837     /* After subtracting privately used constants from the hardware limit(they are loaded as
00838      * local constants), make sure the shader doesn't violate the env constant limit
00839      */
00840     if(pshader)
00841     {
00842         max_constantsF = min(max_constantsF, gl_info->limits.arb_ps_float_constants);
00843     }
00844     else
00845     {
00846         max_constantsF = min(max_constantsF, gl_info->limits.arb_vs_float_constants);
00847     }
00848 
00849     /* Avoid declaring more constants than needed */
00850     max_constantsF = min(max_constantsF, shader->limits.constant_float);
00851 
00852     /* we use the array-based constants array if the local constants are marked for loading,
00853      * because then we use indirect addressing, or when the local constant list is empty,
00854      * because then we don't know if we're using indirect addressing or not. If we're hardcoding
00855      * local constants do not declare the loaded constants as an array because ARB compilers usually
00856      * do not optimize unused constants away
00857      */
00858     if (reg_maps->usesrelconstF)
00859     {
00860         /* Need to PARAM the environment parameters (constants) so we can use relative addressing */
00861         shader_addline(buffer, "PARAM C[%d] = { program.env[0..%d] };\n",
00862                     max_constantsF, max_constantsF - 1);
00863     } else {
00864         for(i = 0; i < max_constantsF; i++) {
00865             DWORD idx, mask;
00866             idx = i >> 5;
00867             mask = 1 << (i & 0x1f);
00868             if (!shader_constant_is_local(shader, i) && (reg_maps->constf[idx] & mask))
00869             {
00870                 shader_addline(buffer, "PARAM C%d = program.env[%d];\n",i, i);
00871             }
00872         }
00873     }
00874 
00875     return next_local;
00876 }
00877 
/* Operand names for the destination shift modifier, indexed by the 4-bit
 * shift field. Entries 1-4 (multiply by 2, 4, 8, 16) select a component of
 * "coefmul", entries 12-15 (divide by 16, 8, 4, 2) select a component of
 * "coefdiv". All other shift values map to the placeholder "dummy". */
static const char * const shift_tab[] =
{
    /*  0: no shift      */ "dummy",
    /*  1: multiply x2   */ "coefmul.x",
    /*  2: multiply x4   */ "coefmul.y",
    /*  3: multiply x8   */ "coefmul.z",
    /*  4: multiply x16  */ "coefmul.w",
    /*  5: multiply x32  */ "dummy",
    /*  6: multiply x64  */ "dummy",
    /*  7: multiply x128 */ "dummy",
    /*  8: divide by 256 */ "dummy",
    /*  9: divide by 128 */ "dummy",
    /* 10: divide by 64  */ "dummy",
    /* 11: divide by 32  */ "dummy",
    /* 12: divide by 16  */ "coefdiv.w",
    /* 13: divide by 8   */ "coefdiv.z",
    /* 14: divide by 4   */ "coefdiv.y",
    /* 15: divide by 2   */ "coefdiv.x"
};
00896 
00897 static void shader_arb_get_write_mask(const struct wined3d_shader_instruction *ins,
00898         const struct wined3d_shader_dst_param *dst, char *write_mask)
00899 {
00900     char *ptr = write_mask;
00901 
00902     if (dst->write_mask != WINED3DSP_WRITEMASK_ALL)
00903     {
00904         *ptr++ = '.';
00905         if (dst->write_mask & WINED3DSP_WRITEMASK_0) *ptr++ = 'x';
00906         if (dst->write_mask & WINED3DSP_WRITEMASK_1) *ptr++ = 'y';
00907         if (dst->write_mask & WINED3DSP_WRITEMASK_2) *ptr++ = 'z';
00908         if (dst->write_mask & WINED3DSP_WRITEMASK_3) *ptr++ = 'w';
00909     }
00910 
00911     *ptr = '\0';
00912 }
00913 
00914 static void shader_arb_get_swizzle(const struct wined3d_shader_src_param *param, BOOL fixup, char *swizzle_str)
00915 {
00916     /* For registers of type WINED3DDECLTYPE_D3DCOLOR, data is stored as "bgra",
00917      * but addressed as "rgba". To fix this we need to swap the register's x
00918      * and z components. */
00919     const char *swizzle_chars = fixup ? "zyxw" : "xyzw";
00920     char *ptr = swizzle_str;
00921 
00922     /* swizzle bits fields: wwzzyyxx */
00923     DWORD swizzle = param->swizzle;
00924     DWORD swizzle_x = swizzle & 0x03;
00925     DWORD swizzle_y = (swizzle >> 2) & 0x03;
00926     DWORD swizzle_z = (swizzle >> 4) & 0x03;
00927     DWORD swizzle_w = (swizzle >> 6) & 0x03;
00928 
00929     /* If the swizzle is the default swizzle (ie, "xyzw"), we don't need to
00930      * generate a swizzle string. Unless we need to our own swizzling. */
00931     if (swizzle != WINED3DSP_NOSWIZZLE || fixup)
00932     {
00933         *ptr++ = '.';
00934         if (swizzle_x == swizzle_y && swizzle_x == swizzle_z && swizzle_x == swizzle_w) {
00935             *ptr++ = swizzle_chars[swizzle_x];
00936         } else {
00937             *ptr++ = swizzle_chars[swizzle_x];
00938             *ptr++ = swizzle_chars[swizzle_y];
00939             *ptr++ = swizzle_chars[swizzle_z];
00940             *ptr++ = swizzle_chars[swizzle_w];
00941         }
00942     }
00943 
00944     *ptr = '\0';
00945 }
00946 
00947 static void shader_arb_request_a0(const struct wined3d_shader_instruction *ins, const char *src)
00948 {
00949     struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
00950     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
00951 
00952     if (!strcmp(priv->addr_reg, src)) return;
00953 
00954     strcpy(priv->addr_reg, src);
00955     shader_addline(buffer, "ARL A0.x, %s;\n", src);
00956 }
00957 
00958 static void shader_arb_get_src_param(const struct wined3d_shader_instruction *ins,
00959         const struct wined3d_shader_src_param *src, unsigned int tmpreg, char *outregstr);
00960 
00961 static void shader_arb_get_register_name(const struct wined3d_shader_instruction *ins,
00962         const struct wined3d_shader_register *reg, char *register_name, BOOL *is_color)
00963 {
00964     /* oPos, oFog and oPts in D3D */
00965     static const char * const rastout_reg_names[] = {"TMP_OUT", "result.fogcoord", "result.pointsize"};
00966     const struct wined3d_shader *shader = ins->ctx->shader;
00967     const struct wined3d_shader_reg_maps *reg_maps = ins->ctx->reg_maps;
00968     BOOL pshader = shader_is_pshader_version(reg_maps->shader_version.type);
00969     struct shader_arb_ctx_priv *ctx = ins->ctx->backend_data;
00970 
00971     *is_color = FALSE;
00972 
00973     switch (reg->type)
00974     {
00975         case WINED3DSPR_TEMP:
00976             sprintf(register_name, "R%u", reg->idx);
00977             break;
00978 
00979         case WINED3DSPR_INPUT:
00980             if (pshader)
00981             {
00982                 if (reg_maps->shader_version.major < 3)
00983                 {
00984                     if (!reg->idx) strcpy(register_name, "fragment.color.primary");
00985                     else strcpy(register_name, "fragment.color.secondary");
00986                 }
00987                 else
00988                 {
00989                     if(reg->rel_addr)
00990                     {
00991                         char rel_reg[50];
00992                         shader_arb_get_src_param(ins, reg->rel_addr, 0, rel_reg);
00993 
00994                         if (!strcmp(rel_reg, "**aL_emul**"))
00995                         {
00996                             DWORD idx = ctx->aL + reg->idx;
00997                             if(idx < MAX_REG_INPUT)
00998                             {
00999                                 strcpy(register_name, ctx->ps_input[idx]);
01000                             }
01001                             else
01002                             {
01003                                 ERR("Pixel shader input register out of bounds: %u\n", idx);
01004                                 sprintf(register_name, "out_of_bounds_%u", idx);
01005                             }
01006                         }
01007                         else if (reg_maps->input_registers & 0x0300)
01008                         {
01009                             /* There are two ways basically:
01010                              *
01011                              * 1) Use the unrolling code that is used for loop emulation and unroll the loop.
01012                              *    That means trouble if the loop also contains a breakc or if the control values
01013                              *    aren't local constants.
01014                              * 2) Generate an if block that checks if aL.y < 8, == 8 or == 9 and selects the
01015                              *    source dynamically. The trouble is that we cannot simply read aL.y because it
01016                              *    is an ADDRESS register. We could however push it, load .zw with a value and use
01017                              *    ADAC to load the condition code register and pop it again afterwards
01018                              */
01019                             FIXME("Relative input register addressing with more than 8 registers\n");
01020 
01021                             /* This is better than nothing for now */
01022                             sprintf(register_name, "fragment.texcoord[%s + %u]", rel_reg, reg->idx);
01023                         }
01024                         else if(ctx->cur_ps_args->super.vp_mode != vertexshader)
01025                         {
01026                             /* This is problematic because we'd have to consult the ctx->ps_input strings
01027                              * for where to find the varying. Some may be "0.0", others can be texcoords or
01028                              * colors. This needs either a pipeline replacement to make the vertex shader feed
01029                              * proper varyings, or loop unrolling
01030                              *
01031                              * For now use the texcoords and hope for the best
01032                              */
01033                             FIXME("Non-vertex shader varying input with indirect addressing\n");
01034                             sprintf(register_name, "fragment.texcoord[%s + %u]", rel_reg, reg->idx);
01035                         }
01036                         else
01037                         {
01038                             /* D3D supports indirect addressing only with aL in loop registers. The loop instruction
01039                              * pulls GL_NV_fragment_program2 in
01040                              */
01041                             sprintf(register_name, "fragment.texcoord[%s + %u]", rel_reg, reg->idx);
01042                         }
01043                     }
01044                     else
01045                     {
01046                         if(reg->idx < MAX_REG_INPUT)
01047                         {
01048                             strcpy(register_name, ctx->ps_input[reg->idx]);
01049                         }
01050                         else
01051                         {
01052                             ERR("Pixel shader input register out of bounds: %u\n", reg->idx);
01053                             sprintf(register_name, "out_of_bounds_%u", reg->idx);
01054                         }
01055                     }
01056                 }
01057             }
01058             else
01059             {
01060                 if (ctx->cur_vs_args->super.swizzle_map & (1 << reg->idx)) *is_color = TRUE;
01061                 sprintf(register_name, "vertex.attrib[%u]", reg->idx);
01062             }
01063             break;
01064 
01065         case WINED3DSPR_CONST:
01066             if (!pshader && reg->rel_addr)
01067             {
01068                 const struct arb_vshader_private *shader_data = shader->backend_data;
01069                 UINT rel_offset = shader_data->rel_offset;
01070                 BOOL aL = FALSE;
01071                 char rel_reg[50];
01072                 if (reg_maps->shader_version.major < 2)
01073                 {
01074                     sprintf(rel_reg, "A0.x");
01075                 } else {
01076                     shader_arb_get_src_param(ins, reg->rel_addr, 0, rel_reg);
01077                     if(ctx->target_version == ARB) {
01078                         if (!strcmp(rel_reg, "**aL_emul**"))
01079                         {
01080                             aL = TRUE;
01081                         } else {
01082                             shader_arb_request_a0(ins, rel_reg);
01083                             sprintf(rel_reg, "A0.x");
01084                         }
01085                     }
01086                 }
01087                 if(aL)
01088                     sprintf(register_name, "C[%u]", ctx->aL + reg->idx);
01089                 else if (reg->idx >= rel_offset)
01090                     sprintf(register_name, "C[%s + %u]", rel_reg, reg->idx - rel_offset);
01091                 else
01092                     sprintf(register_name, "C[%s - %u]", rel_reg, rel_offset - reg->idx);
01093             }
01094             else
01095             {
01096                 if (reg_maps->usesrelconstF)
01097                     sprintf(register_name, "C[%u]", reg->idx);
01098                 else
01099                     sprintf(register_name, "C%u", reg->idx);
01100             }
01101             break;
01102 
01103         case WINED3DSPR_TEXTURE: /* case WINED3DSPR_ADDR: */
01104             if (pshader)
01105             {
01106                 if (reg_maps->shader_version.major == 1
01107                         && reg_maps->shader_version.minor <= 3)
01108                 {
01109                     /* In ps <= 1.3, Tx is a temporary register as destination to all instructions,
01110                      * and as source to most instructions. For some instructions it is the texcoord
01111                      * input. Those instructions know about the special use
01112                      */
01113                     sprintf(register_name, "T%u", reg->idx);
01114                 } else {
01115                     /* in ps 1.4 and 2.x Tx is always a (read-only) varying */
01116                     sprintf(register_name, "fragment.texcoord[%u]", reg->idx);
01117                 }
01118             }
01119             else
01120             {
01121                 if (reg_maps->shader_version.major == 1 || ctx->target_version >= NV2)
01122                 {
01123                     sprintf(register_name, "A%u", reg->idx);
01124                 }
01125                 else
01126                 {
01127                     sprintf(register_name, "A%u_SHADOW", reg->idx);
01128                 }
01129             }
01130             break;
01131 
01132         case WINED3DSPR_COLOROUT:
01133             if (ctx->cur_ps_args->super.srgb_correction && !reg->idx)
01134             {
01135                 strcpy(register_name, "TMP_COLOR");
01136             }
01137             else
01138             {
01139                 if(ctx->cur_ps_args->super.srgb_correction) FIXME("sRGB correction on higher render targets\n");
01140                 if (reg_maps->rt_mask > 1)
01141                 {
01142                     sprintf(register_name, "result.color[%u]", reg->idx);
01143                 }
01144                 else
01145                 {
01146                     strcpy(register_name, "result.color");
01147                 }
01148             }
01149             break;
01150 
01151         case WINED3DSPR_RASTOUT:
01152             if(reg->idx == 1) sprintf(register_name, "%s", ctx->fog_output);
01153             else sprintf(register_name, "%s", rastout_reg_names[reg->idx]);
01154             break;
01155 
01156         case WINED3DSPR_DEPTHOUT:
01157             strcpy(register_name, "result.depth");
01158             break;
01159 
01160         case WINED3DSPR_ATTROUT:
01161         /* case WINED3DSPR_OUTPUT: */
01162             if (pshader) sprintf(register_name, "oD[%u]", reg->idx);
01163             else strcpy(register_name, ctx->color_output[reg->idx]);
01164             break;
01165 
01166         case WINED3DSPR_TEXCRDOUT:
01167             if (pshader)
01168             {
01169                 sprintf(register_name, "oT[%u]", reg->idx);
01170             }
01171             else
01172             {
01173                 if (reg_maps->shader_version.major < 3)
01174                 {
01175                     strcpy(register_name, ctx->texcrd_output[reg->idx]);
01176                 }
01177                 else
01178                 {
01179                     strcpy(register_name, ctx->vs_output[reg->idx]);
01180                 }
01181             }
01182             break;
01183 
01184         case WINED3DSPR_LOOP:
01185             if(ctx->target_version >= NV2)
01186             {
01187                 /* Pshader has an implicitly declared loop index counter A0.x that cannot be renamed */
01188                 if(pshader) sprintf(register_name, "A0.x");
01189                 else sprintf(register_name, "aL.y");
01190             }
01191             else
01192             {
01193                 /* Unfortunately this code cannot return the value of ctx->aL here. An immediate value
01194                  * would be valid, but if aL is used for indexing(its only use), there's likely an offset,
01195                  * thus the result would be something like C[15 + 30], which is not valid in the ARB program
01196                  * grammar. So return a marker for the emulated aL and intercept it in constant and varying
01197                  * indexing
01198                  */
01199                 sprintf(register_name, "**aL_emul**");
01200             }
01201 
01202             break;
01203 
01204         case WINED3DSPR_CONSTINT:
01205             sprintf(register_name, "I%u", reg->idx);
01206             break;
01207 
01208         case WINED3DSPR_MISCTYPE:
01209             if (!reg->idx)
01210             {
01211                 sprintf(register_name, "vpos");
01212             }
01213             else if(reg->idx == 1)
01214             {
01215                 sprintf(register_name, "fragment.facing.x");
01216             }
01217             else
01218             {
01219                 FIXME("Unknown MISCTYPE register index %u\n", reg->idx);
01220             }
01221             break;
01222 
01223         default:
01224             FIXME("Unhandled register type %#x[%u]\n", reg->type, reg->idx);
01225             sprintf(register_name, "unrecognized_register[%u]", reg->idx);
01226             break;
01227     }
01228 }
01229 
01230 static void shader_arb_get_dst_param(const struct wined3d_shader_instruction *ins,
01231         const struct wined3d_shader_dst_param *wined3d_dst, char *str)
01232 {
01233     char register_name[255];
01234     char write_mask[6];
01235     BOOL is_color;
01236 
01237     shader_arb_get_register_name(ins, &wined3d_dst->reg, register_name, &is_color);
01238     strcpy(str, register_name);
01239 
01240     shader_arb_get_write_mask(ins, wined3d_dst, write_mask);
01241     strcat(str, write_mask);
01242 }
01243 
01244 static const char *shader_arb_get_fixup_swizzle(enum fixup_channel_source channel_source)
01245 {
01246     switch(channel_source)
01247     {
01248         case CHANNEL_SOURCE_ZERO: return "0";
01249         case CHANNEL_SOURCE_ONE: return "1";
01250         case CHANNEL_SOURCE_X: return "x";
01251         case CHANNEL_SOURCE_Y: return "y";
01252         case CHANNEL_SOURCE_Z: return "z";
01253         case CHANNEL_SOURCE_W: return "w";
01254         default:
01255             FIXME("Unhandled channel source %#x\n", channel_source);
01256             return "undefined";
01257     }
01258 }
01259 
01260 static void gen_color_correction(struct wined3d_shader_buffer *buffer, const char *reg,
01261         DWORD dst_mask, const char *one, const char *two, struct color_fixup_desc fixup)
01262 {
01263     DWORD mask;
01264 
01265     if (is_complex_fixup(fixup))
01266     {
01267         enum complex_fixup complex_fixup = get_complex_fixup(fixup);
01268         FIXME("Complex fixup (%#x) not supported\n", complex_fixup);
01269         return;
01270     }
01271 
01272     mask = 0;
01273     if (fixup.x_source != CHANNEL_SOURCE_X) mask |= WINED3DSP_WRITEMASK_0;
01274     if (fixup.y_source != CHANNEL_SOURCE_Y) mask |= WINED3DSP_WRITEMASK_1;
01275     if (fixup.z_source != CHANNEL_SOURCE_Z) mask |= WINED3DSP_WRITEMASK_2;
01276     if (fixup.w_source != CHANNEL_SOURCE_W) mask |= WINED3DSP_WRITEMASK_3;
01277     mask &= dst_mask;
01278 
01279     if (mask)
01280     {
01281         shader_addline(buffer, "SWZ %s, %s, %s, %s, %s, %s;\n", reg, reg,
01282                 shader_arb_get_fixup_swizzle(fixup.x_source), shader_arb_get_fixup_swizzle(fixup.y_source),
01283                 shader_arb_get_fixup_swizzle(fixup.z_source), shader_arb_get_fixup_swizzle(fixup.w_source));
01284     }
01285 
01286     mask = 0;
01287     if (fixup.x_sign_fixup) mask |= WINED3DSP_WRITEMASK_0;
01288     if (fixup.y_sign_fixup) mask |= WINED3DSP_WRITEMASK_1;
01289     if (fixup.z_sign_fixup) mask |= WINED3DSP_WRITEMASK_2;
01290     if (fixup.w_sign_fixup) mask |= WINED3DSP_WRITEMASK_3;
01291     mask &= dst_mask;
01292 
01293     if (mask)
01294     {
01295         char reg_mask[6];
01296         char *ptr = reg_mask;
01297 
01298         if (mask != WINED3DSP_WRITEMASK_ALL)
01299         {
01300             *ptr++ = '.';
01301             if (mask & WINED3DSP_WRITEMASK_0) *ptr++ = 'x';
01302             if (mask & WINED3DSP_WRITEMASK_1) *ptr++ = 'y';
01303             if (mask & WINED3DSP_WRITEMASK_2) *ptr++ = 'z';
01304             if (mask & WINED3DSP_WRITEMASK_3) *ptr++ = 'w';
01305         }
01306         *ptr = '\0';
01307 
01308         shader_addline(buffer, "MAD %s%s, %s, %s, -%s;\n", reg, reg_mask, reg, two, one);
01309     }
01310 }
01311 
01312 static const char *shader_arb_get_modifier(const struct wined3d_shader_instruction *ins)
01313 {
01314     DWORD mod;
01315     struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
01316     if (!ins->dst_count) return "";
01317 
01318     mod = ins->dst[0].modifiers;
01319 
01320     /* Silently ignore PARTIALPRECISION if its not supported */
01321     if(priv->target_version == ARB) mod &= ~WINED3DSPDM_PARTIALPRECISION;
01322 
01323     if(mod & WINED3DSPDM_MSAMPCENTROID)
01324     {
01325         FIXME("Unhandled modifier WINED3DSPDM_MSAMPCENTROID\n");
01326         mod &= ~WINED3DSPDM_MSAMPCENTROID;
01327     }
01328 
01329     switch(mod)
01330     {
01331         case WINED3DSPDM_SATURATE | WINED3DSPDM_PARTIALPRECISION:
01332             return "H_SAT";
01333 
01334         case WINED3DSPDM_SATURATE:
01335             return "_SAT";
01336 
01337         case WINED3DSPDM_PARTIALPRECISION:
01338             return "H";
01339 
01340         case 0:
01341             return "";
01342 
01343         default:
01344             FIXME("Unknown modifiers 0x%08x\n", mod);
01345             return "";
01346     }
01347 }
01348 
01349 #define TEX_PROJ        0x1
01350 #define TEX_BIAS        0x2
01351 #define TEX_LOD         0x4
01352 #define TEX_DERIV       0x10
01353 
01354 static void shader_hw_sample(const struct wined3d_shader_instruction *ins, DWORD sampler_idx,
01355         const char *dst_str, const char *coord_reg, WORD flags, const char *dsx, const char *dsy)
01356 {
01357     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
01358     DWORD sampler_type = ins->ctx->reg_maps->sampler_type[sampler_idx];
01359     const struct wined3d_shader *shader = ins->ctx->shader;
01360     const struct wined3d_texture *texture;
01361     const char *tex_type;
01362     BOOL np2_fixup = FALSE;
01363     struct wined3d_device *device = shader->device;
01364     struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
01365     const char *mod;
01366     BOOL pshader = shader_is_pshader_version(ins->ctx->reg_maps->shader_version.type);
01367 
01368     /* D3D vertex shader sampler IDs are vertex samplers(0-3), not global d3d samplers */
01369     if(!pshader) sampler_idx += MAX_FRAGMENT_SAMPLERS;
01370 
01371     switch(sampler_type) {
01372         case WINED3DSTT_1D:
01373             tex_type = "1D";
01374             break;
01375 
01376         case WINED3DSTT_2D:
01377             texture = device->stateBlock->state.textures[sampler_idx];
01378             if (texture && texture->target == GL_TEXTURE_RECTANGLE_ARB)
01379             {
01380                 tex_type = "RECT";
01381             } else {
01382                 tex_type = "2D";
01383             }
01384             if (shader_is_pshader_version(ins->ctx->reg_maps->shader_version.type))
01385             {
01386                 if (priv->cur_np2fixup_info->super.active & (1 << sampler_idx))
01387                 {
01388                     if (flags) FIXME("Only ordinary sampling from NP2 textures is supported.\n");
01389                     else np2_fixup = TRUE;
01390                 }
01391             }
01392             break;
01393 
01394         case WINED3DSTT_VOLUME:
01395             tex_type = "3D";
01396             break;
01397 
01398         case WINED3DSTT_CUBE:
01399             tex_type = "CUBE";
01400             break;
01401 
01402         default:
01403             ERR("Unexpected texture type %d\n", sampler_type);
01404             tex_type = "";
01405     }
01406 
01407     /* TEX, TXL, TXD and TXP do not support the "H" modifier,
01408      * so don't use shader_arb_get_modifier
01409      */
01410     if(ins->dst[0].modifiers & WINED3DSPDM_SATURATE) mod = "_SAT";
01411     else mod = "";
01412 
01413     /* Fragment samplers always have indentity mapping */
01414     if(sampler_idx >= MAX_FRAGMENT_SAMPLERS)
01415     {
01416         sampler_idx = priv->cur_vs_args->vertex.samplers[sampler_idx - MAX_FRAGMENT_SAMPLERS];
01417     }
01418 
01419     if (flags & TEX_DERIV)
01420     {
01421         if(flags & TEX_PROJ) FIXME("Projected texture sampling with custom derivatives\n");
01422         if(flags & TEX_BIAS) FIXME("Biased texture sampling with custom derivatives\n");
01423         shader_addline(buffer, "TXD%s %s, %s, %s, %s, texture[%u], %s;\n", mod, dst_str, coord_reg,
01424                        dsx, dsy,sampler_idx, tex_type);
01425     }
01426     else if(flags & TEX_LOD)
01427     {
01428         if(flags & TEX_PROJ) FIXME("Projected texture sampling with explicit lod\n");
01429         if(flags & TEX_BIAS) FIXME("Biased texture sampling with explicit lod\n");
01430         shader_addline(buffer, "TXL%s %s, %s, texture[%u], %s;\n", mod, dst_str, coord_reg,
01431                        sampler_idx, tex_type);
01432     }
01433     else if (flags & TEX_BIAS)
01434     {
01435         /* Shouldn't be possible, but let's check for it */
01436         if(flags & TEX_PROJ) FIXME("Biased and Projected texture sampling\n");
01437         /* TXB takes the 4th component of the source vector automatically, as d3d. Nothing more to do */
01438         shader_addline(buffer, "TXB%s %s, %s, texture[%u], %s;\n", mod, dst_str, coord_reg, sampler_idx, tex_type);
01439     }
01440     else if (flags & TEX_PROJ)
01441     {
01442         shader_addline(buffer, "TXP%s %s, %s, texture[%u], %s;\n", mod, dst_str, coord_reg, sampler_idx, tex_type);
01443     }
01444     else
01445     {
01446         if (np2_fixup)
01447         {
01448             const unsigned char idx = priv->cur_np2fixup_info->super.idx[sampler_idx];
01449             shader_addline(buffer, "MUL TA, np2fixup[%u].%s, %s;\n", idx >> 1,
01450                            (idx % 2) ? "zwxy" : "xyzw", coord_reg);
01451 
01452             shader_addline(buffer, "TEX%s %s, TA, texture[%u], %s;\n", mod, dst_str, sampler_idx, tex_type);
01453         }
01454         else
01455             shader_addline(buffer, "TEX%s %s, %s, texture[%u], %s;\n", mod, dst_str, coord_reg, sampler_idx, tex_type);
01456     }
01457 
01458     if (pshader)
01459     {
01460         gen_color_correction(buffer, dst_str, ins->dst[0].write_mask,
01461                 arb_get_helper_value(WINED3D_SHADER_TYPE_PIXEL, ARB_ONE),
01462                 arb_get_helper_value(WINED3D_SHADER_TYPE_PIXEL, ARB_TWO),
01463                 priv->cur_ps_args->super.color_fixup[sampler_idx]);
01464     }
01465 }
01466 
01467 static void shader_arb_get_src_param(const struct wined3d_shader_instruction *ins,
01468         const struct wined3d_shader_src_param *src, unsigned int tmpreg, char *outregstr)
01469 {
01470     /* Generate a line that does the input modifier computation and return the input register to use */
01471     BOOL is_color = FALSE;
01472     char regstr[256];
01473     char swzstr[20];
01474     int insert_line;
01475     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
01476     struct shader_arb_ctx_priv *ctx = ins->ctx->backend_data;
01477     const char *one = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_ONE);
01478     const char *two = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_TWO);
01479 
01480     /* Assume a new line will be added */
01481     insert_line = 1;
01482 
01483     /* Get register name */
01484     shader_arb_get_register_name(ins, &src->reg, regstr, &is_color);
01485     shader_arb_get_swizzle(src, is_color, swzstr);
01486 
01487     switch (src->modifiers)
01488     {
01489     case WINED3DSPSM_NONE:
01490         sprintf(outregstr, "%s%s", regstr, swzstr);
01491         insert_line = 0;
01492         break;
01493     case WINED3DSPSM_NEG:
01494         sprintf(outregstr, "-%s%s", regstr, swzstr);
01495         insert_line = 0;
01496         break;
01497     case WINED3DSPSM_BIAS:
01498         shader_addline(buffer, "ADD T%c, %s, -coefdiv.x;\n", 'A' + tmpreg, regstr);
01499         break;
01500     case WINED3DSPSM_BIASNEG:
01501         shader_addline(buffer, "ADD T%c, -%s, coefdiv.x;\n", 'A' + tmpreg, regstr);
01502         break;
01503     case WINED3DSPSM_SIGN:
01504         shader_addline(buffer, "MAD T%c, %s, %s, -%s;\n", 'A' + tmpreg, regstr, two, one);
01505         break;
01506     case WINED3DSPSM_SIGNNEG:
01507         shader_addline(buffer, "MAD T%c, %s, -%s, %s;\n", 'A' + tmpreg, regstr, two, one);
01508         break;
01509     case WINED3DSPSM_COMP:
01510         shader_addline(buffer, "SUB T%c, %s, %s;\n", 'A' + tmpreg, one, regstr);
01511         break;
01512     case WINED3DSPSM_X2:
01513         shader_addline(buffer, "ADD T%c, %s, %s;\n", 'A' + tmpreg, regstr, regstr);
01514         break;
01515     case WINED3DSPSM_X2NEG:
01516         shader_addline(buffer, "ADD T%c, -%s, -%s;\n", 'A' + tmpreg, regstr, regstr);
01517         break;
01518     case WINED3DSPSM_DZ:
01519         shader_addline(buffer, "RCP T%c, %s.z;\n", 'A' + tmpreg, regstr);
01520         shader_addline(buffer, "MUL T%c, %s, T%c;\n", 'A' + tmpreg, regstr, 'A' + tmpreg);
01521         break;
01522     case WINED3DSPSM_DW:
01523         shader_addline(buffer, "RCP T%c, %s.w;\n", 'A' + tmpreg, regstr);
01524         shader_addline(buffer, "MUL T%c, %s, T%c;\n", 'A' + tmpreg, regstr, 'A' + tmpreg);
01525         break;
01526     case WINED3DSPSM_ABS:
01527         if(ctx->target_version >= NV2) {
01528             sprintf(outregstr, "|%s%s|", regstr, swzstr);
01529             insert_line = 0;
01530         } else {
01531             shader_addline(buffer, "ABS T%c, %s;\n", 'A' + tmpreg, regstr);
01532         }
01533         break;
01534     case WINED3DSPSM_ABSNEG:
01535         if(ctx->target_version >= NV2) {
01536             sprintf(outregstr, "-|%s%s|", regstr, swzstr);
01537         } else {
01538             shader_addline(buffer, "ABS T%c, %s;\n", 'A' + tmpreg, regstr);
01539             sprintf(outregstr, "-T%c%s", 'A' + tmpreg, swzstr);
01540         }
01541         insert_line = 0;
01542         break;
01543     default:
01544         sprintf(outregstr, "%s%s", regstr, swzstr);
01545         insert_line = 0;
01546     }
01547 
01548     /* Return modified or original register, with swizzle */
01549     if (insert_line)
01550         sprintf(outregstr, "T%c%s", 'A' + tmpreg, swzstr);
01551 }
01552 
01553 static void pshader_hw_bem(const struct wined3d_shader_instruction *ins)
01554 {
01555     const struct wined3d_shader_dst_param *dst = &ins->dst[0];
01556     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
01557     char dst_name[50];
01558     char src_name[2][50];
01559     DWORD sampler_code = dst->reg.idx;
01560 
01561     shader_arb_get_dst_param(ins, dst, dst_name);
01562 
01563     /* Sampling the perturbation map in Tsrc was done already, including the signedness correction if needed
01564      *
01565      * Keep in mind that src_name[1] can be "TB" and src_name[0] can be "TA" because modifiers like _x2 are valid
01566      * with bem. So delay loading the first parameter until after the perturbation calculation which needs two
01567      * temps is done.
01568      */
01569     shader_arb_get_src_param(ins, &ins->src[1], 1, src_name[1]);
01570     shader_addline(buffer, "SWZ TA, bumpenvmat%d, x, z, 0, 0;\n", sampler_code);
01571     shader_addline(buffer, "DP3 TC.r, TA, %s;\n", src_name[1]);
01572     shader_addline(buffer, "SWZ TA, bumpenvmat%d, y, w, 0, 0;\n", sampler_code);
01573     shader_addline(buffer, "DP3 TC.g, TA, %s;\n", src_name[1]);
01574 
01575     shader_arb_get_src_param(ins, &ins->src[0], 0, src_name[0]);
01576     shader_addline(buffer, "ADD %s, %s, TC;\n", dst_name, src_name[0]);
01577 }
01578 
01579 static DWORD negate_modifiers(DWORD mod, char *extra_char)
01580 {
01581     *extra_char = ' ';
01582     switch(mod)
01583     {
01584         case WINED3DSPSM_NONE:      return WINED3DSPSM_NEG;
01585         case WINED3DSPSM_NEG:       return WINED3DSPSM_NONE;
01586         case WINED3DSPSM_BIAS:      return WINED3DSPSM_BIASNEG;
01587         case WINED3DSPSM_BIASNEG:   return WINED3DSPSM_BIAS;
01588         case WINED3DSPSM_SIGN:      return WINED3DSPSM_SIGNNEG;
01589         case WINED3DSPSM_SIGNNEG:   return WINED3DSPSM_SIGN;
01590         case WINED3DSPSM_COMP:      *extra_char = '-'; return WINED3DSPSM_COMP;
01591         case WINED3DSPSM_X2:        return WINED3DSPSM_X2NEG;
01592         case WINED3DSPSM_X2NEG:     return WINED3DSPSM_X2;
01593         case WINED3DSPSM_DZ:        *extra_char = '-'; return WINED3DSPSM_DZ;
01594         case WINED3DSPSM_DW:        *extra_char = '-'; return WINED3DSPSM_DW;
01595         case WINED3DSPSM_ABS:       return WINED3DSPSM_ABSNEG;
01596         case WINED3DSPSM_ABSNEG:    return WINED3DSPSM_ABS;
01597     }
01598     FIXME("Unknown modifier %u\n", mod);
01599     return mod;
01600 }
01601 
01602 static void pshader_hw_cnd(const struct wined3d_shader_instruction *ins)
01603 {
01604     const struct wined3d_shader_dst_param *dst = &ins->dst[0];
01605     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
01606     char dst_name[50];
01607     char src_name[3][50];
01608     DWORD shader_version = WINED3D_SHADER_VERSION(ins->ctx->reg_maps->shader_version.major,
01609             ins->ctx->reg_maps->shader_version.minor);
01610 
01611     shader_arb_get_dst_param(ins, dst, dst_name);
01612     shader_arb_get_src_param(ins, &ins->src[1], 1, src_name[1]);
01613 
01614     /* The coissue flag changes the semantic of the cnd instruction in <= 1.3 shaders */
01615     if (shader_version <= WINED3D_SHADER_VERSION(1, 3) && ins->coissue)
01616     {
01617         shader_addline(buffer, "MOV%s %s, %s;\n", shader_arb_get_modifier(ins), dst_name, src_name[1]);
01618     }
01619     else
01620     {
01621         struct wined3d_shader_src_param src0_copy = ins->src[0];
01622         char extra_neg;
01623 
01624         /* src0 may have a negate srcmod set, so we can't blindly add "-" to the name */
01625         src0_copy.modifiers = negate_modifiers(src0_copy.modifiers, &extra_neg);
01626 
01627         shader_arb_get_src_param(ins, &src0_copy, 0, src_name[0]);
01628         shader_arb_get_src_param(ins, &ins->src[2], 2, src_name[2]);
01629         shader_addline(buffer, "ADD TA, %c%s, coefdiv.x;\n", extra_neg, src_name[0]);
01630         shader_addline(buffer, "CMP%s %s, TA, %s, %s;\n", shader_arb_get_modifier(ins),
01631                 dst_name, src_name[1], src_name[2]);
01632     }
01633 }
01634 
01635 static void pshader_hw_cmp(const struct wined3d_shader_instruction *ins)
01636 {
01637     const struct wined3d_shader_dst_param *dst = &ins->dst[0];
01638     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
01639     char dst_name[50];
01640     char src_name[3][50];
01641 
01642     shader_arb_get_dst_param(ins, dst, dst_name);
01643 
01644     /* Generate input register names (with modifiers) */
01645     shader_arb_get_src_param(ins, &ins->src[0], 0, src_name[0]);
01646     shader_arb_get_src_param(ins, &ins->src[1], 1, src_name[1]);
01647     shader_arb_get_src_param(ins, &ins->src[2], 2, src_name[2]);
01648 
01649     shader_addline(buffer, "CMP%s %s, %s, %s, %s;\n", shader_arb_get_modifier(ins),
01650             dst_name, src_name[0], src_name[2], src_name[1]);
01651 }
01652 
01655 static void pshader_hw_dp2add(const struct wined3d_shader_instruction *ins)
01656 {
01657     const struct wined3d_shader_dst_param *dst = &ins->dst[0];
01658     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
01659     char dst_name[50];
01660     char src_name[3][50];
01661     struct shader_arb_ctx_priv *ctx = ins->ctx->backend_data;
01662 
01663     shader_arb_get_dst_param(ins, dst, dst_name);
01664     shader_arb_get_src_param(ins, &ins->src[0], 0, src_name[0]);
01665     shader_arb_get_src_param(ins, &ins->src[2], 2, src_name[2]);
01666 
01667     if(ctx->target_version >= NV3)
01668     {
01669         /* GL_NV_fragment_program2 has a 1:1 matching instruction */
01670         shader_arb_get_src_param(ins, &ins->src[1], 1, src_name[1]);
01671         shader_addline(buffer, "DP2A%s %s, %s, %s, %s;\n", shader_arb_get_modifier(ins),
01672                        dst_name, src_name[0], src_name[1], src_name[2]);
01673     }
01674     else if(ctx->target_version >= NV2)
01675     {
01676         /* dst.x = src2.?, src0.x, src1.x + src0.y * src1.y
01677          * dst.y = src2.?, src0.x, src1.z + src0.y * src1.w
01678          * dst.z = src2.?, src0.x, src1.x + src0.y * src1.y
01679          * dst.z = src2.?, src0.x, src1.z + src0.y * src1.w
01680          *
01681          * Make sure that src1.zw = src1.xy, then we get a classic dp2add
01682          *
01683          * .xyxy and other swizzles that we could get with this are not valid in
01684          * plain ARBfp, but luckily the NV extension grammar lifts this limitation.
01685          */
01686         struct wined3d_shader_src_param tmp_param = ins->src[1];
01687         DWORD swizzle = tmp_param.swizzle & 0xf; /* Selects .xy */
01688         tmp_param.swizzle = swizzle | (swizzle << 4); /* Creates .xyxy */
01689 
01690         shader_arb_get_src_param(ins, &tmp_param, 1, src_name[1]);
01691 
01692         shader_addline(buffer, "X2D%s %s, %s, %s, %s;\n", shader_arb_get_modifier(ins),
01693                        dst_name, src_name[2], src_name[0], src_name[1]);
01694     }
01695     else
01696     {
01697         shader_arb_get_src_param(ins, &ins->src[1], 1, src_name[1]);
01698         /* Emulate a DP2 with a DP3 and 0.0. Don't use the dest as temp register, it could be src[1] or src[2]
01699         * src_name[0] can be TA, but TA is a private temp for modifiers, so it is save to overwrite
01700         */
01701         shader_addline(buffer, "MOV TA, %s;\n", src_name[0]);
01702         shader_addline(buffer, "MOV TA.z, 0.0;\n");
01703         shader_addline(buffer, "DP3 TA, TA, %s;\n", src_name[1]);
01704         shader_addline(buffer, "ADD%s %s, TA, %s;\n", shader_arb_get_modifier(ins), dst_name, src_name[2]);
01705     }
01706 }
01707 
01708 /* Map the opcode 1-to-1 to the GL code */
01709 static void shader_hw_map2gl(const struct wined3d_shader_instruction *ins)
01710 {
01711     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
01712     const char *instruction;
01713     char arguments[256], dst_str[50];
01714     unsigned int i;
01715     const struct wined3d_shader_dst_param *dst = &ins->dst[0];
01716 
01717     switch (ins->handler_idx)
01718     {
01719         case WINED3DSIH_ABS: instruction = "ABS"; break;
01720         case WINED3DSIH_ADD: instruction = "ADD"; break;
01721         case WINED3DSIH_CRS: instruction = "XPD"; break;
01722         case WINED3DSIH_DP3: instruction = "DP3"; break;
01723         case WINED3DSIH_DP4: instruction = "DP4"; break;
01724         case WINED3DSIH_DST: instruction = "DST"; break;
01725         case WINED3DSIH_FRC: instruction = "FRC"; break;
01726         case WINED3DSIH_LIT: instruction = "LIT"; break;
01727         case WINED3DSIH_LRP: instruction = "LRP"; break;
01728         case WINED3DSIH_MAD: instruction = "MAD"; break;
01729         case WINED3DSIH_MAX: instruction = "MAX"; break;
01730         case WINED3DSIH_MIN: instruction = "MIN"; break;
01731         case WINED3DSIH_MOV: instruction = "MOV"; break;
01732         case WINED3DSIH_MUL: instruction = "MUL"; break;
01733         case WINED3DSIH_SGE: instruction = "SGE"; break;
01734         case WINED3DSIH_SLT: instruction = "SLT"; break;
01735         case WINED3DSIH_SUB: instruction = "SUB"; break;
01736         case WINED3DSIH_MOVA:instruction = "ARR"; break;
01737         case WINED3DSIH_DSX: instruction = "DDX"; break;
01738         default: instruction = "";
01739             FIXME("Unhandled opcode %#x\n", ins->handler_idx);
01740             break;
01741     }
01742 
01743     /* Note that shader_arb_add_dst_param() adds spaces. */
01744     arguments[0] = '\0';
01745     shader_arb_get_dst_param(ins, dst, dst_str);
01746     for (i = 0; i < ins->src_count; ++i)
01747     {
01748         char operand[100];
01749         strcat(arguments, ", ");
01750         shader_arb_get_src_param(ins, &ins->src[i], i, operand);
01751         strcat(arguments, operand);
01752     }
01753     shader_addline(buffer, "%s%s %s%s;\n", instruction, shader_arb_get_modifier(ins), dst_str, arguments);
01754 }
01755 
01756 static void shader_hw_nop(const struct wined3d_shader_instruction *ins)
01757 {
01758     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
01759     shader_addline(buffer, "NOP;\n");
01760 }
01761 
01762 static void shader_hw_mov(const struct wined3d_shader_instruction *ins)
01763 {
01764     const struct wined3d_shader *shader = ins->ctx->shader;
01765     const struct wined3d_shader_reg_maps *reg_maps = ins->ctx->reg_maps;
01766     BOOL pshader = shader_is_pshader_version(reg_maps->shader_version.type);
01767     struct shader_arb_ctx_priv *ctx = ins->ctx->backend_data;
01768     const char *zero = arb_get_helper_value(reg_maps->shader_version.type, ARB_ZERO);
01769     const char *one = arb_get_helper_value(reg_maps->shader_version.type, ARB_ONE);
01770     const char *two = arb_get_helper_value(reg_maps->shader_version.type, ARB_TWO);
01771 
01772     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
01773     char src0_param[256];
01774 
01775     if (ins->handler_idx == WINED3DSIH_MOVA)
01776     {
01777         const struct arb_vshader_private *shader_data = shader->backend_data;
01778         char write_mask[6];
01779         const char *offset = arb_get_helper_value(WINED3D_SHADER_TYPE_VERTEX, ARB_VS_REL_OFFSET);
01780 
01781         if(ctx->target_version >= NV2) {
01782             shader_hw_map2gl(ins);
01783             return;
01784         }
01785         shader_arb_get_src_param(ins, &ins->src[0], 0, src0_param);
01786         shader_arb_get_write_mask(ins, &ins->dst[0], write_mask);
01787 
01788         /* This implements the mova formula used in GLSL. The first two instructions
01789          * prepare the sign() part. Note that it is fine to have my_sign(0.0) = 1.0
01790          * in this case:
01791          * mova A0.x, 0.0
01792          *
01793          * A0.x = arl(floor(abs(0.0) + 0.5) * 1.0) = floor(0.5) = 0.0 since arl does a floor
01794          *
01795          * The ARL is performed when A0 is used - the requested component is read from A0_SHADOW into
01796          * A0.x. We can use the overwritten component of A0_shadow as temporary storage for the sign.
01797          */
01798         shader_addline(buffer, "SGE A0_SHADOW%s, %s, %s;\n", write_mask, src0_param, zero);
01799         shader_addline(buffer, "MAD A0_SHADOW%s, A0_SHADOW, %s, -%s;\n", write_mask, two, one);
01800 
01801         shader_addline(buffer, "ABS TA%s, %s;\n", write_mask, src0_param);
01802         shader_addline(buffer, "ADD TA%s, TA, rel_addr_const.x;\n", write_mask);
01803         shader_addline(buffer, "FLR TA%s, TA;\n", write_mask);
01804         if (shader_data->rel_offset)
01805         {
01806             shader_addline(buffer, "ADD TA%s, TA, %s;\n", write_mask, offset);
01807         }
01808         shader_addline(buffer, "MUL A0_SHADOW%s, TA, A0_SHADOW;\n", write_mask);
01809 
01810         ((struct shader_arb_ctx_priv *)ins->ctx->backend_data)->addr_reg[0] = '\0';
01811     }
01812     else if (reg_maps->shader_version.major == 1
01813           && !shader_is_pshader_version(reg_maps->shader_version.type)
01814           && ins->dst[0].reg.type == WINED3DSPR_ADDR)
01815     {
01816         const struct arb_vshader_private *shader_data = shader->backend_data;
01817         src0_param[0] = '\0';
01818 
01819         if (shader_data->rel_offset)
01820         {
01821             const char *offset = arb_get_helper_value(WINED3D_SHADER_TYPE_VERTEX, ARB_VS_REL_OFFSET);
01822             shader_arb_get_src_param(ins, &ins->src[0], 0, src0_param);
01823             shader_addline(buffer, "ADD TA.x, %s, %s;\n", src0_param, offset);
01824             shader_addline(buffer, "ARL A0.x, TA.x;\n");
01825         }
01826         else
01827         {
01828             /* Apple's ARB_vertex_program implementation does not accept an ARL source argument
01829              * with more than one component. Thus replicate the first source argument over all
01830              * 4 components. For example, .xyzw -> .x (or better: .xxxx), .zwxy -> .z, etc) */
01831             struct wined3d_shader_src_param tmp_src = ins->src[0];
01832             tmp_src.swizzle = (tmp_src.swizzle & 0x3) * 0x55;
01833             shader_arb_get_src_param(ins, &tmp_src, 0, src0_param);
01834             shader_addline(buffer, "ARL A0.x, %s;\n", src0_param);
01835         }
01836     }
01837     else if (ins->dst[0].reg.type == WINED3DSPR_COLOROUT && !ins->dst[0].reg.idx && pshader)
01838     {
01839         if (ctx->cur_ps_args->super.srgb_correction && shader->u.ps.color0_mov)
01840         {
01841             shader_addline(buffer, "#mov handled in srgb write code\n");
01842             return;
01843         }
01844         shader_hw_map2gl(ins);
01845     }
01846     else
01847     {
01848         shader_hw_map2gl(ins);
01849     }
01850 }
01851 
01852 static void pshader_hw_texkill(const struct wined3d_shader_instruction *ins)
01853 {
01854     const struct wined3d_shader_dst_param *dst = &ins->dst[0];
01855     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
01856     char reg_dest[40];
01857 
01858     /* No swizzles are allowed in d3d's texkill. PS 1.x ignores the 4th component as documented,
01859      * but >= 2.0 honors it (undocumented, but tested by the d3d9 testsuite)
01860      */
01861     shader_arb_get_dst_param(ins, dst, reg_dest);
01862 
01863     if (ins->ctx->reg_maps->shader_version.major >= 2)
01864     {
01865         const char *kilsrc = "TA";
01866         BOOL is_color;
01867 
01868         shader_arb_get_register_name(ins, &dst->reg, reg_dest, &is_color);
01869         if(dst->write_mask == WINED3DSP_WRITEMASK_ALL)
01870         {
01871             kilsrc = reg_dest;
01872         }
01873         else
01874         {
01875             /* Sigh. KIL doesn't support swizzles/writemasks. KIL passes a writemask, but ".xy" for example
01876              * is not valid as a swizzle in ARB (needs ".xyyy"). Use SWZ to load the register properly, and set
01877              * masked out components to 0(won't kill)
01878              */
01879             char x = '0', y = '0', z = '0', w = '0';
01880             if(dst->write_mask & WINED3DSP_WRITEMASK_0) x = 'x';
01881             if(dst->write_mask & WINED3DSP_WRITEMASK_1) y = 'y';
01882             if(dst->write_mask & WINED3DSP_WRITEMASK_2) z = 'z';
01883             if(dst->write_mask & WINED3DSP_WRITEMASK_3) w = 'w';
01884             shader_addline(buffer, "SWZ TA, %s, %c, %c, %c, %c;\n", reg_dest, x, y, z, w);
01885         }
01886         shader_addline(buffer, "KIL %s;\n", kilsrc);
01887     } else {
01888         /* ARB fp doesn't like swizzles on the parameter of the KIL instruction. To mask the 4th component,
01889          * copy the register into our general purpose TMP variable, overwrite .w and pass TMP to KIL
01890          *
01891          * ps_1_3 shaders use the texcoord incarnation of the Tx register. ps_1_4 shaders can use the same,
01892          * or pass in any temporary register(in shader phase 2)
01893          */
01894         if(ins->ctx->reg_maps->shader_version.minor <= 3) {
01895             sprintf(reg_dest, "fragment.texcoord[%u]", dst->reg.idx);
01896         } else {
01897             shader_arb_get_dst_param(ins, dst, reg_dest);
01898         }
01899         shader_addline(buffer, "SWZ TA, %s, x, y, z, 1;\n", reg_dest);
01900         shader_addline(buffer, "KIL TA;\n");
01901     }
01902 }
01903 
01904 static void pshader_hw_tex(const struct wined3d_shader_instruction *ins)
01905 {
01906     struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
01907     const struct wined3d_shader_dst_param *dst = &ins->dst[0];
01908     DWORD shader_version = WINED3D_SHADER_VERSION(ins->ctx->reg_maps->shader_version.major,
01909             ins->ctx->reg_maps->shader_version.minor);
01910     struct wined3d_shader_src_param src;
01911 
01912     char reg_dest[40];
01913     char reg_coord[40];
01914     DWORD reg_sampler_code;
01915     WORD myflags = 0;
01916 
01917     /* All versions have a destination register */
01918     shader_arb_get_dst_param(ins, dst, reg_dest);
01919 
01920     /* 1.0-1.4: Use destination register number as texture code.
01921        2.0+: Use provided sampler number as texure code. */
01922     if (shader_version < WINED3D_SHADER_VERSION(2,0))
01923         reg_sampler_code = dst->reg.idx;
01924     else
01925         reg_sampler_code = ins->src[1].reg.idx;
01926 
01927     /* 1.0-1.3: Use the texcoord varying.
01928        1.4+: Use provided coordinate source register. */
01929     if (shader_version < WINED3D_SHADER_VERSION(1,4))
01930         sprintf(reg_coord, "fragment.texcoord[%u]", reg_sampler_code);
01931     else {
01932         /* TEX is the only instruction that can handle DW and DZ natively */
01933         src = ins->src[0];
01934         if(src.modifiers == WINED3DSPSM_DW) src.modifiers = WINED3DSPSM_NONE;
01935         if(src.modifiers == WINED3DSPSM_DZ) src.modifiers = WINED3DSPSM_NONE;
01936         shader_arb_get_src_param(ins, &src, 0, reg_coord);
01937     }
01938 
01939     /* projection flag:
01940      * 1.1, 1.2, 1.3: Use WINED3D_TSS_TEXTURETRANSFORMFLAGS
01941      * 1.4: Use WINED3DSPSM_DZ or WINED3DSPSM_DW on src[0]
01942      * 2.0+: Use WINED3DSI_TEXLD_PROJECT on the opcode
01943      */
01944     if (shader_version < WINED3D_SHADER_VERSION(1,4))
01945     {
01946         DWORD flags = 0;
01947         if (reg_sampler_code < MAX_TEXTURES)
01948             flags = priv->cur_ps_args->super.tex_transform >> reg_sampler_code * WINED3D_PSARGS_TEXTRANSFORM_SHIFT;
01949         if (flags & WINED3D_PSARGS_PROJECTED)
01950             myflags |= TEX_PROJ;
01951     }
01952     else if (shader_version < WINED3D_SHADER_VERSION(2,0))
01953     {
01954         enum wined3d_shader_src_modifier src_mod = ins->src[0].modifiers;
01955         if (src_mod == WINED3DSPSM_DZ)
01956         {
01957             /* TXP cannot handle DZ natively, so move the z coordinate to .w. reg_coord is a read-only
01958              * varying register, so we need a temp reg
01959              */
01960             shader_addline(ins->ctx->buffer, "SWZ TA, %s, x, y, z, z;\n", reg_coord);
01961             strcpy(reg_coord, "TA");
01962             myflags |= TEX_PROJ;
01963         } else if(src_mod == WINED3DSPSM_DW) {
01964             myflags |= TEX_PROJ;
01965         }
01966     } else {
01967         if (ins->flags & WINED3DSI_TEXLD_PROJECT) myflags |= TEX_PROJ;
01968         if (ins->flags & WINED3DSI_TEXLD_BIAS) myflags |= TEX_BIAS;
01969     }
01970     shader_hw_sample(ins, reg_sampler_code, reg_dest, reg_coord, myflags, NULL, NULL);
01971 }
01972 
01973 static void pshader_hw_texcoord(const struct wined3d_shader_instruction *ins)
01974 {
01975     const struct wined3d_shader_dst_param *dst = &ins->dst[0];
01976     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
01977     DWORD shader_version = WINED3D_SHADER_VERSION(ins->ctx->reg_maps->shader_version.major,
01978             ins->ctx->reg_maps->shader_version.minor);
01979     char dst_str[50];
01980 
01981     if (shader_version < WINED3D_SHADER_VERSION(1,4))
01982     {
01983         DWORD reg = dst->reg.idx;
01984 
01985         shader_arb_get_dst_param(ins, &ins->dst[0], dst_str);
01986         shader_addline(buffer, "MOV_SAT %s, fragment.texcoord[%u];\n", dst_str, reg);
01987     } else {
01988         char reg_src[40];
01989 
01990         shader_arb_get_src_param(ins, &ins->src[0], 0, reg_src);
01991         shader_arb_get_dst_param(ins, &ins->dst[0], dst_str);
01992         shader_addline(buffer, "MOV %s, %s;\n", dst_str, reg_src);
01993    }
01994 }
01995 
01996 static void pshader_hw_texreg2ar(const struct wined3d_shader_instruction *ins)
01997 {
01998      struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
01999      DWORD flags = 0;
02000 
02001      DWORD reg1 = ins->dst[0].reg.idx;
02002      char dst_str[50];
02003      char src_str[50];
02004 
02005      /* Note that texreg2ar treats Tx as a temporary register, not as a varying */
02006      shader_arb_get_dst_param(ins, &ins->dst[0], dst_str);
02007      shader_arb_get_src_param(ins, &ins->src[0], 0, src_str);
02008      /* Move .x first in case src_str is "TA" */
02009      shader_addline(buffer, "MOV TA.y, %s.x;\n", src_str);
02010      shader_addline(buffer, "MOV TA.x, %s.w;\n", src_str);
02011      if (reg1 < MAX_TEXTURES)
02012      {
02013          struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
02014          flags = priv->cur_ps_args->super.tex_transform >> reg1 * WINED3D_PSARGS_TEXTRANSFORM_SHIFT;
02015      }
02016      shader_hw_sample(ins, reg1, dst_str, "TA", flags & WINED3D_PSARGS_PROJECTED ? TEX_PROJ : 0, NULL, NULL);
02017 }
02018 
02019 static void pshader_hw_texreg2gb(const struct wined3d_shader_instruction *ins)
02020 {
02021      struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
02022 
02023      DWORD reg1 = ins->dst[0].reg.idx;
02024      char dst_str[50];
02025      char src_str[50];
02026 
02027      /* Note that texreg2gb treats Tx as a temporary register, not as a varying */
02028      shader_arb_get_dst_param(ins, &ins->dst[0], dst_str);
02029      shader_arb_get_src_param(ins, &ins->src[0], 0, src_str);
02030      shader_addline(buffer, "MOV TA.x, %s.y;\n", src_str);
02031      shader_addline(buffer, "MOV TA.y, %s.z;\n", src_str);
02032      shader_hw_sample(ins, reg1, dst_str, "TA", 0, NULL, NULL);
02033 }
02034 
02035 static void pshader_hw_texreg2rgb(const struct wined3d_shader_instruction *ins)
02036 {
02037     DWORD reg1 = ins->dst[0].reg.idx;
02038     char dst_str[50];
02039     char src_str[50];
02040 
02041     /* Note that texreg2rg treats Tx as a temporary register, not as a varying */
02042     shader_arb_get_dst_param(ins, &ins->dst[0], dst_str);
02043     shader_arb_get_src_param(ins, &ins->src[0], 0, src_str);
02044     shader_hw_sample(ins, reg1, dst_str, src_str, 0, NULL, NULL);
02045 }
02046 
02047 static void pshader_hw_texbem(const struct wined3d_shader_instruction *ins)
02048 {
02049     struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
02050     const struct wined3d_shader_dst_param *dst = &ins->dst[0];
02051     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
02052     char reg_coord[40], dst_reg[50], src_reg[50];
02053     DWORD reg_dest_code;
02054 
02055     /* All versions have a destination register. The Tx where the texture coordinates come
02056      * from is the varying incarnation of the texture register
02057      */
02058     reg_dest_code = dst->reg.idx;
02059     shader_arb_get_dst_param(ins, &ins->dst[0], dst_reg);
02060     shader_arb_get_src_param(ins, &ins->src[0], 0, src_reg);
02061     sprintf(reg_coord, "fragment.texcoord[%u]", reg_dest_code);
02062 
02063     /* Sampling the perturbation map in Tsrc was done already, including the signedness correction if needed
02064      * The Tx in which the perturbation map is stored is the tempreg incarnation of the texture register
02065      *
02066      * GL_NV_fragment_program_option could handle this in one instruction via X2D:
02067      * X2D TA.xy, fragment.texcoord, T%u, bumpenvmat%u.xzyw
02068      *
02069      * However, the NV extensions are never enabled for <= 2.0 shaders because of the performance penalty that
02070      * comes with it, and texbem is an 1.x only instruction. No 1.x instruction forces us to enable the NV
02071      * extension.
02072      */
02073     shader_addline(buffer, "SWZ TB, bumpenvmat%d, x, z, 0, 0;\n", reg_dest_code);
02074     shader_addline(buffer, "DP3 TA.x, TB, %s;\n", src_reg);
02075     shader_addline(buffer, "SWZ TB, bumpenvmat%d, y, w, 0, 0;\n", reg_dest_code);
02076     shader_addline(buffer, "DP3 TA.y, TB, %s;\n", src_reg);
02077 
02078     /* with projective textures, texbem only divides the static texture coord, not the displacement,
02079      * so we can't let the GL handle this.
02080      */
02081     if ((priv->cur_ps_args->super.tex_transform >> reg_dest_code * WINED3D_PSARGS_TEXTRANSFORM_SHIFT)
02082             & WINED3D_PSARGS_PROJECTED)
02083     {
02084         shader_addline(buffer, "RCP TB.w, %s.w;\n", reg_coord);
02085         shader_addline(buffer, "MUL TB.xy, %s, TB.w;\n", reg_coord);
02086         shader_addline(buffer, "ADD TA.xy, TA, TB;\n");
02087     } else {
02088         shader_addline(buffer, "ADD TA.xy, TA, %s;\n", reg_coord);
02089     }
02090 
02091     shader_hw_sample(ins, reg_dest_code, dst_reg, "TA", 0, NULL, NULL);
02092 
02093     if (ins->handler_idx == WINED3DSIH_TEXBEML)
02094     {
02095         /* No src swizzles are allowed, so this is ok */
02096         shader_addline(buffer, "MAD TA, %s.z, luminance%d.x, luminance%d.y;\n",
02097                        src_reg, reg_dest_code, reg_dest_code);
02098         shader_addline(buffer, "MUL %s, %s, TA;\n", dst_reg, dst_reg);
02099     }
02100 }
02101 
02102 static void pshader_hw_texm3x2pad(const struct wined3d_shader_instruction *ins)
02103 {
02104     DWORD reg = ins->dst[0].reg.idx;
02105     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
02106     char src0_name[50], dst_name[50];
02107     BOOL is_color;
02108     struct wined3d_shader_register tmp_reg = ins->dst[0].reg;
02109 
02110     shader_arb_get_src_param(ins, &ins->src[0], 0, src0_name);
02111     /* The next instruction will be a texm3x2tex or texm3x2depth that writes to the uninitialized
02112      * T<reg+1> register. Use this register to store the calculated vector
02113      */
02114     tmp_reg.idx = reg + 1;
02115     shader_arb_get_register_name(ins, &tmp_reg, dst_name, &is_color);
02116     shader_addline(buffer, "DP3 %s.x, fragment.texcoord[%u], %s;\n", dst_name, reg, src0_name);
02117 }
02118 
02119 static void pshader_hw_texm3x2tex(const struct wined3d_shader_instruction *ins)
02120 {
02121     struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
02122     DWORD flags;
02123     DWORD reg = ins->dst[0].reg.idx;
02124     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
02125     char dst_str[50];
02126     char src0_name[50];
02127     char dst_reg[50];
02128     BOOL is_color;
02129 
02130     /* We know that we're writing to the uninitialized T<reg> register, so use it for temporary storage */
02131     shader_arb_get_register_name(ins, &ins->dst[0].reg, dst_reg, &is_color);
02132 
02133     shader_arb_get_dst_param(ins, &ins->dst[0], dst_str);
02134     shader_arb_get_src_param(ins, &ins->src[0], 0, src0_name);
02135     shader_addline(buffer, "DP3 %s.y, fragment.texcoord[%u], %s;\n", dst_reg, reg, src0_name);
02136     flags = reg < MAX_TEXTURES ? priv->cur_ps_args->super.tex_transform >> reg * WINED3D_PSARGS_TEXTRANSFORM_SHIFT : 0;
02137     shader_hw_sample(ins, reg, dst_str, dst_reg, flags & WINED3D_PSARGS_PROJECTED ? TEX_PROJ : 0, NULL, NULL);
02138 }
02139 
02140 static void pshader_hw_texm3x3pad(const struct wined3d_shader_instruction *ins)
02141 {
02142     struct wined3d_shader_tex_mx *tex_mx = ins->ctx->tex_mx;
02143     DWORD reg = ins->dst[0].reg.idx;
02144     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
02145     char src0_name[50], dst_name[50];
02146     struct wined3d_shader_register tmp_reg = ins->dst[0].reg;
02147     BOOL is_color;
02148 
02149     /* There are always 2 texm3x3pad instructions followed by one texm3x3[tex,vspec, ...] instruction, with
02150      * incrementing ins->dst[0].register_idx numbers. So the pad instruction already knows the final destination
02151      * register, and this register is uninitialized(otherwise the assembler complains that it is 'redeclared')
02152      */
02153     tmp_reg.idx = reg + 2 - tex_mx->current_row;
02154     shader_arb_get_register_name(ins, &tmp_reg, dst_name, &is_color);
02155 
02156     shader_arb_get_src_param(ins, &ins->src[0], 0, src0_name);
02157     shader_addline(buffer, "DP3 %s.%c, fragment.texcoord[%u], %s;\n",
02158                    dst_name, 'x' + tex_mx->current_row, reg, src0_name);
02159     tex_mx->texcoord_w[tex_mx->current_row++] = reg;
02160 }
02161 
02162 static void pshader_hw_texm3x3tex(const struct wined3d_shader_instruction *ins)
02163 {
02164     struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
02165     struct wined3d_shader_tex_mx *tex_mx = ins->ctx->tex_mx;
02166     DWORD flags;
02167     DWORD reg = ins->dst[0].reg.idx;
02168     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
02169     char dst_str[50];
02170     char src0_name[50], dst_name[50];
02171     BOOL is_color;
02172 
02173     shader_arb_get_register_name(ins, &ins->dst[0].reg, dst_name, &is_color);
02174     shader_arb_get_src_param(ins, &ins->src[0], 0, src0_name);
02175     shader_addline(buffer, "DP3 %s.z, fragment.texcoord[%u], %s;\n", dst_name, reg, src0_name);
02176 
02177     /* Sample the texture using the calculated coordinates */
02178     shader_arb_get_dst_param(ins, &ins->dst[0], dst_str);
02179     flags = reg < MAX_TEXTURES ? priv->cur_ps_args->super.tex_transform >> reg * WINED3D_PSARGS_TEXTRANSFORM_SHIFT : 0;
02180     shader_hw_sample(ins, reg, dst_str, dst_name, flags & WINED3D_PSARGS_PROJECTED ? TEX_PROJ : 0, NULL, NULL);
02181     tex_mx->current_row = 0;
02182 }
02183 
02184 static void pshader_hw_texm3x3vspec(const struct wined3d_shader_instruction *ins)
02185 {
02186     struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
02187     struct wined3d_shader_tex_mx *tex_mx = ins->ctx->tex_mx;
02188     DWORD flags;
02189     DWORD reg = ins->dst[0].reg.idx;
02190     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
02191     char dst_str[50];
02192     char src0_name[50];
02193     char dst_reg[50];
02194     BOOL is_color;
02195 
02196     /* Get the dst reg without writemask strings. We know this register is uninitialized, so we can use all
02197      * components for temporary data storage
02198      */
02199     shader_arb_get_register_name(ins, &ins->dst[0].reg, dst_reg, &is_color);
02200     shader_arb_get_src_param(ins, &ins->src[0], 0, src0_name);
02201     shader_addline(buffer, "DP3 %s.z, fragment.texcoord[%u], %s;\n", dst_reg, reg, src0_name);
02202 
02203     /* Construct the eye-ray vector from w coordinates */
02204     shader_addline(buffer, "MOV TB.x, fragment.texcoord[%u].w;\n", tex_mx->texcoord_w[0]);
02205     shader_addline(buffer, "MOV TB.y, fragment.texcoord[%u].w;\n", tex_mx->texcoord_w[1]);
02206     shader_addline(buffer, "MOV TB.z, fragment.texcoord[%u].w;\n", reg);
02207 
02208     /* Calculate reflection vector
02209      */
02210     shader_addline(buffer, "DP3 %s.w, %s, TB;\n", dst_reg, dst_reg);
02211     /* The .w is ignored when sampling, so I can use TB.w to calculate dot(N, N) */
02212     shader_addline(buffer, "DP3 TB.w, %s, %s;\n", dst_reg, dst_reg);
02213     shader_addline(buffer, "RCP TB.w, TB.w;\n");
02214     shader_addline(buffer, "MUL %s.w, %s.w, TB.w;\n", dst_reg, dst_reg);
02215     shader_addline(buffer, "MUL %s, %s.w, %s;\n", dst_reg, dst_reg, dst_reg);
02216     shader_addline(buffer, "MAD %s, coefmul.x, %s, -TB;\n", dst_reg, dst_reg);
02217 
02218     /* Sample the texture using the calculated coordinates */
02219     shader_arb_get_dst_param(ins, &ins->dst[0], dst_str);
02220     flags = reg < MAX_TEXTURES ? priv->cur_ps_args->super.tex_transform >> reg * WINED3D_PSARGS_TEXTRANSFORM_SHIFT : 0;
02221     shader_hw_sample(ins, reg, dst_str, dst_reg, flags & WINED3D_PSARGS_PROJECTED ? TEX_PROJ : 0, NULL, NULL);
02222     tex_mx->current_row = 0;
02223 }
02224 
02225 static void pshader_hw_texm3x3spec(const struct wined3d_shader_instruction *ins)
02226 {
02227     struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
02228     struct wined3d_shader_tex_mx *tex_mx = ins->ctx->tex_mx;
02229     DWORD flags;
02230     DWORD reg = ins->dst[0].reg.idx;
02231     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
02232     char dst_str[50];
02233     char src0_name[50];
02234     char src1_name[50];
02235     char dst_reg[50];
02236     BOOL is_color;
02237 
02238     shader_arb_get_src_param(ins, &ins->src[0], 0, src0_name);
02239     shader_arb_get_src_param(ins, &ins->src[0], 1, src1_name);
02240     shader_arb_get_register_name(ins, &ins->dst[0].reg, dst_reg, &is_color);
02241     /* Note: dst_reg.xy is input here, generated by two texm3x3pad instructions */
02242     shader_addline(buffer, "DP3 %s.z, fragment.texcoord[%u], %s;\n", dst_reg, reg, src0_name);
02243 
02244     /* Calculate reflection vector.
02245      *
02246      *                   dot(N, E)
02247      * dst_reg.xyz = 2 * --------- * N - E
02248      *                   dot(N, N)
02249      *
02250      * Which normalizes the normal vector
02251      */
02252     shader_addline(buffer, "DP3 %s.w, %s, %s;\n", dst_reg, dst_reg, src1_name);
02253     shader_addline(buffer, "DP3 TC.w, %s, %s;\n", dst_reg, dst_reg);
02254     shader_addline(buffer, "RCP TC.w, TC.w;\n");
02255     shader_addline(buffer, "MUL %s.w, %s.w, TC.w;\n", dst_reg, dst_reg);
02256     shader_addline(buffer, "MUL %s, %s.w, %s;\n", dst_reg, dst_reg, dst_reg);
02257     shader_addline(buffer, "MAD %s, coefmul.x, %s, -%s;\n", dst_reg, dst_reg, src1_name);
02258 
02259     /* Sample the texture using the calculated coordinates */
02260     shader_arb_get_dst_param(ins, &ins->dst[0], dst_str);
02261     flags = reg < MAX_TEXTURES ? priv->cur_ps_args->super.tex_transform >> reg * WINED3D_PSARGS_TEXTRANSFORM_SHIFT : 0;
02262     shader_hw_sample(ins, reg, dst_str, dst_reg, flags & WINED3D_PSARGS_PROJECTED ? TEX_PROJ : 0, NULL, NULL);
02263     tex_mx->current_row = 0;
02264 }
02265 
02266 static void pshader_hw_texdepth(const struct wined3d_shader_instruction *ins)
02267 {
02268     const struct wined3d_shader_dst_param *dst = &ins->dst[0];
02269     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
02270     char dst_name[50];
02271     const char *zero = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_ZERO);
02272     const char *one = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_ONE);
02273 
02274     /* texdepth has an implicit destination, the fragment depth value. It's only parameter,
02275      * which is essentially an input, is the destination register because it is the first
02276      * parameter. According to the msdn, this must be register r5, but let's keep it more flexible
02277      * here(writemasks/swizzles are not valid on texdepth)
02278      */
02279     shader_arb_get_dst_param(ins, dst, dst_name);
02280 
02281     /* According to the msdn, the source register(must be r5) is unusable after
02282      * the texdepth instruction, so we're free to modify it
02283      */
02284     shader_addline(buffer, "MIN %s.y, %s.y, %s;\n", dst_name, dst_name, one);
02285 
02286     /* How to deal with the special case dst_name.g == 0? if r != 0, then
02287      * the r * (1 / 0) will give infinity, which is clamped to 1.0, the correct
02288      * result. But if r = 0.0, then 0 * inf = 0, which is incorrect.
02289      */
02290     shader_addline(buffer, "RCP %s.y, %s.y;\n", dst_name, dst_name);
02291     shader_addline(buffer, "MUL TA.x, %s.x, %s.y;\n", dst_name, dst_name);
02292     shader_addline(buffer, "MIN TA.x, TA.x, %s;\n", one);
02293     shader_addline(buffer, "MAX result.depth, TA.x, %s;\n", zero);
02294 }
02295 
02299 static void pshader_hw_texdp3tex(const struct wined3d_shader_instruction *ins)
02300 {
02301     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
02302     DWORD sampler_idx = ins->dst[0].reg.idx;
02303     char src0[50];
02304     char dst_str[50];
02305 
02306     shader_arb_get_src_param(ins, &ins->src[0], 0, src0);
02307     shader_addline(buffer, "MOV TB, 0.0;\n");
02308     shader_addline(buffer, "DP3 TB.x, fragment.texcoord[%u], %s;\n", sampler_idx, src0);
02309 
02310     shader_arb_get_dst_param(ins, &ins->dst[0], dst_str);
02311     shader_hw_sample(ins, sampler_idx, dst_str, "TB", 0 /* Only one coord, can't be projected */, NULL, NULL);
02312 }
02313 
02316 static void pshader_hw_texdp3(const struct wined3d_shader_instruction *ins)
02317 {
02318     const struct wined3d_shader_dst_param *dst = &ins->dst[0];
02319     char src0[50];
02320     char dst_str[50];
02321     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
02322 
02323     /* Handle output register */
02324     shader_arb_get_dst_param(ins, dst, dst_str);
02325     shader_arb_get_src_param(ins, &ins->src[0], 0, src0);
02326     shader_addline(buffer, "DP3 %s, fragment.texcoord[%u], %s;\n", dst_str, dst->reg.idx, src0);
02327 }
02328 
02331 static void pshader_hw_texm3x3(const struct wined3d_shader_instruction *ins)
02332 {
02333     const struct wined3d_shader_dst_param *dst = &ins->dst[0];
02334     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
02335     char dst_str[50], dst_name[50];
02336     char src0[50];
02337     BOOL is_color;
02338 
02339     shader_arb_get_dst_param(ins, dst, dst_str);
02340     shader_arb_get_src_param(ins, &ins->src[0], 0, src0);
02341     shader_arb_get_register_name(ins, &ins->dst[0].reg, dst_name, &is_color);
02342     shader_addline(buffer, "DP3 %s.z, fragment.texcoord[%u], %s;\n", dst_name, dst->reg.idx, src0);
02343     shader_addline(buffer, "MOV %s, %s;\n", dst_str, dst_name);
02344 }
02345 
02351 static void pshader_hw_texm3x2depth(const struct wined3d_shader_instruction *ins)
02352 {
02353     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
02354     const struct wined3d_shader_dst_param *dst = &ins->dst[0];
02355     char src0[50], dst_name[50];
02356     BOOL is_color;
02357     const char *zero = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_ZERO);
02358     const char *one = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_ONE);
02359 
02360     shader_arb_get_src_param(ins, &ins->src[0], 0, src0);
02361     shader_arb_get_register_name(ins, &ins->dst[0].reg, dst_name, &is_color);
02362     shader_addline(buffer, "DP3 %s.y, fragment.texcoord[%u], %s;\n", dst_name, dst->reg.idx, src0);
02363 
02364     /* How to deal with the special case dst_name.g == 0? if r != 0, then
02365      * the r * (1 / 0) will give infinity, which is clamped to 1.0, the correct
02366      * result. But if r = 0.0, then 0 * inf = 0, which is incorrect.
02367      */
02368     shader_addline(buffer, "RCP %s.y, %s.y;\n", dst_name, dst_name);
02369     shader_addline(buffer, "MUL %s.x, %s.x, %s.y;\n", dst_name, dst_name, dst_name);
02370     shader_addline(buffer, "MIN %s.x, %s.x, %s;\n", dst_name, dst_name, one);
02371     shader_addline(buffer, "MAX result.depth, %s.x, %s;\n", dst_name, zero);
02372 }
02373 
02376 static void shader_hw_mnxn(const struct wined3d_shader_instruction *ins)
02377 {
02378     int i;
02379     int nComponents = 0;
02380     struct wined3d_shader_dst_param tmp_dst = {{0}};
02381     struct wined3d_shader_src_param tmp_src[2] = {{{0}}};
02382     struct wined3d_shader_instruction tmp_ins;
02383 
02384     memset(&tmp_ins, 0, sizeof(tmp_ins));
02385 
02386     /* Set constants for the temporary argument */
02387     tmp_ins.ctx = ins->ctx;
02388     tmp_ins.dst_count = 1;
02389     tmp_ins.dst = &tmp_dst;
02390     tmp_ins.src_count = 2;
02391     tmp_ins.src = tmp_src;
02392 
02393     switch(ins->handler_idx)
02394     {
02395         case WINED3DSIH_M4x4:
02396             nComponents = 4;
02397             tmp_ins.handler_idx = WINED3DSIH_DP4;
02398             break;
02399         case WINED3DSIH_M4x3:
02400             nComponents = 3;
02401             tmp_ins.handler_idx = WINED3DSIH_DP4;
02402             break;
02403         case WINED3DSIH_M3x4:
02404             nComponents = 4;
02405             tmp_ins.handler_idx = WINED3DSIH_DP3;
02406             break;
02407         case WINED3DSIH_M3x3:
02408             nComponents = 3;
02409             tmp_ins.handler_idx = WINED3DSIH_DP3;
02410             break;
02411         case WINED3DSIH_M3x2:
02412             nComponents = 2;
02413             tmp_ins.handler_idx = WINED3DSIH_DP3;
02414             break;
02415         default:
02416             FIXME("Unhandled opcode %#x\n", ins->handler_idx);
02417             break;
02418     }
02419 
02420     tmp_dst = ins->dst[0];
02421     tmp_src[0] = ins->src[0];
02422     tmp_src[1] = ins->src[1];
02423     for (i = 0; i < nComponents; i++) {
02424         tmp_dst.write_mask = WINED3DSP_WRITEMASK_0 << i;
02425         shader_hw_map2gl(&tmp_ins);
02426         ++tmp_src[1].reg.idx;
02427     }
02428 }
02429 
02430 static void shader_hw_rcp(const struct wined3d_shader_instruction *ins)
02431 {
02432     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
02433 
02434     char dst[50];
02435     char src[50];
02436 
02437     shader_arb_get_dst_param(ins, &ins->dst[0], dst); /* Destination */
02438     shader_arb_get_src_param(ins, &ins->src[0], 0, src);
02439     if (ins->src[0].swizzle == WINED3DSP_NOSWIZZLE)
02440     {
02441         /* Dx sdk says .x is used if no swizzle is given, but our test shows that
02442          * .w is used
02443          */
02444         strcat(src, ".w");
02445     }
02446 
02447     shader_addline(buffer, "RCP%s %s, %s;\n", shader_arb_get_modifier(ins), dst, src);
02448 }
02449 
02450 static void shader_hw_scalar_op(const struct wined3d_shader_instruction *ins)
02451 {
02452     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
02453     const char *instruction;
02454 
02455     char dst[50];
02456     char src[50];
02457 
02458     switch(ins->handler_idx)
02459     {
02460         case WINED3DSIH_RSQ:  instruction = "RSQ"; break;
02461         case WINED3DSIH_RCP:  instruction = "RCP"; break;
02462         case WINED3DSIH_EXP:  instruction = "EX2"; break;
02463         case WINED3DSIH_EXPP: instruction = "EXP"; break;
02464         default: instruction = "";
02465             FIXME("Unhandled opcode %#x\n", ins->handler_idx);
02466             break;
02467     }
02468 
02469     shader_arb_get_dst_param(ins, &ins->dst[0], dst); /* Destination */
02470     shader_arb_get_src_param(ins, &ins->src[0], 0, src);
02471     if (ins->src[0].swizzle == WINED3DSP_NOSWIZZLE)
02472     {
02473         /* Dx sdk says .x is used if no swizzle is given, but our test shows that
02474          * .w is used
02475          */
02476         strcat(src, ".w");
02477     }
02478 
02479     shader_addline(buffer, "%s%s %s, %s;\n", instruction, shader_arb_get_modifier(ins), dst, src);
02480 }
02481 
02482 static void shader_hw_nrm(const struct wined3d_shader_instruction *ins)
02483 {
02484     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
02485     char dst_name[50];
02486     char src_name[50];
02487     struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
02488     BOOL pshader = shader_is_pshader_version(ins->ctx->reg_maps->shader_version.type);
02489     const char *zero = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_ZERO);
02490 
02491     shader_arb_get_dst_param(ins, &ins->dst[0], dst_name);
02492     shader_arb_get_src_param(ins, &ins->src[0], 1 /* Use TB */, src_name);
02493 
02494     /* In D3D, NRM of a vector with length zero returns zero. Catch this situation, as
02495      * otherwise NRM or RSQ would return NaN */
02496     if(pshader && priv->target_version >= NV3)
02497     {
02498         /* GL_NV_fragment_program2's NRM needs protection against length zero vectors too
02499          *
02500          * TODO: Find out if DP3+NRM+MOV is really faster than DP3+RSQ+MUL
02501          */
02502         shader_addline(buffer, "DP3C TA, %s, %s;\n", src_name, src_name);
02503         shader_addline(buffer, "NRM%s %s, %s;\n", shader_arb_get_modifier(ins), dst_name, src_name);
02504         shader_addline(buffer, "MOV %s (EQ), %s;\n", dst_name, zero);
02505     }
02506     else if(priv->target_version >= NV2)
02507     {
02508         shader_addline(buffer, "DP3C TA.x, %s, %s;\n", src_name, src_name);
02509         shader_addline(buffer, "RSQ TA.x (NE), TA.x;\n");
02510         shader_addline(buffer, "MUL%s %s, %s, TA.x;\n", shader_arb_get_modifier(ins), dst_name,
02511                        src_name);
02512     }
02513     else
02514     {
02515         const char *one = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_ONE);
02516 
02517         shader_addline(buffer, "DP3 TA.x, %s, %s;\n", src_name, src_name);
02518         /* Pass any non-zero value to RSQ if the input vector has a length of zero. The
02519          * RSQ result doesn't matter, as long as multiplying it by 0 returns 0.
02520          */
02521         shader_addline(buffer, "SGE TA.y, -TA.x, %s;\n", zero);
02522         shader_addline(buffer, "MAD TA.x, %s, TA.y, TA.x;\n", one);
02523 
02524         shader_addline(buffer, "RSQ TA.x, TA.x;\n");
02525         /* dst.w = src[0].w * 1 / (src.x^2 + src.y^2 + src.z^2)^(1/2) according to msdn*/
02526         shader_addline(buffer, "MUL%s %s, %s, TA.x;\n", shader_arb_get_modifier(ins), dst_name,
02527                     src_name);
02528     }
02529 }
02530 
02531 static void shader_hw_lrp(const struct wined3d_shader_instruction *ins)
02532 {
02533     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
02534     char dst_name[50];
02535     char src_name[3][50];
02536 
02537     /* ARB_fragment_program has a convenient LRP instruction */
02538     if(shader_is_pshader_version(ins->ctx->reg_maps->shader_version.type)) {
02539         shader_hw_map2gl(ins);
02540         return;
02541     }
02542 
02543     shader_arb_get_dst_param(ins, &ins->dst[0], dst_name);
02544     shader_arb_get_src_param(ins, &ins->src[0], 0, src_name[0]);
02545     shader_arb_get_src_param(ins, &ins->src[1], 1, src_name[1]);
02546     shader_arb_get_src_param(ins, &ins->src[2], 2, src_name[2]);
02547 
02548     shader_addline(buffer, "SUB TA, %s, %s;\n", src_name[1], src_name[2]);
02549     shader_addline(buffer, "MAD%s %s, %s, TA, %s;\n", shader_arb_get_modifier(ins),
02550                    dst_name, src_name[0], src_name[2]);
02551 }
02552 
02553 static void shader_hw_sincos(const struct wined3d_shader_instruction *ins)
02554 {
02555     /* This instruction exists in ARB, but the d3d instruction takes two extra parameters which
02556      * must contain fixed constants. So we need a separate function to filter those constants and
02557      * can't use map2gl
02558      */
02559     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
02560     struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
02561     const struct wined3d_shader_dst_param *dst = &ins->dst[0];
02562     char dst_name[50];
02563     char src_name0[50], src_name1[50], src_name2[50];
02564     BOOL is_color;
02565 
02566     shader_arb_get_src_param(ins, &ins->src[0], 0, src_name0);
02567     if(shader_is_pshader_version(ins->ctx->reg_maps->shader_version.type)) {
02568         shader_arb_get_dst_param(ins, &ins->dst[0], dst_name);
02569         /* No modifiers are supported on SCS */
02570         shader_addline(buffer, "SCS %s, %s;\n", dst_name, src_name0);
02571 
02572         if(ins->dst[0].modifiers & WINED3DSPDM_SATURATE)
02573         {
02574             shader_arb_get_register_name(ins, &dst->reg, src_name0, &is_color);
02575             shader_addline(buffer, "MOV_SAT %s, %s;\n", dst_name, src_name0);
02576         }
02577     } else if(priv->target_version >= NV2) {
02578         shader_arb_get_register_name(ins, &dst->reg, dst_name, &is_color);
02579 
02580         /* Sincos writemask must be .x, .y or .xy */
02581         if(dst->write_mask & WINED3DSP_WRITEMASK_0)
02582             shader_addline(buffer, "COS%s %s.x, %s;\n", shader_arb_get_modifier(ins), dst_name, src_name0);
02583         if(dst->write_mask & WINED3DSP_WRITEMASK_1)
02584             shader_addline(buffer, "SIN%s %s.y, %s;\n", shader_arb_get_modifier(ins), dst_name, src_name0);
02585     } else {
02586         /* Approximate sine and cosine with a taylor series, as per math textbook. The application passes 8
02587          * helper constants(D3DSINCOSCONST1 and D3DSINCOSCONST2) in src1 and src2.
02588          *
02589          * sin(x) = x - x^3/3! + x^5/5! - x^7/7! + ...
02590          * cos(x) = 1 - x^2/2! + x^4/4! - x^6/6! + ...
02591          *
02592          * The constants we get are:
02593          *
02594          *  +1   +1,     -1     -1     +1      +1      -1       -1
02595          *      ---- ,  ---- , ---- , ----- , ----- , ----- , ------
02596          *      1!*2    2!*4   3!*8   4!*16   5!*32   6!*64   7!*128
02597          *
02598          * If used with x^2, x^3, x^4 etc they calculate sin(x/2) and cos(x/2):
02599          *
02600          * (x/2)^2 = x^2 / 4
02601          * (x/2)^3 = x^3 / 8
02602          * (x/2)^4 = x^4 / 16
02603          * (x/2)^5 = x^5 / 32
02604          * etc
02605          *
02606          * To get the final result:
02607          * sin(x) = 2 * sin(x/2) * cos(x/2)
02608          * cos(x) = cos(x/2)^2 - sin(x/2)^2
02609          * (from sin(x+y) and cos(x+y) rules)
02610          *
02611          * As per MSDN, dst.z is undefined after the operation, and so is
02612          * dst.x and dst.y if they're masked out by the writemask. Ie
02613          * sincos dst.y, src1, c0, c1
02614          * returns the sine in dst.y. dst.x and dst.z are undefined, dst.w is not touched. The assembler
02615          * vsa.exe also stops with an error if the dest register is the same register as the source
02616          * register. This means we can use dest.xyz as temporary storage. The assembler vsa.exe output also
02617          * indicates that sincos consumes 8 instruction slots in vs_2_0(and, strangely, in vs_3_0).
02618          */
02619         shader_arb_get_src_param(ins, &ins->src[1], 1, src_name1);
02620         shader_arb_get_src_param(ins, &ins->src[2], 2, src_name2);
02621         shader_arb_get_register_name(ins, &dst->reg, dst_name, &is_color);
02622 
02623         shader_addline(buffer, "MUL %s.x, %s, %s;\n", dst_name, src_name0, src_name0);  /* x ^ 2 */
02624         shader_addline(buffer, "MUL TA.y, %s.x, %s;\n", dst_name, src_name0);           /* x ^ 3 */
02625         shader_addline(buffer, "MUL %s.y, TA.y, %s;\n", dst_name, src_name0);           /* x ^ 4 */
02626         shader_addline(buffer, "MUL TA.z, %s.y, %s;\n", dst_name, src_name0);           /* x ^ 5 */
02627         shader_addline(buffer, "MUL %s.z, TA.z, %s;\n", dst_name, src_name0);           /* x ^ 6 */
02628         shader_addline(buffer, "MUL TA.w, %s.z, %s;\n", dst_name, src_name0);           /* x ^ 7 */
02629 
02630         /* sin(x/2)
02631          *
02632          * Unfortunately we don't get the constants in a DP4-capable form. Is there a way to
02633          * properly merge that with MULs in the code above?
02634          * The swizzles .yz and xw however fit into the .yzxw swizzle added to ps_2_0. Maybe
02635          * we can merge the sine and cosine MAD rows to calculate them together.
02636          */
02637         shader_addline(buffer, "MUL TA.x, %s, %s.w;\n", src_name0, src_name2); /* x^1, +1/(1!*2) */
02638         shader_addline(buffer, "MAD TA.x, TA.y, %s.x, TA.x;\n", src_name2); /* -1/(3!*8) */
02639         shader_addline(buffer, "MAD TA.x, TA.z, %s.w, TA.x;\n", src_name1); /* +1/(5!*32) */
02640         shader_addline(buffer, "MAD TA.x, TA.w, %s.x, TA.x;\n", src_name1); /* -1/(7!*128) */
02641 
02642         /* cos(x/2) */
02643         shader_addline(buffer, "MAD TA.y, %s.x, %s.y, %s.z;\n", dst_name, src_name2, src_name2); /* -1/(2!*4), +1.0 */
02644         shader_addline(buffer, "MAD TA.y, %s.y, %s.z, TA.y;\n", dst_name, src_name1); /* +1/(4!*16) */
02645         shader_addline(buffer, "MAD TA.y, %s.z, %s.y, TA.y;\n", dst_name, src_name1); /* -1/(6!*64) */
02646 
02647         if(dst->write_mask & WINED3DSP_WRITEMASK_0) {
02648             /* cos x */
02649             shader_addline(buffer, "MUL TA.z, TA.y, TA.y;\n");
02650             shader_addline(buffer, "MAD %s.x, -TA.x, TA.x, TA.z;\n", dst_name);
02651         }
02652         if(dst->write_mask & WINED3DSP_WRITEMASK_1) {
02653             /* sin x */
02654             shader_addline(buffer, "MUL %s.y, TA.x, TA.y;\n", dst_name);
02655             shader_addline(buffer, "ADD %s.y, %s.y, %s.y;\n", dst_name, dst_name, dst_name);
02656         }
02657     }
02658 }
02659 
02660 static void shader_hw_sgn(const struct wined3d_shader_instruction *ins)
02661 {
02662     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
02663     char dst_name[50];
02664     char src_name[50];
02665     struct shader_arb_ctx_priv *ctx = ins->ctx->backend_data;
02666 
02667     shader_arb_get_dst_param(ins, &ins->dst[0], dst_name);
02668     shader_arb_get_src_param(ins, &ins->src[0], 0, src_name);
02669 
02670     /* SGN is only valid in vertex shaders */
02671     if(ctx->target_version >= NV2) {
02672         shader_addline(buffer, "SSG%s %s, %s;\n", shader_arb_get_modifier(ins), dst_name, src_name);
02673         return;
02674     }
02675 
02676     /* If SRC > 0.0, -SRC < SRC = TRUE, otherwise false.
02677      * if SRC < 0.0,  SRC < -SRC = TRUE. If neither is true, src = 0.0
02678      */
02679     if(ins->dst[0].modifiers & WINED3DSPDM_SATURATE) {
02680         shader_addline(buffer, "SLT %s, -%s, %s;\n", dst_name, src_name, src_name);
02681     } else {
02682         /* src contains TA? Write to the dest first. This won't overwrite our destination.
02683          * Then use TA, and calculate the final result
02684          *
02685          * Not reading from TA? Store the first result in TA to avoid overwriting the
02686          * destination if src reg = dst reg
02687          */
02688         if(strstr(src_name, "TA"))
02689         {
02690             shader_addline(buffer, "SLT %s,  %s, -%s;\n", dst_name, src_name, src_name);
02691             shader_addline(buffer, "SLT TA, -%s, %s;\n", src_name, src_name);
02692             shader_addline(buffer, "ADD %s, %s, -TA;\n", dst_name, dst_name);
02693         }
02694         else
02695         {
02696             shader_addline(buffer, "SLT TA, -%s, %s;\n", src_name, src_name);
02697             shader_addline(buffer, "SLT %s,  %s, -%s;\n", dst_name, src_name, src_name);
02698             shader_addline(buffer, "ADD %s, TA, -%s;\n", dst_name, dst_name);
02699         }
02700     }
02701 }
02702 
02703 static void shader_hw_dsy(const struct wined3d_shader_instruction *ins)
02704 {
02705     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
02706     char src[50];
02707     char dst[50];
02708     char dst_name[50];
02709     BOOL is_color;
02710 
02711     shader_arb_get_dst_param(ins, &ins->dst[0], dst);
02712     shader_arb_get_src_param(ins, &ins->src[0], 0, src);
02713     shader_arb_get_register_name(ins, &ins->dst[0].reg, dst_name, &is_color);
02714 
02715     shader_addline(buffer, "DDY %s, %s;\n", dst, src);
02716     shader_addline(buffer, "MUL%s %s, %s, ycorrection.y;\n", shader_arb_get_modifier(ins), dst, dst_name);
02717 }
02718 
02719 static DWORD abs_modifier(DWORD mod, BOOL *need_abs)
02720 {
02721     *need_abs = FALSE;
02722 
02723     switch(mod)
02724     {
02725         case WINED3DSPSM_NONE:      return WINED3DSPSM_ABS;
02726         case WINED3DSPSM_NEG:       return WINED3DSPSM_ABS;
02727         case WINED3DSPSM_BIAS:      *need_abs = TRUE; return WINED3DSPSM_BIAS;
02728         case WINED3DSPSM_BIASNEG:   *need_abs = TRUE; return WINED3DSPSM_BIASNEG;
02729         case WINED3DSPSM_SIGN:      *need_abs = TRUE; return WINED3DSPSM_SIGN;
02730         case WINED3DSPSM_SIGNNEG:   *need_abs = TRUE; return WINED3DSPSM_SIGNNEG;
02731         case WINED3DSPSM_COMP:      *need_abs = TRUE; return WINED3DSPSM_COMP;
02732         case WINED3DSPSM_X2:        *need_abs = TRUE; return WINED3DSPSM_X2;
02733         case WINED3DSPSM_X2NEG:     *need_abs = TRUE; return WINED3DSPSM_X2NEG;
02734         case WINED3DSPSM_DZ:        *need_abs = TRUE; return WINED3DSPSM_DZ;
02735         case WINED3DSPSM_DW:        *need_abs = TRUE; return WINED3DSPSM_DW;
02736         case WINED3DSPSM_ABS:       return WINED3DSPSM_ABS;
02737         case WINED3DSPSM_ABSNEG:    return WINED3DSPSM_ABS;
02738     }
02739     FIXME("Unknown modifier %u\n", mod);
02740     return mod;
02741 }
02742 
02743 static void shader_hw_log(const struct wined3d_shader_instruction *ins)
02744 {
02745     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
02746     char src0[50], dst[50];
02747     struct wined3d_shader_src_param src0_copy = ins->src[0];
02748     BOOL need_abs = FALSE;
02749     const char *instr;
02750 
02751     switch(ins->handler_idx)
02752     {
02753         case WINED3DSIH_LOG:  instr = "LG2"; break;
02754         case WINED3DSIH_LOGP: instr = "LOG"; break;
02755         default:
02756             ERR("Unexpected instruction %d\n", ins->handler_idx);
02757             return;
02758     }
02759 
02760     /* LOG and LOGP operate on the absolute value of the input */
02761     src0_copy.modifiers = abs_modifier(src0_copy.modifiers, &need_abs);
02762 
02763     shader_arb_get_dst_param(ins, &ins->dst[0], dst);
02764     shader_arb_get_src_param(ins, &src0_copy, 0, src0);
02765 
02766     if(need_abs)
02767     {
02768         shader_addline(buffer, "ABS TA, %s;\n", src0);
02769         shader_addline(buffer, "%s%s %s, TA;\n", instr, shader_arb_get_modifier(ins), dst);
02770     }
02771     else
02772     {
02773         shader_addline(buffer, "%s%s %s, %s;\n", instr, shader_arb_get_modifier(ins), dst, src0);
02774     }
02775 }
02776 
02777 static void shader_hw_pow(const struct wined3d_shader_instruction *ins)
02778 {
02779     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
02780     char src0[50], src1[50], dst[50];
02781     struct wined3d_shader_src_param src0_copy = ins->src[0];
02782     BOOL need_abs = FALSE;
02783     struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
02784     const char *one = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_ONE);
02785 
02786     /* POW operates on the absolute value of the input */
02787     src0_copy.modifiers = abs_modifier(src0_copy.modifiers, &need_abs);
02788 
02789     shader_arb_get_dst_param(ins, &ins->dst[0], dst);
02790     shader_arb_get_src_param(ins, &src0_copy, 0, src0);
02791     shader_arb_get_src_param(ins, &ins->src[1], 1, src1);
02792 
02793     if (need_abs)
02794         shader_addline(buffer, "ABS TA.x, %s;\n", src0);
02795     else
02796         shader_addline(buffer, "MOV TA.x, %s;\n", src0);
02797 
02798     if (priv->target_version >= NV2)
02799     {
02800         shader_addline(buffer, "MOVC TA.y, %s;\n", src1);
02801         shader_addline(buffer, "POW%s %s, TA.x, TA.y;\n", shader_arb_get_modifier(ins), dst);
02802         shader_addline(buffer, "MOV %s (EQ.y), %s;\n", dst, one);
02803     }
02804     else
02805     {
02806         const char *zero = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_ZERO);
02807         const char *flt_eps = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_EPS);
02808 
02809         shader_addline(buffer, "ABS TA.y, %s;\n", src1);
02810         shader_addline(buffer, "SGE TA.y, -TA.y, %s;\n", zero);
02811         /* Possibly add flt_eps to avoid getting float special values */
02812         shader_addline(buffer, "MAD TA.z, TA.y, %s, %s;\n", flt_eps, src1);
02813         shader_addline(buffer, "POW%s TA.x, TA.x, TA.z;\n", shader_arb_get_modifier(ins));
02814         shader_addline(buffer, "MAD TA.x, -TA.x, TA.y, TA.x;\n");
02815         shader_addline(buffer, "MAD %s, TA.y, %s, TA.x;\n", dst, one);
02816     }
02817 }
02818 
02819 static void shader_hw_loop(const struct wined3d_shader_instruction *ins)
02820 {
02821     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
02822     char src_name[50];
02823     BOOL vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type);
02824 
02825     /* src0 is aL */
02826     shader_arb_get_src_param(ins, &ins->src[1], 0, src_name);
02827 
02828     if(vshader)
02829     {
02830         struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
02831         struct list *e = list_head(&priv->control_frames);
02832         struct control_frame *control_frame = LIST_ENTRY(e, struct control_frame, entry);
02833 
02834         if(priv->loop_depth > 1) shader_addline(buffer, "PUSHA aL;\n");
02835         /* The constant loader makes sure to load -1 into iX.w */
02836         shader_addline(buffer, "ARLC aL, %s.xywz;\n", src_name);
02837         shader_addline(buffer, "BRA loop_%u_end (LE.x);\n", control_frame->no.loop);
02838         shader_addline(buffer, "loop_%u_start:\n", control_frame->no.loop);
02839     }
02840     else
02841     {
02842         shader_addline(buffer, "LOOP %s;\n", src_name);
02843     }
02844 }
02845 
02846 static void shader_hw_rep(const struct wined3d_shader_instruction *ins)
02847 {
02848     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
02849     char src_name[50];
02850     BOOL vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type);
02851 
02852     shader_arb_get_src_param(ins, &ins->src[0], 0, src_name);
02853 
02854     /* The constant loader makes sure to load -1 into iX.w */
02855     if(vshader)
02856     {
02857         struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
02858         struct list *e = list_head(&priv->control_frames);
02859         struct control_frame *control_frame = LIST_ENTRY(e, struct control_frame, entry);
02860 
02861         if(priv->loop_depth > 1) shader_addline(buffer, "PUSHA aL;\n");
02862 
02863         shader_addline(buffer, "ARLC aL, %s.xywz;\n", src_name);
02864         shader_addline(buffer, "BRA loop_%u_end (LE.x);\n", control_frame->no.loop);
02865         shader_addline(buffer, "loop_%u_start:\n", control_frame->no.loop);
02866     }
02867     else
02868     {
02869         shader_addline(buffer, "REP %s;\n", src_name);
02870     }
02871 }
02872 
02873 static void shader_hw_endloop(const struct wined3d_shader_instruction *ins)
02874 {
02875     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
02876     BOOL vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type);
02877 
02878     if(vshader)
02879     {
02880         struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
02881         struct list *e = list_head(&priv->control_frames);
02882         struct control_frame *control_frame = LIST_ENTRY(e, struct control_frame, entry);
02883 
02884         shader_addline(buffer, "ARAC aL.xy, aL;\n");
02885         shader_addline(buffer, "BRA loop_%u_start (GT.x);\n", control_frame->no.loop);
02886         shader_addline(buffer, "loop_%u_end:\n", control_frame->no.loop);
02887 
02888         if(priv->loop_depth > 1) shader_addline(buffer, "POPA aL;\n");
02889     }
02890     else
02891     {
02892         shader_addline(buffer, "ENDLOOP;\n");
02893     }
02894 }
02895 
02896 static void shader_hw_endrep(const struct wined3d_shader_instruction *ins)
02897 {
02898     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
02899     BOOL vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type);
02900 
02901     if(vshader)
02902     {
02903         struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
02904         struct list *e = list_head(&priv->control_frames);
02905         struct control_frame *control_frame = LIST_ENTRY(e, struct control_frame, entry);
02906 
02907         shader_addline(buffer, "ARAC aL.xy, aL;\n");
02908         shader_addline(buffer, "BRA loop_%u_start (GT.x);\n", control_frame->no.loop);
02909         shader_addline(buffer, "loop_%u_end:\n", control_frame->no.loop);
02910 
02911         if(priv->loop_depth > 1) shader_addline(buffer, "POPA aL;\n");
02912     }
02913     else
02914     {
02915         shader_addline(buffer, "ENDREP;\n");
02916     }
02917 }
02918 
02919 static const struct control_frame *find_last_loop(const struct shader_arb_ctx_priv *priv)
02920 {
02921     struct control_frame *control_frame;
02922 
02923     LIST_FOR_EACH_ENTRY(control_frame, &priv->control_frames, struct control_frame, entry)
02924     {
02925         if(control_frame->type == LOOP || control_frame->type == REP) return control_frame;
02926     }
02927     ERR("Could not find loop for break\n");
02928     return NULL;
02929 }
02930 
02931 static void shader_hw_break(const struct wined3d_shader_instruction *ins)
02932 {
02933     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
02934     const struct control_frame *control_frame = find_last_loop(ins->ctx->backend_data);
02935     BOOL vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type);
02936 
02937     if(vshader)
02938     {
02939         shader_addline(buffer, "BRA loop_%u_end;\n", control_frame->no.loop);
02940     }
02941     else
02942     {
02943         shader_addline(buffer, "BRK;\n");
02944     }
02945 }
02946 
02947 static const char *get_compare(enum wined3d_shader_rel_op op)
02948 {
02949     switch (op)
02950     {
02951         case WINED3D_SHADER_REL_OP_GT: return "GT";
02952         case WINED3D_SHADER_REL_OP_EQ: return "EQ";
02953         case WINED3D_SHADER_REL_OP_GE: return "GE";
02954         case WINED3D_SHADER_REL_OP_LT: return "LT";
02955         case WINED3D_SHADER_REL_OP_NE: return "NE";
02956         case WINED3D_SHADER_REL_OP_LE: return "LE";
02957         default:
02958             FIXME("Unrecognized operator %#x.\n", op);
02959             return "(\?\?)";
02960     }
02961 }
02962 
02963 static enum wined3d_shader_rel_op invert_compare(enum wined3d_shader_rel_op op)
02964 {
02965     switch (op)
02966     {
02967         case WINED3D_SHADER_REL_OP_GT: return WINED3D_SHADER_REL_OP_LE;
02968         case WINED3D_SHADER_REL_OP_EQ: return WINED3D_SHADER_REL_OP_NE;
02969         case WINED3D_SHADER_REL_OP_GE: return WINED3D_SHADER_REL_OP_LT;
02970         case WINED3D_SHADER_REL_OP_LT: return WINED3D_SHADER_REL_OP_GE;
02971         case WINED3D_SHADER_REL_OP_NE: return WINED3D_SHADER_REL_OP_EQ;
02972         case WINED3D_SHADER_REL_OP_LE: return WINED3D_SHADER_REL_OP_GT;
02973         default:
02974             FIXME("Unrecognized operator %#x.\n", op);
02975             return -1;
02976     }
02977 }
02978 
02979 static void shader_hw_breakc(const struct wined3d_shader_instruction *ins)
02980 {
02981     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
02982     BOOL vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type);
02983     const struct control_frame *control_frame = find_last_loop(ins->ctx->backend_data);
02984     char src_name0[50];
02985     char src_name1[50];
02986     const char *comp = get_compare(ins->flags);
02987 
02988     shader_arb_get_src_param(ins, &ins->src[0], 0, src_name0);
02989     shader_arb_get_src_param(ins, &ins->src[1], 1, src_name1);
02990 
02991     if(vshader)
02992     {
02993         /* SUBC CC, src0, src1" works only in pixel shaders, so use TA to throw
02994          * away the subtraction result
02995          */
02996         shader_addline(buffer, "SUBC TA, %s, %s;\n", src_name0, src_name1);
02997         shader_addline(buffer, "BRA loop_%u_end (%s.x);\n", control_frame->no.loop, comp);
02998     }
02999     else
03000     {
03001         shader_addline(buffer, "SUBC TA, %s, %s;\n", src_name0, src_name1);
03002         shader_addline(buffer, "BRK (%s.x);\n", comp);
03003     }
03004 }
03005 
03006 static void shader_hw_ifc(const struct wined3d_shader_instruction *ins)
03007 {
03008     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
03009     struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
03010     struct list *e = list_head(&priv->control_frames);
03011     struct control_frame *control_frame = LIST_ENTRY(e, struct control_frame, entry);
03012     const char *comp;
03013     char src_name0[50];
03014     char src_name1[50];
03015     BOOL vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type);
03016 
03017     shader_arb_get_src_param(ins, &ins->src[0], 0, src_name0);
03018     shader_arb_get_src_param(ins, &ins->src[1], 1, src_name1);
03019 
03020     if(vshader)
03021     {
03022         /* Invert the flag. We jump to the else label if the condition is NOT true */
03023         comp = get_compare(invert_compare(ins->flags));
03024         shader_addline(buffer, "SUBC TA, %s, %s;\n", src_name0, src_name1);
03025         shader_addline(buffer, "BRA ifc_%u_else (%s.x);\n", control_frame->no.ifc, comp);
03026     }
03027     else
03028     {
03029         comp = get_compare(ins->flags);
03030         shader_addline(buffer, "SUBC TA, %s, %s;\n", src_name0, src_name1);
03031         shader_addline(buffer, "IF %s.x;\n", comp);
03032     }
03033 }
03034 
03035 static void shader_hw_else(const struct wined3d_shader_instruction *ins)
03036 {
03037     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
03038     struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
03039     struct list *e = list_head(&priv->control_frames);
03040     struct control_frame *control_frame = LIST_ENTRY(e, struct control_frame, entry);
03041     BOOL vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type);
03042 
03043     if(vshader)
03044     {
03045         shader_addline(buffer, "BRA ifc_%u_endif;\n", control_frame->no.ifc);
03046         shader_addline(buffer, "ifc_%u_else:\n", control_frame->no.ifc);
03047         control_frame->had_else = TRUE;
03048     }
03049     else
03050     {
03051         shader_addline(buffer, "ELSE;\n");
03052     }
03053 }
03054 
03055 static void shader_hw_endif(const struct wined3d_shader_instruction *ins)
03056 {
03057     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
03058     struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
03059     struct list *e = list_head(&priv->control_frames);
03060     struct control_frame *control_frame = LIST_ENTRY(e, struct control_frame, entry);
03061     BOOL vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type);
03062 
03063     if(vshader)
03064     {
03065         if(control_frame->had_else)
03066         {
03067             shader_addline(buffer, "ifc_%u_endif:\n", control_frame->no.ifc);
03068         }
03069         else
03070         {
03071             shader_addline(buffer, "#No else branch. else is endif\n");
03072             shader_addline(buffer, "ifc_%u_else:\n", control_frame->no.ifc);
03073         }
03074     }
03075     else
03076     {
03077         shader_addline(buffer, "ENDIF;\n");
03078     }
03079 }
03080 
03081 static void shader_hw_texldd(const struct wined3d_shader_instruction *ins)
03082 {
03083     DWORD sampler_idx = ins->src[1].reg.idx;
03084     char reg_dest[40];
03085     char reg_src[3][40];
03086     WORD flags = TEX_DERIV;
03087 
03088     shader_arb_get_dst_param(ins, &ins->dst[0], reg_dest);
03089     shader_arb_get_src_param(ins, &ins->src[0], 0, reg_src[0]);
03090     shader_arb_get_src_param(ins, &ins->src[2], 1, reg_src[1]);
03091     shader_arb_get_src_param(ins, &ins->src[3], 2, reg_src[2]);
03092 
03093     if (ins->flags & WINED3DSI_TEXLD_PROJECT) flags |= TEX_PROJ;
03094     if (ins->flags & WINED3DSI_TEXLD_BIAS) flags |= TEX_BIAS;
03095 
03096     shader_hw_sample(ins, sampler_idx, reg_dest, reg_src[0], flags, reg_src[1], reg_src[2]);
03097 }
03098 
03099 static void shader_hw_texldl(const struct wined3d_shader_instruction *ins)
03100 {
03101     DWORD sampler_idx = ins->src[1].reg.idx;
03102     char reg_dest[40];
03103     char reg_coord[40];
03104     WORD flags = TEX_LOD;
03105 
03106     shader_arb_get_dst_param(ins, &ins->dst[0], reg_dest);
03107     shader_arb_get_src_param(ins, &ins->src[0], 0, reg_coord);
03108 
03109     if (ins->flags & WINED3DSI_TEXLD_PROJECT) flags |= TEX_PROJ;
03110     if (ins->flags & WINED3DSI_TEXLD_BIAS) flags |= TEX_BIAS;
03111 
03112     shader_hw_sample(ins, sampler_idx, reg_dest, reg_coord, flags, NULL, NULL);
03113 }
03114 
03115 static void shader_hw_label(const struct wined3d_shader_instruction *ins)
03116 {
03117     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
03118     struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
03119 
03120     priv->in_main_func = FALSE;
03121     /* Call instructions activate the NV extensions, not labels and rets. If there is an uncalled
03122      * subroutine, don't generate a label that will make GL complain
03123      */
03124     if(priv->target_version == ARB) return;
03125 
03126     shader_addline(buffer, "l%u:\n", ins->src[0].reg.idx);
03127 }
03128 
03129 static void vshader_add_footer(struct shader_arb_ctx_priv *priv_ctx,
03130         const struct arb_vshader_private *shader_data, const struct arb_vs_compile_args *args,
03131         const struct wined3d_shader_reg_maps *reg_maps, const struct wined3d_gl_info *gl_info,
03132         struct wined3d_shader_buffer *buffer)
03133 {
03134     unsigned int i;
03135 
03136     /* The D3DRS_FOGTABLEMODE render state defines if the shader-generated fog coord is used
03137      * or if the fragment depth is used. If the fragment depth is used(FOGTABLEMODE != NONE),
03138      * the fog frag coord is thrown away. If the fog frag coord is used, but not written by
03139      * the shader, it is set to 0.0(fully fogged, since start = 1.0, end = 0.0)
03140      */
03141     if (args->super.fog_src == VS_FOG_Z)
03142         shader_addline(buffer, "MOV result.fogcoord, TMP_OUT.z;\n");
03143     else if (!reg_maps->fog)
03144         /* posFixup.x is always 1.0, so we can safely use it */
03145         shader_addline(buffer, "ADD result.fogcoord, posFixup.x, -posFixup.x;\n");
03146 
03147     /* Clipplanes are always stored without y inversion */
03148     if (use_nv_clip(gl_info) && priv_ctx->target_version >= NV2)
03149     {
03150         if (args->super.clip_enabled)
03151         {
03152             for (i = 0; i < priv_ctx->vs_clipplanes; i++)
03153             {
03154                 shader_addline(buffer, "DP4 result.clip[%u].x, TMP_OUT, state.clip[%u].plane;\n", i, i);
03155             }
03156         }
03157     }
03158     else if (args->clip.boolclip.clip_texcoord)
03159     {
03160         unsigned int cur_clip = 0;
03161         char component[4] = {'x', 'y', 'z', 'w'};
03162         const char *zero = arb_get_helper_value(WINED3D_SHADER_TYPE_VERTEX, ARB_ZERO);
03163 
03164         for (i = 0; i < gl_info->limits.clipplanes; ++i)
03165         {
03166             if (args->clip.boolclip.clipplane_mask & (1 << i))
03167             {
03168                 shader_addline(buffer, "DP4 TA.%c, TMP_OUT, state.clip[%u].plane;\n",
03169                                component[cur_clip++], i);
03170             }
03171         }
03172         switch (cur_clip)
03173         {
03174             case 0:
03175                 shader_addline(buffer, "MOV TA, %s;\n", zero);
03176                 break;
03177             case 1:
03178                 shader_addline(buffer, "MOV TA.yzw, %s;\n", zero);
03179                 break;
03180             case 2:
03181                 shader_addline(buffer, "MOV TA.zw, %s;\n", zero);
03182                 break;
03183             case 3:
03184                 shader_addline(buffer, "MOV TA.w, %s;\n", zero);
03185                 break;
03186         }
03187         shader_addline(buffer, "MOV result.texcoord[%u], TA;\n",
03188                        args->clip.boolclip.clip_texcoord - 1);
03189     }
03190 
03191     /* Write the final position.
03192      *
03193      * OpenGL coordinates specify the center of the pixel while d3d coords specify
03194      * the corner. The offsets are stored in z and w in posFixup. posFixup.y contains
03195      * 1.0 or -1.0 to turn the rendering upside down for offscreen rendering. PosFixup.x
03196      * contains 1.0 to allow a mad, but arb vs swizzles are too restricted for that.
03197      */
03198     shader_addline(buffer, "MUL TA, posFixup, TMP_OUT.w;\n");
03199     shader_addline(buffer, "ADD TMP_OUT.x, TMP_OUT.x, TA.z;\n");
03200     shader_addline(buffer, "MAD TMP_OUT.y, TMP_OUT.y, posFixup.y, TA.w;\n");
03201 
03202     /* Z coord [0;1]->[-1;1] mapping, see comment in transform_projection in state.c
03203      * and the glsl equivalent
03204      */
03205     if (need_helper_const(shader_data, reg_maps, gl_info))
03206     {
03207         const char *two = arb_get_helper_value(WINED3D_SHADER_TYPE_VERTEX, ARB_TWO);
03208         shader_addline(buffer, "MAD TMP_OUT.z, TMP_OUT.z, %s, -TMP_OUT.w;\n", two);
03209     }
03210     else
03211     {
03212         shader_addline(buffer, "ADD TMP_OUT.z, TMP_OUT.z, TMP_OUT.z;\n");
03213         shader_addline(buffer, "ADD TMP_OUT.z, TMP_OUT.z, -TMP_OUT.w;\n");
03214     }
03215 
03216     shader_addline(buffer, "MOV result.position, TMP_OUT;\n");
03217 
03218     priv_ctx->footer_written = TRUE;
03219 }
03220 
03221 static void shader_hw_ret(const struct wined3d_shader_instruction *ins)
03222 {
03223     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
03224     struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
03225     const struct wined3d_shader *shader = ins->ctx->shader;
03226     BOOL vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type);
03227 
03228     if(priv->target_version == ARB) return;
03229 
03230     if(vshader)
03231     {
03232         if (priv->in_main_func) vshader_add_footer(priv, shader->backend_data,
03233                 priv->cur_vs_args, ins->ctx->reg_maps, ins->ctx->gl_info, buffer);
03234     }
03235 
03236     shader_addline(buffer, "RET;\n");
03237 }
03238 
03239 static void shader_hw_call(const struct wined3d_shader_instruction *ins)
03240 {
03241     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
03242     shader_addline(buffer, "CAL l%u;\n", ins->src[0].reg.idx);
03243 }
03244 
03245 /* GL locking is done by the caller */
03246 static GLuint create_arb_blt_vertex_program(const struct wined3d_gl_info *gl_info)
03247 {
03248     GLuint program_id = 0;
03249     GLint pos;
03250 
03251     const char *blt_vprogram =
03252         "!!ARBvp1.0\n"
03253         "PARAM c[1] = { { 1, 0.5 } };\n"
03254         "MOV result.position, vertex.position;\n"
03255         "MOV result.color, c[0].x;\n"
03256         "MOV result.texcoord[0], vertex.texcoord[0];\n"
03257         "END\n";
03258 
03259     GL_EXTCALL(glGenProgramsARB(1, &program_id));
03260     GL_EXTCALL(glBindProgramARB(GL_VERTEX_PROGRAM_ARB, program_id));
03261     GL_EXTCALL(glProgramStringARB(GL_VERTEX_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB,
03262             strlen(blt_vprogram), blt_vprogram));
03263     checkGLcall("glProgramStringARB()");
03264 
03265     glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &pos);
03266     if (pos != -1)
03267     {
03268         FIXME("Vertex program error at position %d: %s\n\n", pos,
03269             debugstr_a((const char *)glGetString(GL_PROGRAM_ERROR_STRING_ARB)));
03270         shader_arb_dump_program_source(blt_vprogram);
03271     }
03272     else
03273     {
03274         GLint native;
03275 
03276         GL_EXTCALL(glGetProgramivARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_UNDER_NATIVE_LIMITS_ARB, &native));
03277         checkGLcall("glGetProgramivARB()");
03278         if (!native) WARN("Program exceeds native resource limits.\n");
03279     }
03280 
03281     return program_id;
03282 }
03283 
03284 /* GL locking is done by the caller */
03285 static GLuint create_arb_blt_fragment_program(const struct wined3d_gl_info *gl_info,
03286         enum tex_types tex_type, BOOL masked)
03287 {
03288     GLuint program_id = 0;
03289     const char *fprogram;
03290     GLint pos;
03291 
03292     static const char * const blt_fprograms_full[tex_type_count] =
03293     {
03294         /* tex_1d */
03295         NULL,
03296         /* tex_2d */
03297         "!!ARBfp1.0\n"
03298         "TEMP R0;\n"
03299         "TEX R0.x, fragment.texcoord[0], texture[0], 2D;\n"
03300         "MOV result.depth.z, R0.x;\n"
03301         "END\n",
03302         /* tex_3d */
03303         NULL,
03304         /* tex_cube */
03305         "!!ARBfp1.0\n"
03306         "TEMP R0;\n"
03307         "TEX R0.x, fragment.texcoord[0], texture[0], CUBE;\n"
03308         "MOV result.depth.z, R0.x;\n"
03309         "END\n",
03310         /* tex_rect */
03311         "!!ARBfp1.0\n"
03312         "TEMP R0;\n"
03313         "TEX R0.x, fragment.texcoord[0], texture[0], RECT;\n"
03314         "MOV result.depth.z, R0.x;\n"
03315         "END\n",
03316     };
03317 
03318     static const char * const blt_fprograms_masked[tex_type_count] =
03319     {
03320         /* tex_1d */
03321         NULL,
03322         /* tex_2d */
03323         "!!ARBfp1.0\n"
03324         "PARAM mask = program.local[0];\n"
03325         "TEMP R0;\n"
03326         "SLT R0.xy, fragment.position, mask.zwzw;\n"
03327         "MUL R0.x, R0.x, R0.y;\n"
03328         "KIL -R0.x;\n"
03329         "TEX R0.x, fragment.texcoord[0], texture[0], 2D;\n"
03330         "MOV result.depth.z, R0.x;\n"
03331         "END\n",
03332         /* tex_3d */
03333         NULL,
03334         /* tex_cube */
03335         "!!ARBfp1.0\n"
03336         "PARAM mask = program.local[0];\n"
03337         "TEMP R0;\n"
03338         "SLT R0.xy, fragment.position, mask.zwzw;\n"
03339         "MUL R0.x, R0.x, R0.y;\n"
03340         "KIL -R0.x;\n"
03341         "TEX R0.x, fragment.texcoord[0], texture[0], CUBE;\n"
03342         "MOV result.depth.z, R0.x;\n"
03343         "END\n",
03344         /* tex_rect */
03345         "!!ARBfp1.0\n"
03346         "PARAM mask = program.local[0];\n"
03347         "TEMP R0;\n"
03348         "SLT R0.xy, fragment.position, mask.zwzw;\n"
03349         "MUL R0.x, R0.x, R0.y;\n"
03350         "KIL -R0.x;\n"
03351         "TEX R0.x, fragment.texcoord[0], texture[0], RECT;\n"
03352         "MOV result.depth.z, R0.x;\n"
03353         "END\n",
03354     };
03355 
03356     fprogram = masked ? blt_fprograms_masked[tex_type] : blt_fprograms_full[tex_type];
03357     if (!fprogram)
03358     {
03359         FIXME("tex_type %#x not supported, falling back to tex_2d\n", tex_type);
03360         tex_type = tex_2d;
03361         fprogram = masked ? blt_fprograms_masked[tex_type] : blt_fprograms_full[tex_type];
03362     }
03363 
03364     GL_EXTCALL(glGenProgramsARB(1, &program_id));
03365     GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, program_id));
03366     GL_EXTCALL(glProgramStringARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, strlen(fprogram), fprogram));
03367     checkGLcall("glProgramStringARB()");
03368 
03369     glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &pos);
03370     if (pos != -1)
03371     {
03372         FIXME("Fragment program error at position %d: %s\n\n", pos,
03373             debugstr_a((const char *)glGetString(GL_PROGRAM_ERROR_STRING_ARB)));
03374         shader_arb_dump_program_source(fprogram);
03375     }
03376     else
03377     {
03378         GLint native;
03379 
03380         GL_EXTCALL(glGetProgramivARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_UNDER_NATIVE_LIMITS_ARB, &native));
03381         checkGLcall("glGetProgramivARB()");
03382         if (!native) WARN("Program exceeds native resource limits.\n");
03383     }
03384 
03385     return program_id;
03386 }
03387 
03388 static void arbfp_add_sRGB_correction(struct wined3d_shader_buffer *buffer, const char *fragcolor,
03389         const char *tmp1, const char *tmp2, const char *tmp3, const char *tmp4, BOOL condcode)
03390 {
03391     /* Perform sRGB write correction. See GLX_EXT_framebuffer_sRGB */
03392 
03393     if(condcode)
03394     {
03395         /* Sigh. MOVC CC doesn't work, so use one of the temps as dummy dest */
03396         shader_addline(buffer, "SUBC %s, %s.x, srgb_consts1.y;\n", tmp1, fragcolor);
03397         /* Calculate the > 0.0031308 case */
03398         shader_addline(buffer, "POW %s.x (GE), %s.x, srgb_consts1.z;\n", fragcolor, fragcolor);
03399         shader_addline(buffer, "POW %s.y (GE), %s.y, srgb_consts1.z;\n", fragcolor, fragcolor);
03400         shader_addline(buffer, "POW %s.z (GE), %s.z, srgb_consts1.z;\n", fragcolor, fragcolor);
03401         shader_addline(buffer, "MUL %s.xyz (GE), %s, srgb_consts1.w;\n", fragcolor, fragcolor);
03402         shader_addline(buffer, "SUB %s.xyz (GE), %s, srgb_consts2.x;\n", fragcolor, fragcolor);
03403         /* Calculate the < case */
03404         shader_addline(buffer, "MUL %s.xyz (LT), srgb_consts1.x, %s;\n", fragcolor, fragcolor);
03405     }
03406     else
03407     {
03408         /* Calculate the > 0.0031308 case */
03409         shader_addline(buffer, "POW %s.x, %s.x, srgb_consts1.z;\n", tmp1, fragcolor);
03410         shader_addline(buffer, "POW %s.y, %s.y, srgb_consts1.z;\n", tmp1, fragcolor);
03411         shader_addline(buffer, "POW %s.z, %s.z, srgb_consts1.z;\n", tmp1, fragcolor);
03412         shader_addline(buffer, "MUL %s, %s, srgb_consts1.w;\n", tmp1, tmp1);
03413         shader_addline(buffer, "SUB %s, %s, srgb_consts2.x;\n", tmp1, tmp1);
03414         /* Calculate the < case */
03415         shader_addline(buffer, "MUL %s, srgb_consts1.x, %s;\n", tmp2, fragcolor);
03416         /* Get 1.0 / 0.0 masks for > 0.0031308 and < 0.0031308 */
03417         shader_addline(buffer, "SLT %s, srgb_consts1.y, %s;\n", tmp3, fragcolor);
03418         shader_addline(buffer, "SGE %s, srgb_consts1.y, %s;\n", tmp4, fragcolor);
03419         /* Store the components > 0.0031308 in the destination */
03420         shader_addline(buffer, "MUL %s.xyz, %s, %s;\n", fragcolor, tmp1, tmp3);
03421         /* Add the components that are < 0.0031308 */
03422         shader_addline(buffer, "MAD %s.xyz, %s, %s, %s;\n", fragcolor, tmp2, tmp4, fragcolor);
03423         /* Move everything into result.color at once. Nvidia hardware cannot handle partial
03424         * result.color writes(.rgb first, then .a), or handle overwriting already written
03425         * components. The assembler uses a temporary register in this case, which is usually
03426         * not allocated from one of our registers that were used earlier.
03427         */
03428     }
03429     /* [0.0;1.0] clamping. Not needed, this is done implicitly */
03430 }
03431 
03432 static const DWORD *find_loop_control_values(const struct wined3d_shader *shader, DWORD idx)
03433 {
03434     const struct wined3d_shader_lconst *constant;
03435 
03436     LIST_FOR_EACH_ENTRY(constant, &shader->constantsI, struct wined3d_shader_lconst, entry)
03437     {
03438         if (constant->idx == idx)
03439         {
03440             return constant->value;
03441         }
03442     }
03443     return NULL;
03444 }
03445 
03446 static void init_ps_input(const struct wined3d_shader *shader,
03447         const struct arb_ps_compile_args *args, struct shader_arb_ctx_priv *priv)
03448 {
03449     static const char * const texcoords[8] =
03450     {
03451         "fragment.texcoord[0]", "fragment.texcoord[1]", "fragment.texcoord[2]", "fragment.texcoord[3]",
03452         "fragment.texcoord[4]", "fragment.texcoord[5]", "fragment.texcoord[6]", "fragment.texcoord[7]"
03453     };
03454     unsigned int i;
03455     const struct wined3d_shader_signature_element *sig = shader->input_signature;
03456     const char *semantic_name;
03457     DWORD semantic_idx;
03458 
03459     switch(args->super.vp_mode)
03460     {
03461         case pretransformed:
03462         case fixedfunction:
03463             /* The pixelshader has to collect the varyings on its own. In any case properly load
03464              * color0 and color1. In the case of pretransformed vertices also load texcoords. Set
03465              * other attribs to 0.0.
03466              *
03467              * For fixedfunction this behavior is correct, according to the tests. For pretransformed
03468              * we'd either need a replacement shader that can load other attribs like BINORMAL, or
03469              * load the texcoord attrib pointers to match the pixel shader signature
03470              */
03471             for(i = 0; i < MAX_REG_INPUT; i++)
03472             {
03473                 semantic_name = sig[i].semantic_name;
03474                 semantic_idx = sig[i].semantic_idx;
03475                 if (!semantic_name) continue;
03476 
03477                 if(shader_match_semantic(semantic_name, WINED3DDECLUSAGE_COLOR))
03478                 {
03479                     if (!semantic_idx) priv->ps_input[i] = "fragment.color.primary";
03480                     else if(semantic_idx == 1) priv->ps_input[i] = "fragment.color.secondary";
03481                     else priv->ps_input[i] = "0.0";
03482                 }
03483                 else if(args->super.vp_mode == fixedfunction)
03484                 {
03485                     priv->ps_input[i] = "0.0";
03486                 }
03487                 else if(shader_match_semantic(semantic_name, WINED3DDECLUSAGE_TEXCOORD))
03488                 {
03489                     if(semantic_idx < 8) priv->ps_input[i] = texcoords[semantic_idx];
03490                     else priv->ps_input[i] = "0.0";
03491                 }
03492                 else if(shader_match_semantic(semantic_name, WINED3DDECLUSAGE_FOG))
03493                 {
03494                     if (!semantic_idx) priv->ps_input[i] = "fragment.fogcoord";
03495                     else priv->ps_input[i] = "0.0";
03496                 }
03497                 else
03498                 {
03499                     priv->ps_input[i] = "0.0";
03500                 }
03501 
03502                 TRACE("v%u, semantic %s%u is %s\n", i, semantic_name, semantic_idx, priv->ps_input[i]);
03503             }
03504             break;
03505 
03506         case vertexshader:
03507             /* That one is easy. The vertex shaders provide v0-v7 in fragment.texcoord and v8 and v9 in
03508              * fragment.color
03509              */
03510             for(i = 0; i < 8; i++)
03511             {
03512                 priv->ps_input[i] = texcoords[i];
03513             }
03514             priv->ps_input[8] = "fragment.color.primary";
03515             priv->ps_input[9] = "fragment.color.secondary";
03516             break;
03517     }
03518 }
03519 
03520 /* Generate the ARB fragment program for a pixel shader and hand it to GL.
       *
       * The program text is assembled in "buffer": header and OPTION lines, helper
       * temporaries and constants, the declarations produced by
       * shader_generate_arb_declarations(), the shader body produced by
       * shader_generate_main(), optional sRGB correction and the final move to
       * result.color. The text is then passed to glProgramStringARB().
       *
       * shader:   pixel shader to translate (reg_maps, function, backend_data are read).
       * gl_info:  supplies the supported-extension flags and constant limits used below.
       * buffer:   shader text buffer the program is written into.
       * args:     compile arguments (fog mode, sRGB correction, NP2 fixup mask, clip).
       * compiled: receives the constant locations chosen here (bumpenvmatconst,
       *           luminanceconst, int_consts, ycorrection, np2fixup_info).
       *
       * Returns the GL program name, or 0 when GL reports a program error position.
       * The map returned by local_const_mapping() is freed before returning.
       *
       * GL locking is done by the caller */
03521 static GLuint shader_arb_generate_pshader(const struct wined3d_shader *shader,
03522         const struct wined3d_gl_info *gl_info, struct wined3d_shader_buffer *buffer,
03523         const struct arb_ps_compile_args *args, struct arb_ps_compiled_shader *compiled)
03524 {
03525     const struct wined3d_shader_reg_maps *reg_maps = &shader->reg_maps;
03526     const struct wined3d_shader_lconst *lconst;
03527     const DWORD *function = shader->function;
03528     GLuint retval;
03529     char fragcolor[16];
03530     DWORD *lconst_map = local_const_mapping(shader), next_local;
03531     struct shader_arb_ctx_priv priv_ctx;
03532     BOOL dcl_td = FALSE;
03533     BOOL want_nv_prog = FALSE;
03534     struct arb_pshader_private *shader_priv = shader->backend_data;
03535     GLint errPos;
03536     DWORD map;
03537     /* Names of the four scratch registers passed to arbfp_add_sRGB_correction(). */
03538     char srgbtmp[4][4];
03539     unsigned int i, found = 0;
03540     /* Reuse up to four temporaries the shader already uses (bits set in
           * reg_maps->temporary) as scratch registers. Skipped are the register that
           * holds color0 when color0_mov is set, and R0 for shader versions below 2
           * (R0 is used as the color register for those, see fragcolor below). */
03541     for (i = 0, map = reg_maps->temporary; map; map >>= 1, ++i)
03542     {
03543         if (!(map & 1)
03544                 || (shader->u.ps.color0_mov && i == shader->u.ps.color0_reg)
03545                 || (reg_maps->shader_version.major < 2 && !i))
03546             continue;
03547 
03548         sprintf(srgbtmp[found], "R%u", i);
03549         ++found;
03550         if (found == 4) break;
03551     }
03552     /* Fill the slots that were not taken from shader temporaries with the helper
           * temps TA..TD. TD is only used, and therefore only declared, when no shader
           * temporary was found at all. */
03553     switch(found) {
03554         case 0:
03555             sprintf(srgbtmp[0], "TA");
03556             sprintf(srgbtmp[1], "TB");
03557             sprintf(srgbtmp[2], "TC");
03558             sprintf(srgbtmp[3], "TD");
03559             dcl_td = TRUE;
03560             break;
03561         case 1:
03562             sprintf(srgbtmp[1], "TA");
03563             sprintf(srgbtmp[2], "TB");
03564             sprintf(srgbtmp[3], "TC");
03565             break;
03566         case 2:
03567             sprintf(srgbtmp[2], "TA");
03568             sprintf(srgbtmp[3], "TB");
03569             break;
03570         case 3:
03571             sprintf(srgbtmp[3], "TA");
03572             break;
03573         case 4:
03574             break;
03575     }
03576 
03577     /*  Create the hw ARB shader */
03578     memset(&priv_ctx, 0, sizeof(priv_ctx));
03579     priv_ctx.cur_ps_args = args;
03580     priv_ctx.compiled_fprog = compiled;
03581     priv_ctx.cur_np2fixup_info = &compiled->np2fixup_info;
03582     init_ps_input(shader, args, &priv_ctx);
03583     list_init(&priv_ctx.control_frames);
03584 
03585     /* Avoid enabling NV_fragment_program* if we do not need it.
03586      *
03587      * Enabling GL_NV_fragment_program_option causes the driver to occupy a temporary register,
03588      * and it slows down the shader execution noticeably(about 5%). Usually our instruction emulation
03589      * is faster than what we gain from using higher native instructions. There are some things though
03590      * that cannot be emulated. In that case enable the extensions.
03591      * If the extension is enabled, instruction handlers that support both ways will use it.
03592      *
03593      * Testing shows no performance difference between OPTION NV_fragment_program2 and NV_fragment_program.
03594      * So enable the best we can get.
03595      */
03596     if(reg_maps->usesdsx || reg_maps->usesdsy || reg_maps->loop_depth > 0 || reg_maps->usestexldd ||
03597        reg_maps->usestexldl || reg_maps->usesfacing || reg_maps->usesifc || reg_maps->usescall)
03598     {
03599         want_nv_prog = TRUE;
03600     }
03601 
03602     shader_addline(buffer, "!!ARBfp1.0\n");
03603     if (want_nv_prog && gl_info->supported[NV_FRAGMENT_PROGRAM2])
03604     {
03605         shader_addline(buffer, "OPTION NV_fragment_program2;\n");
03606         priv_ctx.target_version = NV3;
03607     }
03608     else if (want_nv_prog && gl_info->supported[NV_FRAGMENT_PROGRAM_OPTION])
03609     {
03610         shader_addline(buffer, "OPTION NV_fragment_program;\n");
03611         priv_ctx.target_version = NV2;
03612     } else {
03613         if(want_nv_prog)
03614         {
03615             /* This is an error - either we're advertising the wrong shader version, or aren't enforcing some
03616              * limits properly
03617              */
03618             ERR("The shader requires instructions that are not available in plain GL_ARB_fragment_program\n");
03619             ERR("Try GLSL\n");
03620         }
03621         priv_ctx.target_version = ARB;
03622     }
03623     /* rt_mask > 1 means a bit above bit 0 is set - presumably a render target other than 0 is written. */
03624     if (reg_maps->rt_mask > 1)
03625     {
03626         shader_addline(buffer, "OPTION ARB_draw_buffers;\n");
03627     }
03628     /* The fog OPTION lines are only emitted for shader versions below 3. */
03629     if (reg_maps->shader_version.major < 3)
03630     {
03631         switch(args->super.fog) {
03632             case FOG_OFF:
03633                 break;
03634             case FOG_LINEAR:
03635                 shader_addline(buffer, "OPTION ARB_fog_linear;\n");
03636                 break;
03637             case FOG_EXP:
03638                 shader_addline(buffer, "OPTION ARB_fog_exp;\n");
03639                 break;
03640             case FOG_EXP2:
03641                 shader_addline(buffer, "OPTION ARB_fog_exp2;\n");
03642                 break;
03643         }
03644     }
03645 
03646     /* For now always declare the temps. At least the Nvidia assembler optimizes completely
03647      * unused temps away(but occupies them for the whole shader if they're used once). Always
03648      * declaring them avoids tricky bookkeeping work
03649      */
03650     shader_addline(buffer, "TEMP TA;\n");      /* Used for modifiers */
03651     shader_addline(buffer, "TEMP TB;\n");      /* Used for modifiers */
03652     shader_addline(buffer, "TEMP TC;\n");      /* Used for modifiers */
03653     if(dcl_td) shader_addline(buffer, "TEMP TD;\n"); /* Used for sRGB writing */
03654     shader_addline(buffer, "PARAM coefdiv = { 0.5, 0.25, 0.125, 0.0625 };\n");
03655     shader_addline(buffer, "PARAM coefmul = { 2, 4, 8, 16 };\n");
03656     shader_addline(buffer, "PARAM ps_helper_const = { 0.0, 1.0, %1.10f, 0.0 };\n", eps);
03657     /* Choose the register that holds the final color until it is moved to
           * result.color at the end: R0 for shader versions below 2; with sRGB correction
           * either the color0 register (color0_mov) or a dedicated TMP_COLOR temp;
           * otherwise result.color itself, in which case no final move is emitted. */
03658     if (reg_maps->shader_version.major < 2)
03659     {
03660         strcpy(fragcolor, "R0");
03661     }
03662     else
03663     {
03664         if (args->super.srgb_correction)
03665         {
03666             if (shader->u.ps.color0_mov)
03667             {
03668                 sprintf(fragcolor, "R%u", shader->u.ps.color0_reg);
03669             }
03670             else
03671             {
03672                 shader_addline(buffer, "TEMP TMP_COLOR;\n");
03673                 strcpy(fragcolor, "TMP_COLOR");
03674             }
03675         } else {
03676             strcpy(fragcolor, "result.color");
03677         }
03678     }
03679 
03680     if(args->super.srgb_correction) {
03681         shader_addline(buffer, "PARAM srgb_consts1 = {%f, %f, %f, %f};\n",
03682                        srgb_mul_low, srgb_cmp, srgb_pow, srgb_mul_high);
03683         shader_addline(buffer, "PARAM srgb_consts2 = {%f, %f, %f, %f};\n",
03684                        srgb_sub_high, 0.0, 0.0, 0.0);
03685     }
03686 
03687     /* Base Declarations */
03688     next_local = shader_generate_arb_declarations(shader, reg_maps,
03689             buffer, gl_info, lconst_map, NULL, &priv_ctx);
03690     /* From here on next_local is the next program.local[] index handed out. */
03691     for (i = 0, map = reg_maps->bumpmat; map; map >>= 1, ++i)
03692     {
03693         unsigned char bump_const;
03694 
03695         if (!(map & 1)) continue;
03696 
03697         bump_const = compiled->numbumpenvmatconsts;
03698         compiled->bumpenvmatconst[bump_const].const_num = WINED3D_CONST_NUM_UNUSED;
03699         compiled->bumpenvmatconst[bump_const].texunit = i;
03700         compiled->luminanceconst[bump_const].const_num = WINED3D_CONST_NUM_UNUSED;
03701         compiled->luminanceconst[bump_const].texunit = i;
03702 
03703         /* We can fit the constants into the constant limit for sure because texbem, texbeml, bem and beml are only supported
03704          * in 1.x shaders, and GL_ARB_fragment_program has a constant limit of 24 constants. So in the worst case we're loading
03705          * 8 shader constants, 8 bump matrices and 8 luminance parameters and are perfectly fine. (No NP2 fixup on bumpmapped
03706          * textures due to conditional NP2 restrictions)
03707          *
03708          * Use local constants to load the bump env parameters, not program.env. This avoids collisions with d3d constants of
03709          * shaders in newer shader models. Since the bump env parameters have to share their space with NP2 fixup constants,
03710          * their location is shader dependent anyway and they cannot be loaded globally.
03711          */
03712         compiled->bumpenvmatconst[bump_const].const_num = next_local++;
03713         shader_addline(buffer, "PARAM bumpenvmat%d = program.local[%d];\n",
03714                        i, compiled->bumpenvmatconst[bump_const].const_num);
03715         compiled->numbumpenvmatconsts = bump_const + 1;
03716         /* A luminance constant is only allocated for stages flagged in luminanceparams. */
03717         if (!(reg_maps->luminanceparams & (1 << i))) continue;
03718 
03719         compiled->luminanceconst[bump_const].const_num = next_local++;
03720         shader_addline(buffer, "PARAM luminance%d = program.local[%d];\n",
03721                        i, compiled->luminanceconst[bump_const].const_num);
03722     }
03723     /* Integer constants are only declared for NV2 and higher targets. When
           * find_loop_control_values() returns values they are written inline into the
           * program; otherwise the constant is read from program.local[] and its
           * location is recorded in compiled->int_consts. */
03724     for(i = 0; i < MAX_CONST_I; i++)
03725     {
03726         compiled->int_consts[i] = WINED3D_CONST_NUM_UNUSED;
03727         if (reg_maps->integer_constants & (1 << i) && priv_ctx.target_version >= NV2)
03728         {
03729             const DWORD *control_values = find_loop_control_values(shader, i);
03730 
03731             if(control_values)
03732             {
03733                 shader_addline(buffer, "PARAM I%u = {%u, %u, %u, -1};\n", i,
03734                                 control_values[0], control_values[1], control_values[2]);
03735             }
03736             else
03737             {
03738                 compiled->int_consts[i] = next_local;
03739                 compiled->num_int_consts++;
03740                 shader_addline(buffer, "PARAM I%u = program.local[%u];\n", i, next_local++);
03741             }
03742         }
03743     }
03744     /* The ycorrection constant is declared when the shader reads vpos or uses dsy. */
03745     if(reg_maps->vpos || reg_maps->usesdsy)
03746     {
03747         compiled->ycorrection = next_local;
03748         shader_addline(buffer, "PARAM ycorrection = program.local[%u];\n", next_local++);
03749 
03750         if(reg_maps->vpos)
03751         {
03752             shader_addline(buffer, "TEMP vpos;\n");
03753             /* ycorrection.x: Backbuffer height(onscreen) or 0(offscreen).
03754              * ycorrection.y: -1.0(onscreen), 1.0(offscreen)
03755              * ycorrection.z: 1.0
03756              * ycorrection.w: 0.0
03757              */
03758             shader_addline(buffer, "MAD vpos, fragment.position, ycorrection.zyww, ycorrection.wxww;\n");
03759             shader_addline(buffer, "FLR vpos.xy, vpos;\n");
03760         }
03761     }
03762     else
03763     {
03764         compiled->ycorrection = WINED3D_CONST_NUM_UNUSED;
03765     }
03766 
03767     /* Load constants to fixup NP2 texcoords if there are still free constants left:
03768      * Constants (texture dimensions) for the NP2 fixup are loaded as local program parameters. This will consume
03769      * at most 8 (MAX_FRAGMENT_SAMPLERS / 2) parameters, which is highly unlikely, since the application had to
03770      * use 16 NP2 textures at the same time. In case that we run out of constants the fixup is simply not
03771      * applied / activated. This will probably result in wrong rendering of the texture, but will save us from
03772      * shader compilation errors and the subsequent errors when drawing with this shader.
            *
            * NOTE(review): the PARAM emitted below is bound to program.env[], while its offset is taken from
            * next_local and checked against arb_ps_local_constants - confirm this matches how the fixup
            * constants are uploaded. */
03773     if (priv_ctx.cur_ps_args->super.np2_fixup) {
03774         unsigned char cur_fixup_sampler = 0;
03775 
03776         struct arb_ps_np2fixup_info* const fixup = priv_ctx.cur_np2fixup_info;
03777         const WORD map = priv_ctx.cur_ps_args->super.np2_fixup;
03778         const UINT max_lconsts = gl_info->limits.arb_ps_local_constants;
03779 
03780         fixup->offset = next_local;
03781         fixup->super.active = 0;
03782 
03783         for (i = 0; i < MAX_FRAGMENT_SAMPLERS; ++i) {
03784             if (!(map & (1 << i))) continue;
03785 
03786             if (fixup->offset + (cur_fixup_sampler >> 1) < max_lconsts) {
03787                 fixup->super.active |= (1 << i);
03788                 fixup->super.idx[i] = cur_fixup_sampler++;
03789             } else {
03790                 FIXME("No free constant found to load NP2 fixup data into shader. "
03791                       "Sampling from this texture will probably look wrong.\n");
03792                 break;
03793             }
03794         }
03795         /* Each constant covers two fixup samplers, so round the count up. */
03796         fixup->super.num_consts = (cur_fixup_sampler + 1) >> 1;
03797         if (fixup->super.num_consts) {
03798             shader_addline(buffer, "PARAM np2fixup[%u] = { program.env[%u..%u] };\n",
03799                            fixup->super.num_consts, fixup->offset, fixup->super.num_consts + fixup->offset - 1);
03800         }
03801     }
03802     /* Emit a KIL on the texcoord selected by clipplane_emulation (~0U means none) when args->clip is set. */
03803     if (shader_priv->clipplane_emulation != ~0U && args->clip)
03804     {
03805         shader_addline(buffer, "KIL fragment.texcoord[%u];\n", shader_priv->clipplane_emulation);
03806     }
03807 
03808     /* Base Shader Body */
03809     shader_generate_main(shader, buffer, reg_maps, function, &priv_ctx);
03810     /* sRGB correction operates on fragcolor, using the four scratch registers chosen at the top. */
03811     if(args->super.srgb_correction) {
03812         arbfp_add_sRGB_correction(buffer, fragcolor, srgbtmp[0], srgbtmp[1], srgbtmp[2], srgbtmp[3],
03813                                   priv_ctx.target_version >= NV2);
03814     }
03815     /* strcmp() is non-zero when fragcolor is not result.color itself; copy the color out in that case. */
03816     if(strcmp(fragcolor, "result.color")) {
03817         shader_addline(buffer, "MOV result.color, %s;\n", fragcolor);
03818     }
03819     shader_addline(buffer, "END\n");
03820 
03821     /* TODO: change to resource.glObjectHandle or something like that */
03822     GL_EXTCALL(glGenProgramsARB(1, &retval));
03823 
03824     TRACE("Creating a hw pixel shader, prg=%d\n", retval);
03825     GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, retval));
03826 
03827     TRACE("Created hw pixel shader, prg=%d\n", retval);
03828     /* Create the program and check for errors */
03829     GL_EXTCALL(glProgramStringARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB,
03830                buffer->bsize, buffer->buffer));
03831     checkGLcall("glProgramStringARB()");
03832     /* An error position other than -1 is treated as failure: the source is dumped and 0 is returned.
           * NOTE(review): the program name generated above is not deleted on that path. */
03833     glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &errPos);
03834     if (errPos != -1)
03835     {
03836         FIXME("HW PixelShader Error at position %d: %s\n\n",
03837               errPos, debugstr_a((const char *)glGetString(GL_PROGRAM_ERROR_STRING_ARB)));
03838         shader_arb_dump_program_source(buffer->buffer);
03839         retval = 0;
03840     }
03841     else
03842     {
03843         GLint native;
03844 
03845         GL_EXTCALL(glGetProgramivARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_UNDER_NATIVE_LIMITS_ARB, &native));
03846         checkGLcall("glGetProgramivARB()");
03847         if (!native) WARN("Program exceeds native resource limits.\n");
03848     }
03849 
03850     /* Load immediate constants into the local parameters of the fragment program bound above,
            * at the indices given by lconst_map, then release the map. This also runs when the
            * program string was rejected. */
03851     if (lconst_map)
03852     {
03853         LIST_FOR_EACH_ENTRY(lconst, &shader->constantsF, struct wined3d_shader_lconst, entry)
03854         {
03855             const float *value = (const float *)lconst->value;
03856             GL_EXTCALL(glProgramLocalParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, lconst_map[lconst->idx], value));
03857             checkGLcall("glProgramLocalParameter4fvARB");
03858         }
03859         HeapFree(GetProcessHeap(), 0, lconst_map);
03860     }
03861 
03862     return retval;
03863 }
03864 
03865 static int compare_sig(const struct wined3d_shader_signature_element *sig1, const struct wined3d_shader_signature_element *sig2)
03866 {
03867     unsigned int i;
03868     int ret;
03869 
03870     for(i = 0; i < MAX_REG_INPUT; i++)
03871     {
03872         if (!sig1[i].semantic_name || !sig2[i].semantic_name)
03873         {
03874             /* Compare pointers, not contents. One string is NULL(element does not exist), the other one is not NULL */
03875             if(sig1[i].semantic_name != sig2[i].semantic_name) return sig1[i].semantic_name < sig2[i].semantic_name ? -1 : 1;
03876             continue;
03877         }
03878 
03879         if ((ret = strcmp(sig1[i].semantic_name, sig2[i].semantic_name))) return ret;
03880         if(sig1[i].semantic_idx    != sig2[i].semantic_idx)    return sig1[i].semantic_idx    < sig2[i].semantic_idx    ? -1 : 1;
03881         if(sig1[i].sysval_semantic != sig2[i].sysval_semantic) return sig1[i].sysval_semantic < sig2[i].sysval_semantic ? -1 : 1;
03882         if(sig1[i].component_type  != sig2[i].component_type)  return sig1[i].component_type  < sig2[i].component_type  ? -1 : 1;
03883         if(sig1[i].register_idx    != sig2[i].register_idx)    return sig1[i].register_idx    < sig2[i].register_idx    ? -1 : 1;
03884         if(sig1[i].mask            != sig2[i].mask)            return sig1[i].mask            < sig2[i].mask            ? -1 : 1;
03885     }
03886     return 0;
03887 }
03888 
03889 static struct wined3d_shader_signature_element *clone_sig(const struct wined3d_shader_signature_element *sig)
03890 {
03891     struct wined3d_shader_signature_element *new;
03892     int i;
03893     char *name;
03894 
03895     new = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*new) * MAX_REG_INPUT);
03896     for(i = 0; i < MAX_REG_INPUT; i++)
03897     {
03898         if (!sig[i].semantic_name) continue;
03899 
03900         new[i] = sig[i];
03901         /* Clone the semantic string */
03902         name = HeapAlloc(GetProcessHeap(), 0, strlen(sig[i].semantic_name) + 1);
03903         strcpy(name, sig[i].semantic_name);
03904         new[i].semantic_name = name;
03905     }
03906     return new;
03907 }
03908 
03909 static DWORD find_input_signature(struct shader_arb_priv *priv, const struct wined3d_shader_signature_element *sig)
03910 {
03911     struct wine_rb_entry *entry = wine_rb_get(&priv->signature_tree, sig);
03912     struct ps_signature *found_sig;
03913 
03914     if (entry)
03915     {
03916         found_sig = WINE_RB_ENTRY_VALUE(entry, struct ps_signature, entry);
03917         TRACE("Found existing signature %u\n", found_sig->idx);
03918         return found_sig->idx;
03919     }
03920     found_sig = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*sig));
03921     found_sig->sig = clone_sig(sig);
03922     found_sig->idx = priv->ps_sig_number++;
03923     TRACE("New signature stored and assigned number %u\n", found_sig->idx);
03924     if(wine_rb_put(&priv->signature_tree, sig, &found_sig->entry) == -1)
03925     {
03926         ERR("Failed to insert program entry.\n");
03927     }
03928     return found_sig->idx;
03929 }
03930 
03931 static void init_output_registers(const struct wined3d_shader *shader, DWORD sig_num,
03932         struct shader_arb_ctx_priv *priv_ctx, struct arb_vs_compiled_shader *compiled)
03933 {
03934     unsigned int i, j;
03935     static const char * const texcoords[8] =
03936     {
03937         "result.texcoord[0]", "result.texcoord[1]", "result.texcoord[2]", "result.texcoord[3]",
03938         "result.texcoord[4]", "result.texcoord[5]", "result.texcoord[6]", "result.texcoord[7]"
03939     };
03940     struct wined3d_device *device = shader->device;
03941     const struct wined3d_shader_signature_element *sig;
03942     const char *semantic_name;
03943     DWORD semantic_idx, reg_idx;
03944 
03945     /* Write generic input varyings 0 to 7 to result.texcoord[], varying 8 to result.color.primary
03946      * and varying 9 to result.color.secondary
03947      */
03948     static const char * const decl_idx_to_string[MAX_REG_INPUT] =
03949     {
03950         "result.texcoord[0]", "result.texcoord[1]", "result.texcoord[2]", "result.texcoord[3]",
03951         "result.texcoord[4]", "result.texcoord[5]", "result.texcoord[6]", "result.texcoord[7]",
03952         "result.color.primary", "result.color.secondary"
03953     };
03954 
03955     if(sig_num == ~0)
03956     {
03957         TRACE("Pixel shader uses builtin varyings\n");
03958         /* Map builtins to builtins */
03959         for(i = 0; i < 8; i++)
03960         {
03961             priv_ctx->texcrd_output[i] = texcoords[i];
03962         }
03963         priv_ctx->color_output[0] = "result.color.primary";
03964         priv_ctx->color_output[1] = "result.color.secondary";
03965         priv_ctx->fog_output = "result.fogcoord";
03966 
03967         /* Map declared regs to builtins. Use "TA" to /dev/null unread output */
03968         for (i = 0; i < (sizeof(shader->output_signature) / sizeof(*shader->output_signature)); ++i)
03969         {
03970             semantic_name = shader->output_signature[i].semantic_name;
03971             if (!semantic_name) continue;
03972 
03973             if(shader_match_semantic(semantic_name, WINED3DDECLUSAGE_POSITION))
03974             {
03975                 TRACE("o%u is TMP_OUT\n", i);
03976                 if (!shader->output_signature[i].semantic_idx) priv_ctx->vs_output[i] = "TMP_OUT";
03977                 else priv_ctx->vs_output[i] = "TA";
03978             }
03979             else if(shader_match_semantic(semantic_name, WINED3DDECLUSAGE_PSIZE))
03980             {
03981                 TRACE("o%u is result.pointsize\n", i);
03982                 if (!shader->output_signature[i].semantic_idx) priv_ctx->vs_output[i] = "result.pointsize";
03983                 else priv_ctx->vs_output[i] = "TA";
03984             }
03985             else if(shader_match_semantic(semantic_name, WINED3DDECLUSAGE_COLOR))
03986             {
03987                 TRACE("o%u is result.color.?, idx %u\n", i, shader->output_signature[i].semantic_idx);
03988                 if (!shader->output_signature[i].semantic_idx)
03989                     priv_ctx->vs_output[i] = "result.color.primary";
03990                 else if (shader->output_signature[i].semantic_idx == 1)
03991                     priv_ctx->vs_output[i] = "result.color.secondary";
03992                 else priv_ctx->vs_output[i] = "TA";
03993             }
03994             else if(shader_match_semantic(semantic_name, WINED3DDECLUSAGE_TEXCOORD))
03995             {
03996                 TRACE("o%u is %s\n", i, texcoords[shader->output_signature[i].semantic_idx]);
03997                 if (shader->output_signature[i].semantic_idx >= 8) priv_ctx->vs_output[i] = "TA";
03998                 else priv_ctx->vs_output[i] = texcoords[shader->output_signature[i].semantic_idx];
03999             }
04000             else if(shader_match_semantic(semantic_name, WINED3DDECLUSAGE_FOG))
04001             {
04002                 TRACE("o%u is result.fogcoord\n", i);
04003                 if (shader->output_signature[i].semantic_idx > 0) priv_ctx->vs_output[i] = "TA";
04004                 else priv_ctx->vs_output[i] = "result.fogcoord";
04005             }
04006             else
04007             {
04008                 priv_ctx->vs_output[i] = "TA";
04009             }
04010         }
04011         return;
04012     }
04013 
04014     /* Instead of searching for the signature in the signature list, read the one from the current pixel shader.
04015      * Its maybe not the shader where the signature came from, but it is the same signature and faster to find
04016      */
04017     sig = device->stateBlock->state.pixel_shader->input_signature;
04018     TRACE("Pixel shader uses declared varyings\n");
04019 
04020     /* Map builtin to declared. /dev/null the results by default to the TA temp reg */
04021     for(i = 0; i < 8; i++)
04022     {
04023         priv_ctx->texcrd_output[i] = "TA";
04024     }
04025     priv_ctx->color_output[0] = "TA";
04026     priv_ctx->color_output[1] = "TA";
04027     priv_ctx->fog_output = "TA";
04028 
04029     for(i = 0; i < MAX_REG_INPUT; i++)
04030     {
04031         semantic_name = sig[i].semantic_name;
04032         semantic_idx = sig[i].semantic_idx;
04033         reg_idx = sig[i].register_idx;
04034         if (!semantic_name) continue;
04035 
04036         /* If a declared input register is not written by builtin arguments, don't write to it.
04037          * GL_NV_vertex_program makes sure the input defaults to 0.0, which is correct with D3D
04038          *
04039          * Don't care about POSITION and PSIZE here - this is a builtin vertex shader, position goes
04040          * to TMP_OUT in any case
04041          */
04042         if(shader_match_semantic(semantic_name, WINED3DDECLUSAGE_TEXCOORD))
04043         {
04044             if(semantic_idx < 8) priv_ctx->texcrd_output[semantic_idx] = decl_idx_to_string[reg_idx];
04045         }
04046         else if(shader_match_semantic(semantic_name, WINED3DDECLUSAGE_COLOR))
04047         {
04048             if(semantic_idx < 2) priv_ctx->color_output[semantic_idx] = decl_idx_to_string[reg_idx];
04049         }
04050         else if(shader_match_semantic(semantic_name, WINED3DDECLUSAGE_FOG))
04051         {
04052             if (!semantic_idx) priv_ctx->fog_output = decl_idx_to_string[reg_idx];
04053         }
04054         else
04055         {
04056             continue;
04057         }
04058 
04059         if (!strcmp(decl_idx_to_string[reg_idx], "result.color.primary")
04060                 || !strcmp(decl_idx_to_string[reg_idx], "result.color.secondary"))
04061         {
04062             compiled->need_color_unclamp = TRUE;
04063         }
04064     }
04065 
04066     /* Map declared to declared */
04067     for (i = 0; i < (sizeof(shader->output_signature) / sizeof(*shader->output_signature)); ++i)
04068     {
04069         /* Write unread output to TA to throw them away */
04070         priv_ctx->vs_output[i] = "TA";
04071         semantic_name = shader->output_signature[i].semantic_name;
04072         if (!semantic_name) continue;
04073 
04074         if (shader_match_semantic(semantic_name, WINED3DDECLUSAGE_POSITION)
04075                 && !shader->output_signature[i].semantic_idx)
04076         {
04077             priv_ctx->vs_output[i] = "TMP_OUT";
04078             continue;
04079         }
04080         else if (shader_match_semantic(semantic_name, WINED3DDECLUSAGE_PSIZE)
04081                 && !shader->output_signature[i].semantic_idx)
04082         {
04083             priv_ctx->vs_output[i] = "result.pointsize";
04084             continue;
04085         }
04086 
04087         for(j = 0; j < MAX_REG_INPUT; j++)
04088         {
04089             if (!sig[j].semantic_name) continue;
04090 
04091             if (!strcmp(sig[j].semantic_name, semantic_name)
04092                     && sig[j].semantic_idx == shader->output_signature[i].semantic_idx)
04093             {
04094                 priv_ctx->vs_output[i] = decl_idx_to_string[sig[j].register_idx];
04095 
04096                 if (!strcmp(priv_ctx->vs_output[i], "result.color.primary")
04097                         || !strcmp(priv_ctx->vs_output[i], "result.color.secondary"))
04098                 {
04099                     compiled->need_color_unclamp = TRUE;
04100                 }
04101             }
04102         }
04103     }
04104 }
04105 
04106 /* GL locking is done by the caller */
04107 static GLuint shader_arb_generate_vshader(const struct wined3d_shader *shader,
04108         const struct wined3d_gl_info *gl_info, struct wined3d_shader_buffer *buffer,
04109         const struct arb_vs_compile_args *args, struct arb_vs_compiled_shader *compiled)
04110 {
04111     const struct arb_vshader_private *shader_data = shader->backend_data;
04112     const struct wined3d_shader_reg_maps *reg_maps = &shader->reg_maps;
04113     const struct wined3d_shader_lconst *lconst;
04114     const DWORD *function = shader->function;
04115     GLuint ret;
04116     DWORD next_local, *lconst_map = local_const_mapping(shader);
04117     struct shader_arb_ctx_priv priv_ctx;
04118     unsigned int i;
04119     GLint errPos;
04120 
04121     memset(&priv_ctx, 0, sizeof(priv_ctx));
04122     priv_ctx.cur_vs_args = args;
04123     list_init(&priv_ctx.control_frames);
04124     init_output_registers(shader, args->ps_signature, &priv_ctx, compiled);
04125 
04126     /*  Create the hw ARB shader */
04127     shader_addline(buffer, "!!ARBvp1.0\n");
04128 
04129     /* Always enable the NV extension if available. Unlike fragment shaders, there is no
04130      * mesurable performance penalty, and we can always make use of it for clipplanes.
04131      */
04132     if (gl_info->supported[NV_VERTEX_PROGRAM3])
04133     {
04134         shader_addline(buffer, "OPTION NV_vertex_program3;\n");
04135         priv_ctx.target_version = NV3;
04136         shader_addline(buffer, "ADDRESS aL;\n");
04137     }
04138     else if (gl_info->supported[NV_VERTEX_PROGRAM2_OPTION])
04139     {
04140         shader_addline(buffer, "OPTION NV_vertex_program2;\n");
04141         priv_ctx.target_version = NV2;
04142         shader_addline(buffer, "ADDRESS aL;\n");
04143     } else {
04144         priv_ctx.target_version = ARB;
04145     }
04146 
04147     shader_addline(buffer, "TEMP TMP_OUT;\n");
04148     if (need_helper_const(shader_data, reg_maps, gl_info))
04149     {
04150         shader_addline(buffer, "PARAM helper_const = { 0.0, 1.0, 2.0, %1.10f};\n", eps);
04151     }
04152     if (need_rel_addr_const(shader_data, reg_maps, gl_info))
04153     {
04154         shader_addline(buffer, "PARAM rel_addr_const = { 0.5, %d.0, 0.0, 0.0 };\n", shader_data->rel_offset);
04155         shader_addline(buffer, "TEMP A0_SHADOW;\n");
04156     }
04157 
04158     shader_addline(buffer, "TEMP TA;\n");
04159     shader_addline(buffer, "TEMP TB;\n");
04160 
04161     /* Base Declarations */
04162     next_local = shader_generate_arb_declarations(shader, reg_maps, buffer,
04163             gl_info, lconst_map, &priv_ctx.vs_clipplanes, &priv_ctx);
04164 
04165     for(i = 0; i < MAX_CONST_I; i++)
04166     {
04167         compiled->int_consts[i] = WINED3D_CONST_NUM_UNUSED;
04168         if(reg_maps->integer_constants & (1 << i) && priv_ctx.target_version >= NV2)
04169         {
04170             const DWORD *control_values = find_loop_control_values(shader, i);
04171 
04172             if(control_values)
04173             {
04174                 shader_addline(buffer, "PARAM I%u = {%u, %u, %u, -1};\n", i,
04175                                 control_values[0], control_values[1], control_values[2]);
04176             }
04177             else
04178             {
04179                 compiled->int_consts[i] = next_local;
04180                 compiled->num_int_consts++;
04181                 shader_addline(buffer, "PARAM I%u = program.local[%u];\n", i, next_local++);
04182             }
04183         }
04184     }
04185 
04186     /* We need a constant to fixup the final position */
04187     shader_addline(buffer, "PARAM posFixup = program.local[%u];\n", next_local);
04188     compiled->pos_fixup = next_local++;
04189 
04190     /* Initialize output parameters. GL_ARB_vertex_program does not require special initialization values
04191      * for output parameters. D3D in theory does not do that either, but some applications depend on a
04192      * proper initialization of the secondary color, and programs using the fixed function pipeline without
04193      * a replacement shader depend on the texcoord.w being set properly.
04194      *
04195      * GL_NV_vertex_program defines that all output values are initialized to {0.0, 0.0, 0.0, 1.0}. This
04196      * assertion is in effect even when using GL_ARB_vertex_program without any NV specific additions. So
04197      * skip this if NV_vertex_program is supported. Otherwise, initialize the secondary color. For the tex-
04198      * coords, we have a flag in the opengl caps. Many cards do not require the texcoord being set, and
04199      * this can eat a number of instructions, so skip it unless this cap is set as well
04200      */
04201     if (!gl_info->supported[NV_VERTEX_PROGRAM])
04202     {
04203         struct wined3d_device *device = shader->device;
04204         const char *color_init = arb_get_helper_value(WINED3D_SHADER_TYPE_VERTEX, ARB_0001);
04205         shader_addline(buffer, "MOV result.color.secondary, %s;\n", color_init);
04206 
04207         if (gl_info->quirks & WINED3D_QUIRK_SET_TEXCOORD_W && !device->frag_pipe->ffp_proj_control)
04208         {
04209             int i;
04210             const char *one = arb_get_helper_value(WINED3D_SHADER_TYPE_VERTEX, ARB_ONE);
04211             for(i = 0; i < min(8, MAX_REG_TEXCRD); i++)
04212             {
04213                 if (reg_maps->texcoord_mask[i] && reg_maps->texcoord_mask[i] != WINED3DSP_WRITEMASK_ALL)
04214                     shader_addline(buffer, "MOV result.texcoord[%u].w, %s\n", i, one);
04215             }
04216         }
04217     }
04218 
04219     /* The shader starts with the main function */
04220     priv_ctx.in_main_func = TRUE;
04221     /* Base Shader Body */
04222     shader_generate_main(shader, buffer, reg_maps, function, &priv_ctx);
04223 
04224     if (!priv_ctx.footer_written) vshader_add_footer(&priv_ctx,
04225             shader_data, args, reg_maps, gl_info, buffer);
04226 
04227     shader_addline(buffer, "END\n");
04228 
04229     /* TODO: change to resource.glObjectHandle or something like that */
04230     GL_EXTCALL(glGenProgramsARB(1, &ret));
04231 
04232     TRACE("Creating a hw vertex shader, prg=%d\n", ret);
04233     GL_EXTCALL(glBindProgramARB(GL_VERTEX_PROGRAM_ARB, ret));
04234 
04235     TRACE("Created hw vertex shader, prg=%d\n", ret);
04236     /* Create the program and check for errors */
04237     GL_EXTCALL(glProgramStringARB(GL_VERTEX_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB,
04238                buffer->bsize, buffer->buffer));
04239     checkGLcall("glProgramStringARB()");
04240 
04241     glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &errPos);
04242     if (errPos != -1)
04243     {
04244         FIXME("HW VertexShader Error at position %d: %s\n\n",
04245               errPos, debugstr_a((const char *)glGetString(GL_PROGRAM_ERROR_STRING_ARB)));
04246         shader_arb_dump_program_source(buffer->buffer);
04247         ret = -1;
04248     }
04249     else
04250     {
04251         GLint native;
04252 
04253         GL_EXTCALL(glGetProgramivARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_UNDER_NATIVE_LIMITS_ARB, &native));
04254         checkGLcall("glGetProgramivARB()");
04255         if (!native) WARN("Program exceeds native resource limits.\n");
04256 
04257         /* Load immediate constants */
04258         if (lconst_map)
04259         {
04260             LIST_FOR_EACH_ENTRY(lconst, &shader->constantsF, struct wined3d_shader_lconst, entry)
04261             {
04262                 const float *value = (const float *)lconst->value;
04263                 GL_EXTCALL(glProgramLocalParameter4fvARB(GL_VERTEX_PROGRAM_ARB, lconst_map[lconst->idx], value));
04264             }
04265         }
04266     }
04267     HeapFree(GetProcessHeap(), 0, lconst_map);
04268 
04269     return ret;
04270 }
04271 
04272 /* GL locking is done by the caller */
04273 static struct arb_ps_compiled_shader *find_arb_pshader(struct wined3d_shader *shader,
04274         const struct arb_ps_compile_args *args)
04275 {
04276     struct wined3d_device *device = shader->device;
04277     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
04278     UINT i;
04279     DWORD new_size;
04280     struct arb_ps_compiled_shader *new_array;
04281     struct wined3d_shader_buffer buffer;
04282     struct arb_pshader_private *shader_data;
04283     GLuint ret;
04284 
04285     if (!shader->backend_data)
04286     {
04287         struct shader_arb_priv *priv = device->shader_priv;
04288 
04289         shader->backend_data = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*shader_data));
04290         shader_data = shader->backend_data;
04291         shader_data->clamp_consts = shader->reg_maps.shader_version.major == 1;
04292 
04293         if (shader->reg_maps.shader_version.major < 3)
04294             shader_data->input_signature_idx = ~0;
04295         else
04296             shader_data->input_signature_idx = find_input_signature(priv, shader->input_signature);
04297 
04298         TRACE("Shader got assigned input signature index %u\n", shader_data->input_signature_idx);
04299 
04300         if (!device->vs_clipping)
04301             shader_data->clipplane_emulation = shader_find_free_input_register(&shader->reg_maps,
04302                     gl_info->limits.texture_stages - 1);
04303         else
04304             shader_data->clipplane_emulation = ~0U;
04305     }
04306     shader_data = shader->backend_data;
04307 
04308     /* Usually we have very few GL shaders for each d3d shader(just 1 or maybe 2),
04309      * so a linear search is more performant than a hashmap or a binary search
04310      * (cache coherency etc)
04311      */
04312     for (i = 0; i < shader_data->num_gl_shaders; ++i)
04313     {
04314         if (!memcmp(&shader_data->gl_shaders[i].args, args, sizeof(*args)))
04315             return &shader_data->gl_shaders[i];
04316     }
04317 
04318     TRACE("No matching GL shader found, compiling a new shader\n");
04319     if(shader_data->shader_array_size == shader_data->num_gl_shaders) {
04320         if (shader_data->num_gl_shaders)
04321         {
04322             new_size = shader_data->shader_array_size + max(1, shader_data->shader_array_size / 2);
04323             new_array = HeapReAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, shader_data->gl_shaders,
04324                                     new_size * sizeof(*shader_data->gl_shaders));
04325         } else {
04326             new_array = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*shader_data->gl_shaders));
04327             new_size = 1;
04328         }
04329 
04330         if(!new_array) {
04331             ERR("Out of memory\n");
04332             return 0;
04333         }
04334         shader_data->gl_shaders = new_array;
04335         shader_data->shader_array_size = new_size;
04336     }
04337 
04338     shader_data->gl_shaders[shader_data->num_gl_shaders].args = *args;
04339 
04340     pixelshader_update_samplers(&shader->reg_maps, device->stateBlock->state.textures);
04341 
04342     if (!shader_buffer_init(&buffer))
04343     {
04344         ERR("Failed to initialize shader buffer.\n");
04345         return 0;
04346     }
04347 
04348     ret = shader_arb_generate_pshader(shader, gl_info, &buffer, args,
04349             &shader_data->gl_shaders[shader_data->num_gl_shaders]);
04350     shader_buffer_free(&buffer);
04351     shader_data->gl_shaders[shader_data->num_gl_shaders].prgId = ret;
04352 
04353     return &shader_data->gl_shaders[shader_data->num_gl_shaders++];
04354 }
04355 
04356 static inline BOOL vs_args_equal(const struct arb_vs_compile_args *stored, const struct arb_vs_compile_args *new,
04357                                  const DWORD use_map, BOOL skip_int) {
04358     if((stored->super.swizzle_map & use_map) != new->super.swizzle_map) return FALSE;
04359     if(stored->super.clip_enabled != new->super.clip_enabled) return FALSE;
04360     if(stored->super.fog_src != new->super.fog_src) return FALSE;
04361     if(stored->clip.boolclip_compare != new->clip.boolclip_compare) return FALSE;
04362     if(stored->ps_signature != new->ps_signature) return FALSE;
04363     if(stored->vertex.samplers_compare != new->vertex.samplers_compare) return FALSE;
04364     if(skip_int) return TRUE;
04365 
04366     return !memcmp(stored->loop_ctrl, new->loop_ctrl, sizeof(stored->loop_ctrl));
04367 }
04368 
04369 static struct arb_vs_compiled_shader *find_arb_vshader(struct wined3d_shader *shader,
04370         const struct arb_vs_compile_args *args)
04371 {
04372     struct wined3d_device *device = shader->device;
04373     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
04374     DWORD use_map = device->strided_streams.use_map;
04375     UINT i;
04376     DWORD new_size;
04377     struct arb_vs_compiled_shader *new_array;
04378     struct wined3d_shader_buffer buffer;
04379     struct arb_vshader_private *shader_data;
04380     GLuint ret;
04381 
04382     if (!shader->backend_data)
04383     {
04384         const struct wined3d_shader_reg_maps *reg_maps = &shader->reg_maps;
04385 
04386         shader->backend_data = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*shader_data));
04387         shader_data = shader->backend_data;
04388 
04389         if ((gl_info->quirks & WINED3D_QUIRK_ARB_VS_OFFSET_LIMIT)
04390                 && reg_maps->min_rel_offset <= reg_maps->max_rel_offset)
04391         {
04392             if (reg_maps->max_rel_offset - reg_maps->min_rel_offset > 127)
04393             {
04394                 FIXME("The difference between the minimum and maximum relative offset is > 127.\n");
04395                 FIXME("Which this OpenGL implementation does not support. Try using GLSL.\n");
04396                 FIXME("Min: %u, Max: %u.\n", reg_maps->min_rel_offset, reg_maps->max_rel_offset);
04397             }
04398             else if (reg_maps->max_rel_offset - reg_maps->min_rel_offset > 63)
04399                 shader_data->rel_offset = reg_maps->min_rel_offset + 63;
04400             else if (reg_maps->max_rel_offset > 63)
04401                 shader_data->rel_offset = reg_maps->min_rel_offset;
04402         }
04403     }
04404     shader_data = shader->backend_data;
04405 
04406     /* Usually we have very few GL shaders for each d3d shader(just 1 or maybe 2),
04407      * so a linear search is more performant than a hashmap or a binary search
04408      * (cache coherency etc)
04409      */
04410     for(i = 0; i < shader_data->num_gl_shaders; i++) {
04411         if (vs_args_equal(&shader_data->gl_shaders[i].args, args,
04412                 use_map, gl_info->supported[NV_VERTEX_PROGRAM2_OPTION]))
04413         {
04414             return &shader_data->gl_shaders[i];
04415         }
04416     }
04417 
04418     TRACE("No matching GL shader found, compiling a new shader\n");
04419 
04420     if(shader_data->shader_array_size == shader_data->num_gl_shaders) {
04421         if (shader_data->num_gl_shaders)
04422         {
04423             new_size = shader_data->shader_array_size + max(1, shader_data->shader_array_size / 2);
04424             new_array = HeapReAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, shader_data->gl_shaders,
04425                                     new_size * sizeof(*shader_data->gl_shaders));
04426         } else {
04427             new_array = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*shader_data->gl_shaders));
04428             new_size = 1;
04429         }
04430 
04431         if(!new_array) {
04432             ERR("Out of memory\n");
04433             return 0;
04434         }
04435         shader_data->gl_shaders = new_array;
04436         shader_data->shader_array_size = new_size;
04437     }
04438 
04439     shader_data->gl_shaders[shader_data->num_gl_shaders].args = *args;
04440 
04441     if (!shader_buffer_init(&buffer))
04442     {
04443         ERR("Failed to initialize shader buffer.\n");
04444         return 0;
04445     }
04446 
04447     ret = shader_arb_generate_vshader(shader, gl_info, &buffer, args,
04448             &shader_data->gl_shaders[shader_data->num_gl_shaders]);
04449     shader_buffer_free(&buffer);
04450     shader_data->gl_shaders[shader_data->num_gl_shaders].prgId = ret;
04451 
04452     return &shader_data->gl_shaders[shader_data->num_gl_shaders++];
04453 }
04454 
04455 static void find_arb_ps_compile_args(const struct wined3d_state *state,
04456         const struct wined3d_shader *shader, struct arb_ps_compile_args *args)
04457 {
04458     struct wined3d_device *device = shader->device;
04459     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
04460     int i;
04461     WORD int_skip;
04462 
04463     find_ps_compile_args(state, shader, &args->super);
04464 
04465     /* This forces all local boolean constants to 1 to make them stateblock independent */
04466     args->bools = shader->reg_maps.local_bool_consts;
04467 
04468     for(i = 0; i < MAX_CONST_B; i++)
04469     {
04470         if (state->ps_consts_b[i])
04471             args->bools |= ( 1 << i);
04472     }
04473 
04474     /* Only enable the clip plane emulation KIL if at least one clipplane is enabled. The KIL instruction
04475      * is quite expensive because it forces the driver to disable early Z discards. It is cheaper to
04476      * duplicate the shader than have a no-op KIL instruction in every shader
04477      */
04478     if (!device->vs_clipping && use_vs(state)
04479             && state->render_states[WINED3D_RS_CLIPPING]
04480             && state->render_states[WINED3D_RS_CLIPPLANEENABLE])
04481         args->clip = 1;
04482     else
04483         args->clip = 0;
04484 
04485     /* Skip if unused or local, or supported natively */
04486     int_skip = ~shader->reg_maps.integer_constants | shader->reg_maps.local_int_consts;
04487     if (int_skip == 0xffff || gl_info->supported[NV_FRAGMENT_PROGRAM_OPTION])
04488     {
04489         memset(args->loop_ctrl, 0, sizeof(args->loop_ctrl));
04490         return;
04491     }
04492 
04493     for(i = 0; i < MAX_CONST_I; i++)
04494     {
04495         if(int_skip & (1 << i))
04496         {
04497             args->loop_ctrl[i][0] = 0;
04498             args->loop_ctrl[i][1] = 0;
04499             args->loop_ctrl[i][2] = 0;
04500         }
04501         else
04502         {
04503             args->loop_ctrl[i][0] = state->ps_consts_i[i * 4];
04504             args->loop_ctrl[i][1] = state->ps_consts_i[i * 4 + 1];
04505             args->loop_ctrl[i][2] = state->ps_consts_i[i * 4 + 2];
04506         }
04507     }
04508 }
04509 
04510 static void find_arb_vs_compile_args(const struct wined3d_state *state,
04511         const struct wined3d_shader *shader, struct arb_vs_compile_args *args)
04512 {
04513     struct wined3d_device *device = shader->device;
04514     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
04515     int i;
04516     WORD int_skip;
04517 
04518     find_vs_compile_args(state, shader, &args->super);
04519 
04520     args->clip.boolclip_compare = 0;
04521     if (use_ps(state))
04522     {
04523         const struct wined3d_shader *ps = state->pixel_shader;
04524         const struct arb_pshader_private *shader_priv = ps->backend_data;
04525         args->ps_signature = shader_priv->input_signature_idx;
04526 
04527         args->clip.boolclip.clip_texcoord = shader_priv->clipplane_emulation + 1;
04528     }
04529     else
04530     {
04531         args->ps_signature = ~0;
04532         if (!device->vs_clipping && device->adapter->fragment_pipe == &arbfp_fragment_pipeline)
04533         {
04534             args->clip.boolclip.clip_texcoord = ffp_clip_emul(state) ? gl_info->limits.texture_stages : 0;
04535         }
04536         /* Otherwise: Setting boolclip_compare set clip_texcoord to 0 */
04537     }
04538 
04539     if (args->clip.boolclip.clip_texcoord)
04540     {
04541         if (state->render_states[WINED3D_RS_CLIPPING])
04542             args->clip.boolclip.clipplane_mask = (unsigned char)state->render_states[WINED3D_RS_CLIPPLANEENABLE];
04543         /* clipplane_mask was set to 0 by setting boolclip_compare to 0 */
04544     }
04545 
04546     /* This forces all local boolean constants to 1 to make them stateblock independent */
04547     args->clip.boolclip.bools = shader->reg_maps.local_bool_consts;
04548     /* TODO: Figure out if it would be better to store bool constants as bitmasks in the stateblock */
04549     for(i = 0; i < MAX_CONST_B; i++)
04550     {
04551         if (state->vs_consts_b[i])
04552             args->clip.boolclip.bools |= ( 1 << i);
04553     }
04554 
04555     args->vertex.samplers[0] = device->texUnitMap[MAX_FRAGMENT_SAMPLERS + 0];
04556     args->vertex.samplers[1] = device->texUnitMap[MAX_FRAGMENT_SAMPLERS + 1];
04557     args->vertex.samplers[2] = device->texUnitMap[MAX_FRAGMENT_SAMPLERS + 2];
04558     args->vertex.samplers[3] = 0;
04559 
04560     /* Skip if unused or local */
04561     int_skip = ~shader->reg_maps.integer_constants | shader->reg_maps.local_int_consts;
04562     /* This is about flow control, not clipping. */
04563     if (int_skip == 0xffff || gl_info->supported[NV_VERTEX_PROGRAM2_OPTION])
04564     {
04565         memset(args->loop_ctrl, 0, sizeof(args->loop_ctrl));
04566         return;
04567     }
04568 
04569     for(i = 0; i < MAX_CONST_I; i++)
04570     {
04571         if(int_skip & (1 << i))
04572         {
04573             args->loop_ctrl[i][0] = 0;
04574             args->loop_ctrl[i][1] = 0;
04575             args->loop_ctrl[i][2] = 0;
04576         }
04577         else
04578         {
04579             args->loop_ctrl[i][0] = state->vs_consts_i[i * 4];
04580             args->loop_ctrl[i][1] = state->vs_consts_i[i * 4 + 1];
04581             args->loop_ctrl[i][2] = state->vs_consts_i[i * 4 + 2];
04582         }
04583     }
04584 }
04585 
04586 /* GL locking is done by the caller */
04587 static void shader_arb_select(const struct wined3d_context *context, BOOL usePS, BOOL useVS)
04588 {
04589     struct wined3d_device *device = context->swapchain->device;
04590     struct shader_arb_priv *priv = device->shader_priv;
04591     const struct wined3d_gl_info *gl_info = context->gl_info;
04592     const struct wined3d_state *state = &device->stateBlock->state;
04593     int i;
04594 
04595     /* Deal with pixel shaders first so the vertex shader arg function has the input signature ready */
04596     if (usePS)
04597     {
04598         struct wined3d_shader *ps = state->pixel_shader;
04599         struct arb_ps_compile_args compile_args;
04600         struct arb_ps_compiled_shader *compiled;
04601 
04602         TRACE("Using pixel shader %p.\n", ps);
04603         find_arb_ps_compile_args(state, ps, &compile_args);
04604         compiled = find_arb_pshader(ps, &compile_args);
04605         priv->current_fprogram_id = compiled->prgId;
04606         priv->compiled_fprog = compiled;
04607 
04608         /* Bind the fragment program */
04609         GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, priv->current_fprogram_id));
04610         checkGLcall("glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, priv->current_fprogram_id);");
04611 
04612         if(!priv->use_arbfp_fixed_func) {
04613             /* Enable OpenGL fragment programs */
04614             glEnable(GL_FRAGMENT_PROGRAM_ARB);
04615             checkGLcall("glEnable(GL_FRAGMENT_PROGRAM_ARB);");
04616         }
04617         TRACE("(%p) : Bound fragment program %u and enabled GL_FRAGMENT_PROGRAM_ARB\n",
04618                 device, priv->current_fprogram_id);
04619 
04620         /* Pixel Shader 1.x constants are clamped to [-1;1], Pixel Shader 2.0 constants are not. If switching between
04621          * a 1.x and newer shader, reload the first 8 constants
04622          */
04623         if (priv->last_ps_const_clamped != ((struct arb_pshader_private *)ps->backend_data)->clamp_consts)
04624         {
04625             priv->last_ps_const_clamped = ((struct arb_pshader_private *)ps->backend_data)->clamp_consts;
04626             priv->highest_dirty_ps_const = max(priv->highest_dirty_ps_const, 8);
04627             for(i = 0; i < 8; i++)
04628             {
04629                 priv->pshader_const_dirty[i] = 1;
04630             }
04631             /* Also takes care of loading local constants */
04632             shader_arb_load_constants(context, TRUE, FALSE);
04633         }
04634         else
04635         {
04636             UINT rt_height = state->fb->render_targets[0]->resource.height;
04637             shader_arb_ps_local_constants(compiled, context, state, rt_height);
04638         }
04639 
04640         /* Force constant reloading for the NP2 fixup (see comment in shader_glsl_select for more info) */
04641         if (compiled->np2fixup_info.super.active)
04642             shader_arb_load_np2fixup_constants(priv, gl_info, state);
04643     }
04644     else if (gl_info->supported[ARB_FRAGMENT_PROGRAM] && !priv->use_arbfp_fixed_func)
04645     {
04646         /* Disable only if we're not using arbfp fixed function fragment processing. If this is used,
04647         * keep GL_FRAGMENT_PROGRAM_ARB enabled, and the fixed function pipeline will bind the fixed function
04648         * replacement shader
04649         */
04650         glDisable(GL_FRAGMENT_PROGRAM_ARB);
04651         checkGLcall("glDisable(GL_FRAGMENT_PROGRAM_ARB)");
04652         priv->current_fprogram_id = 0;
04653     }
04654 
04655     if (useVS)
04656     {
04657         struct wined3d_shader *vs = state->vertex_shader;
04658         struct arb_vs_compile_args compile_args;
04659         struct arb_vs_compiled_shader *compiled;
04660 
04661         TRACE("Using vertex shader %p\n", vs);
04662         find_arb_vs_compile_args(state, vs, &compile_args);
04663         compiled = find_arb_vshader(vs, &compile_args);
04664         priv->current_vprogram_id = compiled->prgId;
04665         priv->compiled_vprog = compiled;
04666 
04667         /* Bind the vertex program */
04668         GL_EXTCALL(glBindProgramARB(GL_VERTEX_PROGRAM_ARB, priv->current_vprogram_id));
04669         checkGLcall("glBindProgramARB(GL_VERTEX_PROGRAM_ARB, priv->current_vprogram_id);");
04670 
04671         /* Enable OpenGL vertex programs */
04672         glEnable(GL_VERTEX_PROGRAM_ARB);
04673         checkGLcall("glEnable(GL_VERTEX_PROGRAM_ARB);");
04674         TRACE("(%p) : Bound vertex program %u and enabled GL_VERTEX_PROGRAM_ARB\n", device, priv->current_vprogram_id);
04675         shader_arb_vs_local_constants(compiled, context, state);
04676 
04677         if(priv->last_vs_color_unclamp != compiled->need_color_unclamp) {
04678             priv->last_vs_color_unclamp = compiled->need_color_unclamp;
04679 
04680             if (gl_info->supported[ARB_COLOR_BUFFER_FLOAT])
04681             {
04682                 GL_EXTCALL(glClampColorARB(GL_CLAMP_VERTEX_COLOR_ARB, !compiled->need_color_unclamp));
04683                 checkGLcall("glClampColorARB");
04684             } else {
04685                 FIXME("vertex color clamp needs to be changed, but extension not supported.\n");
04686             }
04687         }
04688     }
04689     else if (gl_info->supported[ARB_VERTEX_PROGRAM])
04690     {
04691         priv->current_vprogram_id = 0;
04692         glDisable(GL_VERTEX_PROGRAM_ARB);
04693         checkGLcall("glDisable(GL_VERTEX_PROGRAM_ARB)");
04694     }
04695 }
04696 
04697 /* GL locking is done by the caller */
04698 static void shader_arb_select_depth_blt(void *shader_priv, const struct wined3d_gl_info *gl_info,
04699         enum tex_types tex_type, const SIZE *ds_mask_size)
04700 {
04701     const float mask[] = {0.0f, 0.0f, (float)ds_mask_size->cx, (float)ds_mask_size->cy};
04702     BOOL masked = ds_mask_size->cx && ds_mask_size->cy;
04703     struct shader_arb_priv *priv = shader_priv;
04704     GLuint *blt_fprogram;
04705 
04706     if (!priv->depth_blt_vprogram_id) priv->depth_blt_vprogram_id = create_arb_blt_vertex_program(gl_info);
04707     GL_EXTCALL(glBindProgramARB(GL_VERTEX_PROGRAM_ARB, priv->depth_blt_vprogram_id));
04708     glEnable(GL_VERTEX_PROGRAM_ARB);
04709 
04710     blt_fprogram = masked ? &priv->depth_blt_fprogram_id_masked[tex_type] : &priv->depth_blt_fprogram_id_full[tex_type];
04711     if (!*blt_fprogram) *blt_fprogram = create_arb_blt_fragment_program(gl_info, tex_type, masked);
04712     GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, *blt_fprogram));
04713     if (masked) GL_EXTCALL(glProgramLocalParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, 0, mask));
04714     glEnable(GL_FRAGMENT_PROGRAM_ARB);
04715 }
04716 
04717 /* GL locking is done by the caller */
04718 static void shader_arb_deselect_depth_blt(void *shader_priv, const struct wined3d_gl_info *gl_info)
04719 {
04720     struct shader_arb_priv *priv = shader_priv;
04721 
04722     if (priv->current_vprogram_id) {
04723         GL_EXTCALL(glBindProgramARB(GL_VERTEX_PROGRAM_ARB, priv->current_vprogram_id));
04724         checkGLcall("glBindProgramARB(GL_VERTEX_PROGRAM_ARB, vertexShader->prgId);");
04725 
04726         TRACE("Bound vertex program %u and enabled GL_VERTEX_PROGRAM_ARB.\n", priv->current_vprogram_id);
04727     }
04728     else
04729     {
04730         glDisable(GL_VERTEX_PROGRAM_ARB);
04731         checkGLcall("glDisable(GL_VERTEX_PROGRAM_ARB)");
04732     }
04733 
04734     if (priv->current_fprogram_id) {
04735         GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, priv->current_fprogram_id));
04736         checkGLcall("glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, pixelShader->prgId);");
04737 
04738         TRACE("Bound fragment program %u and enabled GL_FRAGMENT_PROGRAM_ARB.\n", priv->current_fprogram_id);
04739     }
04740     else if(!priv->use_arbfp_fixed_func)
04741     {
04742         glDisable(GL_FRAGMENT_PROGRAM_ARB);
04743         checkGLcall("glDisable(GL_FRAGMENT_PROGRAM_ARB)");
04744     }
04745 }
04746 
04747 static void shader_arb_destroy(struct wined3d_shader *shader)
04748 {
04749     struct wined3d_device *device = shader->device;
04750     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
04751 
04752     if (shader_is_pshader_version(shader->reg_maps.shader_version.type))
04753     {
04754         struct arb_pshader_private *shader_data = shader->backend_data;
04755         UINT i;
04756 
04757         if(!shader_data) return; /* This can happen if a shader was never compiled */
04758 
04759         if (shader_data->num_gl_shaders)
04760         {
04761             struct wined3d_context *context = context_acquire(device, NULL);
04762 
04763             ENTER_GL();
04764             for (i = 0; i < shader_data->num_gl_shaders; ++i)
04765             {
04766                 GL_EXTCALL(glDeleteProgramsARB(1, &shader_data->gl_shaders[i].prgId));
04767                 checkGLcall("GL_EXTCALL(glDeleteProgramsARB(1, &shader_data->gl_shaders[i].prgId))");
04768             }
04769             LEAVE_GL();
04770 
04771             context_release(context);
04772         }
04773 
04774         HeapFree(GetProcessHeap(), 0, shader_data->gl_shaders);
04775         HeapFree(GetProcessHeap(), 0, shader_data);
04776         shader->backend_data = NULL;
04777     }
04778     else
04779     {
04780         struct arb_vshader_private *shader_data = shader->backend_data;
04781         UINT i;
04782 
04783         if(!shader_data) return; /* This can happen if a shader was never compiled */
04784 
04785         if (shader_data->num_gl_shaders)
04786         {
04787             struct wined3d_context *context = context_acquire(device, NULL);
04788 
04789             ENTER_GL();
04790             for (i = 0; i < shader_data->num_gl_shaders; ++i)
04791             {
04792                 GL_EXTCALL(glDeleteProgramsARB(1, &shader_data->gl_shaders[i].prgId));
04793                 checkGLcall("GL_EXTCALL(glDeleteProgramsARB(1, &shader_data->gl_shaders[i].prgId))");
04794             }
04795             LEAVE_GL();
04796 
04797             context_release(context);
04798         }
04799 
04800         HeapFree(GetProcessHeap(), 0, shader_data->gl_shaders);
04801         HeapFree(GetProcessHeap(), 0, shader_data);
04802         shader->backend_data = NULL;
04803     }
04804 }
04805 
04806 static int sig_tree_compare(const void *key, const struct wine_rb_entry *entry)
04807 {
04808     struct ps_signature *e = WINE_RB_ENTRY_VALUE(entry, struct ps_signature, entry);
04809     return compare_sig(key, e->sig);
04810 }
04811 
04812 static const struct wine_rb_functions sig_tree_functions =
04813 {
04814     wined3d_rb_alloc,
04815     wined3d_rb_realloc,
04816     wined3d_rb_free,
04817     sig_tree_compare
04818 };
04819 
04820 static HRESULT shader_arb_alloc(struct wined3d_device *device)
04821 {
04822     struct shader_arb_priv *priv = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*priv));
04823 
04824     priv->vshader_const_dirty = HeapAlloc(GetProcessHeap(), 0,
04825             sizeof(*priv->vshader_const_dirty) * device->d3d_vshader_constantF);
04826     if (!priv->vshader_const_dirty)
04827         goto fail;
04828     memset(priv->vshader_const_dirty, 1,
04829            sizeof(*priv->vshader_const_dirty) * device->d3d_vshader_constantF);
04830 
04831     priv->pshader_const_dirty = HeapAlloc(GetProcessHeap(), 0,
04832             sizeof(*priv->pshader_const_dirty) * device->d3d_pshader_constantF);
04833     if (!priv->pshader_const_dirty)
04834         goto fail;
04835     memset(priv->pshader_const_dirty, 1,
04836             sizeof(*priv->pshader_const_dirty) * device->d3d_pshader_constantF);
04837 
04838     if(wine_rb_init(&priv->signature_tree, &sig_tree_functions) == -1)
04839     {
04840         ERR("RB tree init failed\n");
04841         goto fail;
04842     }
04843     device->shader_priv = priv;
04844     return WINED3D_OK;
04845 
04846 fail:
04847     HeapFree(GetProcessHeap(), 0, priv->pshader_const_dirty);
04848     HeapFree(GetProcessHeap(), 0, priv->vshader_const_dirty);
04849     HeapFree(GetProcessHeap(), 0, priv);
04850     return E_OUTOFMEMORY;
04851 }
04852 
04853 static void release_signature(struct wine_rb_entry *entry, void *context)
04854 {
04855     struct ps_signature *sig = WINE_RB_ENTRY_VALUE(entry, struct ps_signature, entry);
04856     int i;
04857     for(i = 0; i < MAX_REG_INPUT; i++)
04858     {
04859         HeapFree(GetProcessHeap(), 0, (char *) sig->sig[i].semantic_name);
04860     }
04861     HeapFree(GetProcessHeap(), 0, sig->sig);
04862     HeapFree(GetProcessHeap(), 0, sig);
04863 }
04864 
04865 /* Context activation is done by the caller. */
04866 static void shader_arb_free(struct wined3d_device *device)
04867 {
04868     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
04869     struct shader_arb_priv *priv = device->shader_priv;
04870     int i;
04871 
04872     ENTER_GL();
04873     if(priv->depth_blt_vprogram_id) {
04874         GL_EXTCALL(glDeleteProgramsARB(1, &priv->depth_blt_vprogram_id));
04875     }
04876     for (i = 0; i < tex_type_count; ++i)
04877     {
04878         if (priv->depth_blt_fprogram_id_full[i])
04879         {
04880             GL_EXTCALL(glDeleteProgramsARB(1, &priv->depth_blt_fprogram_id_full[i]));
04881         }
04882         if (priv->depth_blt_fprogram_id_masked[i])
04883         {
04884             GL_EXTCALL(glDeleteProgramsARB(1, &priv->depth_blt_fprogram_id_masked[i]));
04885         }
04886     }
04887     LEAVE_GL();
04888 
04889     wine_rb_destroy(&priv->signature_tree, release_signature, NULL);
04890     HeapFree(GetProcessHeap(), 0, priv->pshader_const_dirty);
04891     HeapFree(GetProcessHeap(), 0, priv->vshader_const_dirty);
04892     HeapFree(GetProcessHeap(), 0, device->shader_priv);
04893 }
04894 
04895 static void shader_arb_context_destroyed(void *shader_priv, const struct wined3d_context *context)
04896 {
04897     struct shader_arb_priv *priv = shader_priv;
04898 
04899     if (priv->last_context == context)
04900         priv->last_context = NULL;
04901 }
04902 
04903 static void shader_arb_get_caps(const struct wined3d_gl_info *gl_info, struct shader_caps *caps)
04904 {
04905     if (gl_info->supported[ARB_VERTEX_PROGRAM])
04906     {
04907         DWORD vs_consts;
04908 
04909         /* 96 is the minimum allowed value of MAX_PROGRAM_ENV_PARAMETERS_ARB
04910          * for vertex programs. If the native limit is less than that it's
04911          * not very useful, and e.g. Mesa swrast returns 0, probably to
04912          * indicate it's a software implementation. */
04913         if (gl_info->limits.arb_vs_native_constants < 96)
04914             vs_consts = gl_info->limits.arb_vs_float_constants;
04915         else
04916             vs_consts = min(gl_info->limits.arb_vs_float_constants, gl_info->limits.arb_vs_native_constants);
04917 
04918         if (gl_info->supported[NV_VERTEX_PROGRAM3])
04919         {
04920             caps->VertexShaderVersion = 3;
04921             TRACE_(d3d_caps)("Hardware vertex shader version 3.0 enabled (NV_VERTEX_PROGRAM3)\n");
04922         }
04923         else if (vs_consts >= 256)
04924         {
04925             /* Shader Model 2.0 requires at least 256 vertex shader constants */
04926             caps->VertexShaderVersion = 2;
04927             TRACE_(d3d_caps)("Hardware vertex shader version 2.0 enabled (ARB_PROGRAM)\n");
04928         }
04929         else
04930         {
04931             caps->VertexShaderVersion = 1;
04932             TRACE_(d3d_caps)("Hardware vertex shader version 1.1 enabled (ARB_PROGRAM)\n");
04933         }
04934         caps->MaxVertexShaderConst = vs_consts;
04935     }
04936     else
04937     {
04938         caps->VertexShaderVersion = 0;
04939         caps->MaxVertexShaderConst = 0;
04940     }
04941 
04942     if (gl_info->supported[ARB_FRAGMENT_PROGRAM])
04943     {
04944         DWORD ps_consts;
04945 
04946         /* Similar as above for vertex programs, but the minimum for fragment
04947          * programs is 24. */
04948         if (gl_info->limits.arb_ps_native_constants < 24)
04949             ps_consts = gl_info->limits.arb_ps_float_constants;
04950         else
04951             ps_consts = min(gl_info->limits.arb_ps_float_constants, gl_info->limits.arb_ps_native_constants);
04952 
04953         if (gl_info->supported[NV_FRAGMENT_PROGRAM2])
04954         {
04955             caps->PixelShaderVersion = 3;
04956             TRACE_(d3d_caps)("Hardware pixel shader version 3.0 enabled (NV_FRAGMENT_PROGRAM2)\n");
04957         }
04958         else if (ps_consts >= 32)
04959         {
04960             /* Shader Model 2.0 requires at least 32 pixel shader constants */
04961             caps->PixelShaderVersion = 2;
04962             TRACE_(d3d_caps)("Hardware pixel shader version 2.0 enabled (ARB_PROGRAM)\n");
04963         }
04964         else
04965         {
04966             caps->PixelShaderVersion = 1;
04967             TRACE_(d3d_caps)("Hardware pixel shader version 1.4 enabled (ARB_PROGRAM)\n");
04968         }
04969         caps->PixelShader1xMaxValue = 8.0f;
04970         caps->MaxPixelShaderConst = ps_consts;
04971     }
04972     else
04973     {
04974         caps->PixelShaderVersion = 0;
04975         caps->PixelShader1xMaxValue = 0.0f;
04976         caps->MaxPixelShaderConst = 0;
04977     }
04978 
04979     caps->VSClipping = use_nv_clip(gl_info);
04980 }
04981 
04982 static BOOL shader_arb_color_fixup_supported(struct color_fixup_desc fixup)
04983 {
04984     if (TRACE_ON(d3d_shader) && TRACE_ON(d3d))
04985     {
04986         TRACE("Checking support for color_fixup:\n");
04987         dump_color_fixup_desc(fixup);
04988     }
04989 
04990     /* We support everything except complex conversions. */
04991     if (!is_complex_fixup(fixup))
04992     {
04993         TRACE("[OK]\n");
04994         return TRUE;
04995     }
04996 
04997     TRACE("[FAILED]\n");
04998     return FALSE;
04999 }
05000 
05001 static void shader_arb_add_instruction_modifiers(const struct wined3d_shader_instruction *ins) {
05002     DWORD shift;
05003     char write_mask[20], regstr[50];
05004     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
05005     BOOL is_color = FALSE;
05006     const struct wined3d_shader_dst_param *dst;
05007 
05008     if (!ins->dst_count) return;
05009 
05010     dst = &ins->dst[0];
05011     shift = dst->shift;
05012     if (!shift) return; /* Saturate alone is handled by the instructions */
05013 
05014     shader_arb_get_write_mask(ins, dst, write_mask);
05015     shader_arb_get_register_name(ins, &dst->reg, regstr, &is_color);
05016 
05017     /* Generate a line that does the output modifier computation
05018      * FIXME: _SAT vs shift? _SAT alone is already handled in the instructions, if this
05019      * maps problems in e.g. _d4_sat modify shader_arb_get_modifier
05020      */
05021     shader_addline(buffer, "MUL%s %s%s, %s, %s;\n", shader_arb_get_modifier(ins),
05022                    regstr, write_mask, regstr, shift_tab[shift]);
05023 }
05024 /* Opcode dispatch table: shader_arb_handle_instruction() indexes it with ins->handler_idx to pick the ARB code generator. */
05025 static const SHADER_HANDLER shader_arb_instruction_handler_table[WINED3DSIH_TABLE_SIZE] =
05026 { /* Entries are positional; each leading comment names the opcode the slot is written for (presumably WINED3DSIH_* enum order - verify when adding opcodes). A NULL slot makes the dispatcher log a FIXME and skip the instruction. */
05027     /* WINED3DSIH_ABS           */ shader_hw_map2gl,
05028     /* WINED3DSIH_ADD           */ shader_hw_map2gl,
05029     /* WINED3DSIH_AND           */ NULL,
05030     /* WINED3DSIH_BEM           */ pshader_hw_bem,
05031     /* WINED3DSIH_BREAK         */ shader_hw_break,
05032     /* WINED3DSIH_BREAKC        */ shader_hw_breakc,
05033     /* WINED3DSIH_BREAKP        */ NULL,
05034     /* WINED3DSIH_CALL          */ shader_hw_call,
05035     /* WINED3DSIH_CALLNZ        */ NULL,
05036     /* WINED3DSIH_CMP           */ pshader_hw_cmp,
05037     /* WINED3DSIH_CND           */ pshader_hw_cnd,
05038     /* WINED3DSIH_CRS           */ shader_hw_map2gl,
05039     /* WINED3DSIH_CUT           */ NULL,
05040     /* WINED3DSIH_DCL           */ NULL,
05041     /* WINED3DSIH_DEF           */ NULL,
05042     /* WINED3DSIH_DEFB          */ NULL,
05043     /* WINED3DSIH_DEFI          */ NULL,
05044     /* WINED3DSIH_DIV           */ NULL,
05045     /* WINED3DSIH_DP2ADD        */ pshader_hw_dp2add,
05046     /* WINED3DSIH_DP3           */ shader_hw_map2gl,
05047     /* WINED3DSIH_DP4           */ shader_hw_map2gl,
05048     /* WINED3DSIH_DST           */ shader_hw_map2gl,
05049     /* WINED3DSIH_DSX           */ shader_hw_map2gl,
05050     /* WINED3DSIH_DSY           */ shader_hw_dsy,
05051     /* WINED3DSIH_ELSE          */ shader_hw_else,
05052     /* WINED3DSIH_EMIT          */ NULL,
05053     /* WINED3DSIH_ENDIF         */ shader_hw_endif,
05054     /* WINED3DSIH_ENDLOOP       */ shader_hw_endloop,
05055     /* WINED3DSIH_ENDREP        */ shader_hw_endrep,
05056     /* WINED3DSIH_EQ            */ NULL,
05057     /* WINED3DSIH_EXP           */ shader_hw_scalar_op,
05058     /* WINED3DSIH_EXPP          */ shader_hw_scalar_op,
05059     /* WINED3DSIH_FRC           */ shader_hw_map2gl,
05060     /* WINED3DSIH_FTOI          */ NULL,
05061     /* WINED3DSIH_GE            */ NULL,
05062     /* WINED3DSIH_IADD          */ NULL,
05063     /* WINED3DSIH_IEQ           */ NULL,
05064     /* WINED3DSIH_IF            */ NULL /* Never dispatched: shader_arb_handle_instruction() resolves boolean ifs itself */,
05065     /* WINED3DSIH_IFC           */ shader_hw_ifc,
05066     /* WINED3DSIH_IGE           */ NULL,
05067     /* WINED3DSIH_IMUL          */ NULL,
05068     /* WINED3DSIH_ITOF          */ NULL,
05069     /* WINED3DSIH_LABEL         */ shader_hw_label,
05070     /* WINED3DSIH_LD            */ NULL,
05071     /* WINED3DSIH_LIT           */ shader_hw_map2gl,
05072     /* WINED3DSIH_LOG           */ shader_hw_log,
05073     /* WINED3DSIH_LOGP          */ shader_hw_log,
05074     /* WINED3DSIH_LOOP          */ shader_hw_loop,
05075     /* WINED3DSIH_LRP           */ shader_hw_lrp,
05076     /* WINED3DSIH_LT            */ NULL,
05077     /* WINED3DSIH_M3x2          */ shader_hw_mnxn,
05078     /* WINED3DSIH_M3x3          */ shader_hw_mnxn,
05079     /* WINED3DSIH_M3x4          */ shader_hw_mnxn,
05080     /* WINED3DSIH_M4x3          */ shader_hw_mnxn,
05081     /* WINED3DSIH_M4x4          */ shader_hw_mnxn,
05082     /* WINED3DSIH_MAD           */ shader_hw_map2gl,
05083     /* WINED3DSIH_MAX           */ shader_hw_map2gl,
05084     /* WINED3DSIH_MIN           */ shader_hw_map2gl,
05085     /* WINED3DSIH_MOV           */ shader_hw_mov,
05086     /* WINED3DSIH_MOVA          */ shader_hw_mov,
05087     /* WINED3DSIH_MOVC          */ NULL,
05088     /* WINED3DSIH_MUL           */ shader_hw_map2gl,
05089     /* WINED3DSIH_NOP           */ shader_hw_nop,
05090     /* WINED3DSIH_NRM           */ shader_hw_nrm,
05091     /* WINED3DSIH_PHASE         */ NULL,
05092     /* WINED3DSIH_POW           */ shader_hw_pow,
05093     /* WINED3DSIH_RCP           */ shader_hw_rcp,
05094     /* WINED3DSIH_REP           */ shader_hw_rep,
05095     /* WINED3DSIH_RET           */ shader_hw_ret,
05096     /* WINED3DSIH_ROUND_NI      */ NULL,
05097     /* WINED3DSIH_RSQ           */ shader_hw_scalar_op,
05098     /* WINED3DSIH_SAMPLE        */ NULL,
05099     /* WINED3DSIH_SAMPLE_GRAD   */ NULL,
05100     /* WINED3DSIH_SAMPLE_LOD    */ NULL,
05101     /* WINED3DSIH_SETP          */ NULL,
05102     /* WINED3DSIH_SGE           */ shader_hw_map2gl,
05103     /* WINED3DSIH_SGN           */ shader_hw_sgn,
05104     /* WINED3DSIH_SINCOS        */ shader_hw_sincos,
05105     /* WINED3DSIH_SLT           */ shader_hw_map2gl,
05106     /* WINED3DSIH_SQRT          */ NULL,
05107     /* WINED3DSIH_SUB           */ shader_hw_map2gl,
05108     /* WINED3DSIH_TEX           */ pshader_hw_tex,
05109     /* WINED3DSIH_TEXBEM        */ pshader_hw_texbem,
05110     /* WINED3DSIH_TEXBEML       */ pshader_hw_texbem,
05111     /* WINED3DSIH_TEXCOORD      */ pshader_hw_texcoord,
05112     /* WINED3DSIH_TEXDEPTH      */ pshader_hw_texdepth,
05113     /* WINED3DSIH_TEXDP3        */ pshader_hw_texdp3,
05114     /* WINED3DSIH_TEXDP3TEX     */ pshader_hw_texdp3tex,
05115     /* WINED3DSIH_TEXKILL       */ pshader_hw_texkill,
05116     /* WINED3DSIH_TEXLDD        */ shader_hw_texldd,
05117     /* WINED3DSIH_TEXLDL        */ shader_hw_texldl,
05118     /* WINED3DSIH_TEXM3x2DEPTH  */ pshader_hw_texm3x2depth,
05119     /* WINED3DSIH_TEXM3x2PAD    */ pshader_hw_texm3x2pad,
05120     /* WINED3DSIH_TEXM3x2TEX    */ pshader_hw_texm3x2tex,
05121     /* WINED3DSIH_TEXM3x3       */ pshader_hw_texm3x3,
05122     /* WINED3DSIH_TEXM3x3DIFF   */ NULL,
05123     /* WINED3DSIH_TEXM3x3PAD    */ pshader_hw_texm3x3pad,
05124     /* WINED3DSIH_TEXM3x3SPEC   */ pshader_hw_texm3x3spec,
05125     /* WINED3DSIH_TEXM3x3TEX    */ pshader_hw_texm3x3tex,
05126     /* WINED3DSIH_TEXM3x3VSPEC  */ pshader_hw_texm3x3vspec,
05127     /* WINED3DSIH_TEXREG2AR     */ pshader_hw_texreg2ar,
05128     /* WINED3DSIH_TEXREG2GB     */ pshader_hw_texreg2gb,
05129     /* WINED3DSIH_TEXREG2RGB    */ pshader_hw_texreg2rgb,
05130     /* WINED3DSIH_UDIV          */ NULL,
05131     /* WINED3DSIH_USHR          */ NULL,
05132     /* WINED3DSIH_UTOF          */ NULL,
05133     /* WINED3DSIH_XOR           */ NULL,
05134 };
05135 
05136 static BOOL get_bool_const(const struct wined3d_shader_instruction *ins,
05137         const struct wined3d_shader *shader, DWORD idx)
05138 {
05139     const struct wined3d_shader_reg_maps *reg_maps = ins->ctx->reg_maps;
05140     BOOL vshader = shader_is_vshader_version(reg_maps->shader_version.type);
05141     const struct wined3d_shader_lconst *constant;
05142     WORD bools = 0;
05143     WORD flag = (1 << idx);
05144     struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
05145 
05146     if (reg_maps->local_bool_consts & flag)
05147     {
05148         /* What good is a if(bool) with a hardcoded local constant? I don't know, but handle it */
05149         LIST_FOR_EACH_ENTRY(constant, &shader->constantsB, struct wined3d_shader_lconst, entry)
05150         {
05151             if (constant->idx == idx)
05152             {
05153                 return constant->value[0];
05154             }
05155         }
05156         ERR("Local constant not found\n");
05157         return FALSE;
05158     }
05159     else
05160     {
05161         if(vshader) bools = priv->cur_vs_args->clip.boolclip.bools;
05162         else bools = priv->cur_ps_args->bools;
05163         return bools & flag;
05164     }
05165 }
05166 
05167 static void get_loop_control_const(const struct wined3d_shader_instruction *ins,
05168         const struct wined3d_shader *shader, UINT idx, struct wined3d_shader_loop_control *loop_control)
05169 {
05170     const struct wined3d_shader_reg_maps *reg_maps = ins->ctx->reg_maps;
05171     struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
05172 
05173     /* Integer constants can either be a local constant, or they can be stored in the shader
05174      * type specific compile args. */
05175     if (reg_maps->local_int_consts & (1 << idx))
05176     {
05177         const struct wined3d_shader_lconst *constant;
05178 
05179         LIST_FOR_EACH_ENTRY(constant, &shader->constantsI, struct wined3d_shader_lconst, entry)
05180         {
05181             if (constant->idx == idx)
05182             {
05183                 loop_control->count = constant->value[0];
05184                 loop_control->start = constant->value[1];
05185                 /* Step is signed. */
05186                 loop_control->step = (int)constant->value[2];
05187                 return;
05188             }
05189         }
05190         /* If this happens the flag was set incorrectly */
05191         ERR("Local constant not found\n");
05192         loop_control->count = 0;
05193         loop_control->start = 0;
05194         loop_control->step = 0;
05195         return;
05196     }
05197 
05198     switch (reg_maps->shader_version.type)
05199     {
05200         case WINED3D_SHADER_TYPE_VERTEX:
05201             /* Count and aL start value are unsigned */
05202             loop_control->count = priv->cur_vs_args->loop_ctrl[idx][0];
05203             loop_control->start = priv->cur_vs_args->loop_ctrl[idx][1];
05204             /* Step is signed. */
05205             loop_control->step = ((char)priv->cur_vs_args->loop_ctrl[idx][2]);
05206             break;
05207 
05208         case WINED3D_SHADER_TYPE_PIXEL:
05209             loop_control->count = priv->cur_ps_args->loop_ctrl[idx][0];
05210             loop_control->start = priv->cur_ps_args->loop_ctrl[idx][1];
05211             loop_control->step = ((char)priv->cur_ps_args->loop_ctrl[idx][2]);
05212             break;
05213 
05214         default:
05215             FIXME("Unhandled shader type %#x.\n", reg_maps->shader_version.type);
05216             break;
05217     }
05218 }
05219 
05220 static void record_instruction(struct list *list, const struct wined3d_shader_instruction *ins)
05221 {
05222     unsigned int i;
05223     struct wined3d_shader_dst_param *dst_param = NULL;
05224     struct wined3d_shader_src_param *src_param = NULL, *rel_addr = NULL;
05225     struct recorded_instruction *rec = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*rec));
05226     if(!rec)
05227     {
05228         ERR("Out of memory\n");
05229         return;
05230     }
05231 
05232     rec->ins = *ins;
05233     dst_param = HeapAlloc(GetProcessHeap(), 0, sizeof(*dst_param));
05234     if(!dst_param) goto free;
05235     *dst_param = *ins->dst;
05236     if(ins->dst->reg.rel_addr)
05237     {
05238         rel_addr = HeapAlloc(GetProcessHeap(), 0, sizeof(*dst_param->reg.rel_addr));
05239         if(!rel_addr) goto free;
05240         *rel_addr = *ins->dst->reg.rel_addr;
05241         dst_param->reg.rel_addr = rel_addr;
05242     }
05243     rec->ins.dst = dst_param;
05244 
05245     src_param = HeapAlloc(GetProcessHeap(), 0, sizeof(*src_param) * ins->src_count);
05246     if(!src_param) goto free;
05247     for(i = 0; i < ins->src_count; i++)
05248     {
05249         src_param[i] = ins->src[i];
05250         if(ins->src[i].reg.rel_addr)
05251         {
05252             rel_addr = HeapAlloc(GetProcessHeap(), 0, sizeof(*rel_addr));
05253             if(!rel_addr) goto free;
05254             *rel_addr = *ins->src[i].reg.rel_addr;
05255             src_param[i].reg.rel_addr = rel_addr;
05256         }
05257     }
05258     rec->ins.src = src_param;
05259     list_add_tail(list, &rec->entry);
05260     return;
05261 
05262 free:
05263     ERR("Out of memory\n");
05264     if(dst_param)
05265     {
05266         HeapFree(GetProcessHeap(), 0, (void *) dst_param->reg.rel_addr);
05267         HeapFree(GetProcessHeap(), 0, dst_param);
05268     }
05269     if(src_param)
05270     {
05271         for(i = 0; i < ins->src_count; i++)
05272         {
05273             HeapFree(GetProcessHeap(), 0, (void *) src_param[i].reg.rel_addr);
05274         }
05275         HeapFree(GetProcessHeap(), 0, src_param);
05276     }
05277     HeapFree(GetProcessHeap(), 0, rec);
05278 }
05279 
05280 static void free_recorded_instruction(struct list *list)
05281 {
05282     struct recorded_instruction *rec_ins, *entry2;
05283     unsigned int i;
05284 
05285     LIST_FOR_EACH_ENTRY_SAFE(rec_ins, entry2, list, struct recorded_instruction, entry)
05286     {
05287         list_remove(&rec_ins->entry);
05288         if(rec_ins->ins.dst)
05289         {
05290             HeapFree(GetProcessHeap(), 0, (void *) rec_ins->ins.dst->reg.rel_addr);
05291             HeapFree(GetProcessHeap(), 0, (void *) rec_ins->ins.dst);
05292         }
05293         if(rec_ins->ins.src)
05294         {
05295             for(i = 0; i < rec_ins->ins.src_count; i++)
05296             {
05297                 HeapFree(GetProcessHeap(), 0, (void *) rec_ins->ins.src[i].reg.rel_addr);
05298             }
05299             HeapFree(GetProcessHeap(), 0, (void *) rec_ins->ins.src);
05300         }
05301         HeapFree(GetProcessHeap(), 0, rec_ins);
05302     }
05303 }
05304 
05305 static void shader_arb_handle_instruction(const struct wined3d_shader_instruction *ins) {
05306     SHADER_HANDLER hw_fct;
05307     struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
05308     const struct wined3d_shader *shader = ins->ctx->shader;
05309     struct control_frame *control_frame;
05310     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
05311     BOOL bool_const;
05312 
05313     if(ins->handler_idx == WINED3DSIH_LOOP || ins->handler_idx == WINED3DSIH_REP)
05314     {
05315         control_frame = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*control_frame));
05316         list_add_head(&priv->control_frames, &control_frame->entry);
05317 
05318         if(ins->handler_idx == WINED3DSIH_LOOP) control_frame->type = LOOP;
05319         if(ins->handler_idx == WINED3DSIH_REP) control_frame->type = REP;
05320 
05321         if(priv->target_version >= NV2)
05322         {
05323             control_frame->no.loop = priv->num_loops++;
05324             priv->loop_depth++;
05325         }
05326         else
05327         {
05328             /* Don't bother recording when we're in a not used if branch */
05329             if(priv->muted)
05330             {
05331                 return;
05332             }
05333 
05334             if(!priv->recording)
05335             {
05336                 list_init(&priv->record);
05337                 priv->recording = TRUE;
05338                 control_frame->outer_loop = TRUE;
05339                 get_loop_control_const(ins, shader, ins->src[0].reg.idx, &control_frame->loop_control);
05340                 return; /* Instruction is handled */
05341             }
05342             /* Record this loop in the outer loop's recording */
05343         }
05344     }
05345     else if(ins->handler_idx == WINED3DSIH_ENDLOOP || ins->handler_idx == WINED3DSIH_ENDREP)
05346     {
05347         if(priv->target_version >= NV2)
05348         {
05349             /* Nothing to do. The control frame is popped after the HW instr handler */
05350         }
05351         else
05352         {
05353             struct list *e = list_head(&priv->control_frames);
05354             control_frame = LIST_ENTRY(e, struct control_frame, entry);
05355             list_remove(&control_frame->entry);
05356 
05357             if(control_frame->outer_loop)
05358             {
05359                 unsigned int iteration;
05360                 int aL = 0;
05361                 struct list copy;
05362 
05363                 /* Turn off recording before playback */
05364                 priv->recording = FALSE;
05365 
05366                 /* Move the recorded instructions to a separate list and get them out of the private data
05367                  * structure. If there are nested loops, the shader_arb_handle_instruction below will
05368                  * be recorded again, thus priv->record might be overwritten
05369                  */
05370                 list_init(&copy);
05371                 list_move_tail(&copy, &priv->record);
05372                 list_init(&priv->record);
05373 
05374                 if(ins->handler_idx == WINED3DSIH_ENDLOOP)
05375                 {
05376                     shader_addline(buffer, "#unrolling loop: %u iterations, aL=%u, inc %d\n",
05377                                    control_frame->loop_control.count, control_frame->loop_control.start,
05378                                    control_frame->loop_control.step);
05379                     aL = control_frame->loop_control.start;
05380                 }
05381                 else
05382                 {
05383                     shader_addline(buffer, "#unrolling rep: %u iterations\n", control_frame->loop_control.count);
05384                 }
05385 
05386                 for (iteration = 0; iteration < control_frame->loop_control.count; ++iteration)
05387                 {
05388                     struct recorded_instruction *rec_ins;
05389                     if(ins->handler_idx == WINED3DSIH_ENDLOOP)
05390                     {
05391                         priv->aL = aL;
05392                         shader_addline(buffer, "#Iteration %u, aL=%d\n", iteration, aL);
05393                     }
05394                     else
05395                     {
05396                         shader_addline(buffer, "#Iteration %u\n", iteration);
05397                     }
05398 
05399                     LIST_FOR_EACH_ENTRY(rec_ins, &copy, struct recorded_instruction, entry)
05400                     {
05401                         shader_arb_handle_instruction(&rec_ins->ins);
05402                     }
05403 
05404                     if(ins->handler_idx == WINED3DSIH_ENDLOOP)
05405                     {
05406                         aL += control_frame->loop_control.step;
05407                     }
05408                 }
05409                 shader_addline(buffer, "#end loop/rep\n");
05410 
05411                 free_recorded_instruction(&copy);
05412                 HeapFree(GetProcessHeap(), 0, control_frame);
05413                 return; /* Instruction is handled */
05414             }
05415             else
05416             {
05417                 /* This is a nested loop. Proceed to the normal recording function */
05418                 HeapFree(GetProcessHeap(), 0, control_frame);
05419             }
05420         }
05421     }
05422 
05423     if(priv->recording)
05424     {
05425         record_instruction(&priv->record, ins);
05426         return;
05427     }
05428 
05429     /* boolean if */
05430     if(ins->handler_idx == WINED3DSIH_IF)
05431     {
05432         control_frame = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*control_frame));
05433         list_add_head(&priv->control_frames, &control_frame->entry);
05434         control_frame->type = IF;
05435 
05436         bool_const = get_bool_const(ins, shader, ins->src[0].reg.idx);
05437         if(ins->src[0].modifiers == WINED3DSPSM_NOT) bool_const = !bool_const;
05438         if (!priv->muted && !bool_const)
05439         {
05440             shader_addline(buffer, "#if(FALSE){\n");
05441             priv->muted = TRUE;
05442             control_frame->muting = TRUE;
05443         }
05444         else shader_addline(buffer, "#if(TRUE) {\n");
05445 
05446         return; /* Instruction is handled */
05447     }
05448     else if(ins->handler_idx == WINED3DSIH_IFC)
05449     {
05450         /* IF(bool) and if_cond(a, b) use the same ELSE and ENDIF tokens */
05451         control_frame = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*control_frame));
05452         control_frame->type = IFC;
05453         control_frame->no.ifc = priv->num_ifcs++;
05454         list_add_head(&priv->control_frames, &control_frame->entry);
05455     }
05456     else if(ins->handler_idx == WINED3DSIH_ELSE)
05457     {
05458         struct list *e = list_head(&priv->control_frames);
05459         control_frame = LIST_ENTRY(e, struct control_frame, entry);
05460 
05461         if(control_frame->type == IF)
05462         {
05463             shader_addline(buffer, "#} else {\n");
05464             if(!priv->muted && !control_frame->muting)
05465             {
05466                 priv->muted = TRUE;
05467                 control_frame->muting = TRUE;
05468             }
05469             else if(control_frame->muting) priv->muted = FALSE;
05470             return; /* Instruction is handled. */
05471         }
05472         /* In case of an ifc, generate a HW shader instruction */
05473     }
05474     else if(ins->handler_idx == WINED3DSIH_ENDIF)
05475     {
05476         struct list *e = list_head(&priv->control_frames);
05477         control_frame = LIST_ENTRY(e, struct control_frame, entry);
05478 
05479         if(control_frame->type == IF)
05480         {
05481             shader_addline(buffer, "#} endif\n");
05482             if(control_frame->muting) priv->muted = FALSE;
05483             list_remove(&control_frame->entry);
05484             HeapFree(GetProcessHeap(), 0, control_frame);
05485             return; /* Instruction is handled */
05486         }
05487     }
05488 
05489     if(priv->muted) return;
05490 
05491     /* Select handler */
05492     hw_fct = shader_arb_instruction_handler_table[ins->handler_idx];
05493 
05494     /* Unhandled opcode */
05495     if (!hw_fct)
05496     {
05497         FIXME("Backend can't handle opcode %#x\n", ins->handler_idx);
05498         return;
05499     }
05500     hw_fct(ins);
05501 
05502     if(ins->handler_idx == WINED3DSIH_ENDLOOP || ins->handler_idx == WINED3DSIH_ENDREP)
05503     {
05504         struct list *e = list_head(&priv->control_frames);
05505         control_frame = LIST_ENTRY(e, struct control_frame, entry);
05506         list_remove(&control_frame->entry);
05507         HeapFree(GetProcessHeap(), 0, control_frame);
05508         priv->loop_depth--;
05509     }
05510     else if(ins->handler_idx == WINED3DSIH_ENDIF)
05511     {
05512         /* Non-ifc ENDIFs don't reach that place because of the return in the if block above */
05513         struct list *e = list_head(&priv->control_frames);
05514         control_frame = LIST_ENTRY(e, struct control_frame, entry);
05515         list_remove(&control_frame->entry);
05516         HeapFree(GetProcessHeap(), 0, control_frame);
05517     }
05518 
05519 
05520     shader_arb_add_instruction_modifiers(ins);
05521 }
05522 /* Shader backend operations exported by the ARB program backend. The initializers are positional, so they presumably follow the member order of struct wined3d_shader_backend_ops (declared elsewhere - confirm). */
05523 const struct wined3d_shader_backend_ops arb_program_shader_backend =
05524 { /* arbfp_alloc(), arbfp_free() and state_texfactor_arbfp() compare device->shader_backend against the address of this object. */
05525     shader_arb_handle_instruction,
05526     shader_arb_select,
05527     shader_arb_select_depth_blt,
05528     shader_arb_deselect_depth_blt,
05529     shader_arb_update_float_vertex_constants,
05530     shader_arb_update_float_pixel_constants,
05531     shader_arb_load_constants,
05532     shader_arb_load_np2fixup_constants,
05533     shader_arb_destroy,
05534     shader_arb_alloc,
05535     shader_arb_free,
05536     shader_arb_context_destroyed,
05537     shader_arb_get_caps,
05538     shader_arb_color_fixup_supported,
05539 };
05540 
/* ARB_fragment_program fixed function pipeline replacement definitions.
 *
 * Indices of the fragment program env parameters used by the fixed function
 * replacement: the texture factor, the specular enable switch, then three
 * groups of eight slots (indices 0..7) for per-stage constants, bump
 * matrices and luminance parameters. The macro argument is parenthesized so
 * that any expression can be passed as the index. */
#define ARB_FFP_CONST_TFACTOR           0
#define ARB_FFP_CONST_SPECULAR_ENABLE   ((ARB_FFP_CONST_TFACTOR) + 1)
#define ARB_FFP_CONST_CONSTANT(i)       ((ARB_FFP_CONST_SPECULAR_ENABLE) + 1 + (i))
#define ARB_FFP_CONST_BUMPMAT(i)        ((ARB_FFP_CONST_CONSTANT(7)) + 1 + (i))
#define ARB_FFP_CONST_LUMINANCE(i)      ((ARB_FFP_CONST_BUMPMAT(7)) + 1 + (i))
05547 /* One entry of the priv->fragment_shaders rb-tree: a fixed function replacement fragment program together with the description it is keyed by. */
05548 struct arbfp_ffp_desc
05549 {
05550     struct ffp_frag_desc parent; /* Contains the rb-tree node (parent.entry) used by arbfp_free_ffpshader() to get back to this struct. */
05551     GLuint shader; /* GL program id; deleted with glDeleteProgramsARB in arbfp_free_ffpshader(). */
05552     unsigned int num_textures_used; /* NOTE(review): not referenced in this part of the file; presumably the number of texture stages the program uses - confirm. */
05553 };
05554 
05555 /* Context activation and GL locking are done by the caller. */
05556 static void arbfp_enable(BOOL enable)
05557 {
05558     if(enable) {
05559         glEnable(GL_FRAGMENT_PROGRAM_ARB);
05560         checkGLcall("glEnable(GL_FRAGMENT_PROGRAM_ARB)");
05561     } else {
05562         glDisable(GL_FRAGMENT_PROGRAM_ARB);
05563         checkGLcall("glDisable(GL_FRAGMENT_PROGRAM_ARB)");
05564     }
05565 }
05566 
05567 static HRESULT arbfp_alloc(struct wined3d_device *device)
05568 {
05569     struct shader_arb_priv *priv;
05570     /* Share private data between the shader backend and the pipeline replacement, if both
05571      * are the arb implementation. This is needed to figure out whether ARBfp should be disabled
05572      * if no pixel shader is bound or not
05573      */
05574     if (device->shader_backend == &arb_program_shader_backend)
05575     {
05576         device->fragment_priv = device->shader_priv;
05577     }
05578     else
05579     {
05580         device->fragment_priv = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(struct shader_arb_priv));
05581         if (!device->fragment_priv) return E_OUTOFMEMORY;
05582     }
05583     priv = device->fragment_priv;
05584     if (wine_rb_init(&priv->fragment_shaders, &wined3d_ffp_frag_program_rb_functions) == -1)
05585     {
05586         ERR("Failed to initialize rbtree.\n");
05587         HeapFree(GetProcessHeap(), 0, device->fragment_priv);
05588         return E_OUTOFMEMORY;
05589     }
05590     priv->use_arbfp_fixed_func = TRUE;
05591     return WINED3D_OK;
05592 }
05593 
05594 /* Context activation is done by the caller. */
05595 static void arbfp_free_ffpshader(struct wine_rb_entry *entry, void *context)
05596 {
05597     const struct wined3d_gl_info *gl_info = context;
05598     struct arbfp_ffp_desc *entry_arb = WINE_RB_ENTRY_VALUE(entry, struct arbfp_ffp_desc, parent.entry);
05599 
05600     ENTER_GL();
05601     GL_EXTCALL(glDeleteProgramsARB(1, &entry_arb->shader));
05602     checkGLcall("glDeleteProgramsARB(1, &entry_arb->shader)");
05603     HeapFree(GetProcessHeap(), 0, entry_arb);
05604     LEAVE_GL();
05605 }
05606 
05607 /* Context activation is done by the caller. */
05608 static void arbfp_free(struct wined3d_device *device)
05609 {
05610     struct shader_arb_priv *priv = device->fragment_priv;
05611 
05612     wine_rb_destroy(&priv->fragment_shaders, arbfp_free_ffpshader, &device->adapter->gl_info);
05613     priv->use_arbfp_fixed_func = FALSE;
05614 
05615     if (device->shader_backend != &arb_program_shader_backend)
05616     {
05617         HeapFree(GetProcessHeap(), 0, device->fragment_priv);
05618     }
05619 }
05620 
05621 static void arbfp_get_caps(const struct wined3d_gl_info *gl_info, struct fragment_caps *caps)
05622 {
05623     caps->PrimitiveMiscCaps = WINED3DPMISCCAPS_TSSARGTEMP;
05624     caps->TextureOpCaps =  WINED3DTEXOPCAPS_DISABLE                     |
05625                            WINED3DTEXOPCAPS_SELECTARG1                  |
05626                            WINED3DTEXOPCAPS_SELECTARG2                  |
05627                            WINED3DTEXOPCAPS_MODULATE4X                  |
05628                            WINED3DTEXOPCAPS_MODULATE2X                  |
05629                            WINED3DTEXOPCAPS_MODULATE                    |
05630                            WINED3DTEXOPCAPS_ADDSIGNED2X                 |
05631                            WINED3DTEXOPCAPS_ADDSIGNED                   |
05632                            WINED3DTEXOPCAPS_ADD                         |
05633                            WINED3DTEXOPCAPS_SUBTRACT                    |
05634                            WINED3DTEXOPCAPS_ADDSMOOTH                   |
05635                            WINED3DTEXOPCAPS_BLENDCURRENTALPHA           |
05636                            WINED3DTEXOPCAPS_BLENDFACTORALPHA            |
05637                            WINED3DTEXOPCAPS_BLENDTEXTUREALPHA           |
05638                            WINED3DTEXOPCAPS_BLENDDIFFUSEALPHA           |
05639                            WINED3DTEXOPCAPS_BLENDTEXTUREALPHAPM         |
05640                            WINED3DTEXOPCAPS_MODULATEALPHA_ADDCOLOR      |
05641                            WINED3DTEXOPCAPS_MODULATECOLOR_ADDALPHA      |
05642                            WINED3DTEXOPCAPS_MODULATEINVCOLOR_ADDALPHA   |
05643                            WINED3DTEXOPCAPS_MODULATEINVALPHA_ADDCOLOR   |
05644                            WINED3DTEXOPCAPS_DOTPRODUCT3                 |
05645                            WINED3DTEXOPCAPS_MULTIPLYADD                 |
05646                            WINED3DTEXOPCAPS_LERP                        |
05647                            WINED3DTEXOPCAPS_BUMPENVMAP                  |
05648                            WINED3DTEXOPCAPS_BUMPENVMAPLUMINANCE;
05649 
05650     /* TODO: Implement WINED3DTEXOPCAPS_PREMODULATE */
05651 
05652     caps->MaxTextureBlendStages   = 8;
05653     caps->MaxSimultaneousTextures = min(gl_info->limits.fragment_samplers, 8);
05654 }
05655 
05656 static void state_texfactor_arbfp(struct wined3d_context *context,
05657         const struct wined3d_state *state, DWORD state_id)
05658 {
05659     struct wined3d_device *device = context->swapchain->device;
05660     const struct wined3d_gl_info *gl_info = context->gl_info;
05661     float col[4];
05662 
05663     /* Don't load the parameter if we're using an arbfp pixel shader,
05664      * otherwise we'll overwrite application provided constants. */
05665     if (device->shader_backend == &arb_program_shader_backend)
05666     {
05667         struct shader_arb_priv *priv;
05668 
05669         if (use_ps(state)) return;
05670 
05671         priv = device->shader_priv;
05672         priv->pshader_const_dirty[ARB_FFP_CONST_TFACTOR] = 1;
05673         priv->highest_dirty_ps_const = max(priv->highest_dirty_ps_const, ARB_FFP_CONST_TFACTOR + 1);
05674     }
05675 
05676     D3DCOLORTOGLFLOAT4(state->render_states[WINED3D_RS_TEXTUREFACTOR], col);
05677     GL_EXTCALL(glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_TFACTOR, col));
05678     checkGLcall("glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_TFACTOR, col)");
05679 }
05680 
05681 static void state_arb_specularenable(struct wined3d_context *context,
05682         const struct wined3d_state *state, DWORD state_id)
05683 {
05684     struct wined3d_device *device = context->swapchain->device;
05685     const struct wined3d_gl_info *gl_info = context->gl_info;
05686     float col[4];
05687 
05688     /* Don't load the parameter if we're using an arbfp pixel shader, otherwise we'll overwrite
05689      * application provided constants
05690      */
05691     if (device->shader_backend == &arb_program_shader_backend)
05692     {
05693         struct shader_arb_priv *priv;
05694 
05695         if (use_ps(state)) return;
05696 
05697         priv = device->shader_priv;
05698         priv->pshader_const_dirty[ARB_FFP_CONST_SPECULAR_ENABLE] = 1;
05699         priv->highest_dirty_ps_const = max(priv->highest_dirty_ps_const, ARB_FFP_CONST_SPECULAR_ENABLE + 1);
05700     }
05701 
05702     if (state->render_states[WINED3D_RS_SPECULARENABLE])
05703     {
05704         /* The specular color has no alpha */
05705         col[0] = 1.0f; col[1] = 1.0f;
05706         col[2] = 1.0f; col[3] = 0.0f;
05707     } else {
05708         col[0] = 0.0f; col[1] = 0.0f;
05709         col[2] = 0.0f; col[3] = 0.0f;
05710     }
05711     GL_EXTCALL(glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_SPECULAR_ENABLE, col));
05712     checkGLcall("glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_SPECULAR_ENABLE, col)");
05713 }
05714 
05715 static void set_bumpmat_arbfp(struct wined3d_context *context, const struct wined3d_state *state, DWORD state_id)
05716 {
05717     DWORD stage = (state_id - STATE_TEXTURESTAGE(0, 0)) / (WINED3D_HIGHEST_TEXTURE_STATE + 1);
05718     struct wined3d_device *device = context->swapchain->device;
05719     const struct wined3d_gl_info *gl_info = context->gl_info;
05720     float mat[2][2];
05721 
05722     if (use_ps(state))
05723     {
05724         if (stage && (state->pixel_shader->reg_maps.bumpmat & (1 << stage)))
05725         {
05726             /* The pixel shader has to know the bump env matrix. Do a constants update if it isn't scheduled
05727              * anyway
05728              */
05729             if (!isStateDirty(context, STATE_PIXELSHADERCONSTANT))
05730                 context_apply_state(context, state, STATE_PIXELSHADERCONSTANT);
05731         }
05732 
05733         if(device->shader_backend == &arb_program_shader_backend) {
05734             /* Exit now, don't set the bumpmat below, otherwise we may overwrite pixel shader constants */
05735             return;
05736         }
05737     }
05738     else if (device->shader_backend == &arb_program_shader_backend)
05739     {
05740         struct shader_arb_priv *priv = device->shader_priv;
05741         priv->pshader_const_dirty[ARB_FFP_CONST_BUMPMAT(stage)] = 1;
05742         priv->highest_dirty_ps_const = max(priv->highest_dirty_ps_const, ARB_FFP_CONST_BUMPMAT(stage) + 1);
05743     }
05744 
05745     mat[0][0] = *((float *)&state->texture_states[stage][WINED3D_TSS_BUMPENV_MAT00]);
05746     mat[0][1] = *((float *)&state->texture_states[stage][WINED3D_TSS_BUMPENV_MAT01]);
05747     mat[1][0] = *((float *)&state->texture_states[stage][WINED3D_TSS_BUMPENV_MAT10]);
05748     mat[1][1] = *((float *)&state->texture_states[stage][WINED3D_TSS_BUMPENV_MAT11]);
05749 
05750     GL_EXTCALL(glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_BUMPMAT(stage), &mat[0][0]));
05751     checkGLcall("glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_BUMPMAT(stage), &mat[0][0])");
05752 }
05753 
05754 static void tex_bumpenvlum_arbfp(struct wined3d_context *context,
05755         const struct wined3d_state *state, DWORD state_id)
05756 {
05757     DWORD stage = (state_id - STATE_TEXTURESTAGE(0, 0)) / (WINED3D_HIGHEST_TEXTURE_STATE + 1);
05758     struct wined3d_device *device = context->swapchain->device;
05759     const struct wined3d_gl_info *gl_info = context->gl_info;
05760     float param[4];
05761 
05762     if (use_ps(state))
05763     {
05764         if (stage && (state->pixel_shader->reg_maps.luminanceparams & (1 << stage)))
05765         {
05766             /* The pixel shader has to know the luminance offset. Do a constants update if it
05767              * isn't scheduled anyway
05768              */
05769             if (!isStateDirty(context, STATE_PIXELSHADERCONSTANT))
05770                 context_apply_state(context, state, STATE_PIXELSHADERCONSTANT);
05771         }
05772 
05773         if(device->shader_backend == &arb_program_shader_backend) {
05774             /* Exit now, don't set the bumpmat below, otherwise we may overwrite pixel shader constants */
05775             return;
05776         }
05777     }
05778     else if (device->shader_backend == &arb_program_shader_backend)
05779     {
05780         struct shader_arb_priv *priv = device->shader_priv;
05781         priv->pshader_const_dirty[ARB_FFP_CONST_LUMINANCE(stage)] = 1;
05782         priv->highest_dirty_ps_const = max(priv->highest_dirty_ps_const, ARB_FFP_CONST_LUMINANCE(stage) + 1);
05783     }
05784 
05785     param[0] = *((float *)&state->texture_states[stage][WINED3D_TSS_BUMPENV_LSCALE]);
05786     param[1] = *((float *)&state->texture_states[stage][WINED3D_TSS_BUMPENV_LOFFSET]);
05787     param[2] = 0.0f;
05788     param[3] = 0.0f;
05789 
05790     GL_EXTCALL(glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_LUMINANCE(stage), param));
05791     checkGLcall("glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_LUMINANCE(stage), param)");
05792 }
05793 
05794 static const char *get_argreg(struct wined3d_shader_buffer *buffer, DWORD argnum, unsigned int stage, DWORD arg)
05795 {
05796     const char *ret;
05797 
05798     if(arg == ARG_UNUSED) return "unused"; /* This is the marker for unused registers */
05799 
05800     switch(arg & WINED3DTA_SELECTMASK) {
05801         case WINED3DTA_DIFFUSE:
05802             ret = "fragment.color.primary"; break;
05803 
05804         case WINED3DTA_CURRENT:
05805             if (!stage) ret = "fragment.color.primary";
05806             else ret = "ret";
05807             break;
05808 
05809         case WINED3DTA_TEXTURE:
05810             switch(stage) {
05811                 case 0: ret = "tex0"; break;
05812                 case 1: ret = "tex1"; break;
05813                 case 2: ret = "tex2"; break;
05814                 case 3: ret = "tex3"; break;
05815                 case 4: ret = "tex4"; break;
05816                 case 5: ret = "tex5"; break;
05817                 case 6: ret = "tex6"; break;
05818                 case 7: ret = "tex7"; break;
05819                 default: ret = "unknown texture";
05820             }
05821             break;
05822 
05823         case WINED3DTA_TFACTOR:
05824             ret = "tfactor"; break;
05825 
05826         case WINED3DTA_SPECULAR:
05827             ret = "fragment.color.secondary"; break;
05828 
05829         case WINED3DTA_TEMP:
05830             ret = "tempreg"; break;
05831 
05832         case WINED3DTA_CONSTANT:
05833             FIXME("Implement perstage constants\n");
05834             switch(stage) {
05835                 case 0: ret = "const0"; break;
05836                 case 1: ret = "const1"; break;
05837                 case 2: ret = "const2"; break;
05838                 case 3: ret = "const3"; break;
05839                 case 4: ret = "const4"; break;
05840                 case 5: ret = "const5"; break;
05841                 case 6: ret = "const6"; break;
05842                 case 7: ret = "const7"; break;
05843                 default: ret = "unknown constant";
05844             }
05845             break;
05846 
05847         default:
05848             return "unknown";
05849     }
05850 
05851     if(arg & WINED3DTA_COMPLEMENT) {
05852         shader_addline(buffer, "SUB arg%u, const.x, %s;\n", argnum, ret);
05853         if(argnum == 0) ret = "arg0";
05854         if(argnum == 1) ret = "arg1";
05855         if(argnum == 2) ret = "arg2";
05856     }
05857     if(arg & WINED3DTA_ALPHAREPLICATE) {
05858         shader_addline(buffer, "MOV arg%u, %s.w;\n", argnum, ret);
05859         if(argnum == 0) ret = "arg0";
05860         if(argnum == 1) ret = "arg1";
05861         if(argnum == 2) ret = "arg2";
05862     }
05863     return ret;
05864 }
05865 
05866 static void gen_ffp_instr(struct wined3d_shader_buffer *buffer, unsigned int stage, BOOL color,
05867         BOOL alpha, DWORD dst, DWORD op, DWORD dw_arg0, DWORD dw_arg1, DWORD dw_arg2)
05868 {
05869     const char *dstmask, *dstreg, *arg0, *arg1, *arg2;
05870     unsigned int mul = 1;
05871     BOOL mul_final_dest = FALSE;
05872 
05873     if(color && alpha) dstmask = "";
05874     else if(color) dstmask = ".xyz";
05875     else dstmask = ".w";
05876 
05877     if(dst == tempreg) dstreg = "tempreg";
05878     else dstreg = "ret";
05879 
05880     arg0 = get_argreg(buffer, 0, stage, dw_arg0);
05881     arg1 = get_argreg(buffer, 1, stage, dw_arg1);
05882     arg2 = get_argreg(buffer, 2, stage, dw_arg2);
05883 
05884     switch (op)
05885     {
05886         case WINED3D_TOP_DISABLE:
05887             if (!stage)
05888                 shader_addline(buffer, "MOV %s%s, fragment.color.primary;\n", dstreg, dstmask);
05889             break;
05890 
05891         case WINED3D_TOP_SELECT_ARG2:
05892             arg1 = arg2;
05893             /* FALLTHROUGH */
05894         case WINED3D_TOP_SELECT_ARG1:
05895             shader_addline(buffer, "MOV %s%s, %s;\n", dstreg, dstmask, arg1);
05896             break;
05897 
05898         case WINED3D_TOP_MODULATE_4X:
05899             mul = 2;
05900             /* FALLTHROUGH */
05901         case WINED3D_TOP_MODULATE_2X:
05902             mul *= 2;
05903             if (!strcmp(dstreg, "result.color"))
05904             {
05905                 dstreg = "ret";
05906                 mul_final_dest = TRUE;
05907             }
05908             /* FALLTHROUGH */
05909         case WINED3D_TOP_MODULATE:
05910             shader_addline(buffer, "MUL %s%s, %s, %s;\n", dstreg, dstmask, arg1, arg2);
05911             break;
05912 
05913         case WINED3D_TOP_ADD_SIGNED_2X:
05914             mul = 2;
05915             if (!strcmp(dstreg, "result.color"))
05916             {
05917                 dstreg = "ret";
05918                 mul_final_dest = TRUE;
05919             }
05920             /* FALLTHROUGH */
05921         case WINED3D_TOP_ADD_SIGNED:
05922             shader_addline(buffer, "SUB arg2, %s, const.w;\n", arg2);
05923             arg2 = "arg2";
05924             /* FALLTHROUGH */
05925         case WINED3D_TOP_ADD:
05926             shader_addline(buffer, "ADD_SAT %s%s, %s, %s;\n", dstreg, dstmask, arg1, arg2);
05927             break;
05928 
05929         case WINED3D_TOP_SUBTRACT:
05930             shader_addline(buffer, "SUB_SAT %s%s, %s, %s;\n", dstreg, dstmask, arg1, arg2);
05931             break;
05932 
05933         case WINED3D_TOP_ADD_SMOOTH:
05934             shader_addline(buffer, "SUB arg1, const.x, %s;\n", arg1);
05935             shader_addline(buffer, "MAD_SAT %s%s, arg1, %s, %s;\n", dstreg, dstmask, arg2, arg1);
05936             break;
05937 
05938         case WINED3D_TOP_BLEND_CURRENT_ALPHA:
05939             arg0 = get_argreg(buffer, 0, stage, WINED3DTA_CURRENT);
05940             shader_addline(buffer, "LRP %s%s, %s.w, %s, %s;\n", dstreg, dstmask, arg0, arg1, arg2);
05941             break;
05942         case WINED3D_TOP_BLEND_FACTOR_ALPHA:
05943             arg0 = get_argreg(buffer, 0, stage, WINED3DTA_TFACTOR);
05944             shader_addline(buffer, "LRP %s%s, %s.w, %s, %s;\n", dstreg, dstmask, arg0, arg1, arg2);
05945             break;
05946         case WINED3D_TOP_BLEND_TEXTURE_ALPHA:
05947             arg0 = get_argreg(buffer, 0, stage, WINED3DTA_TEXTURE);
05948             shader_addline(buffer, "LRP %s%s, %s.w, %s, %s;\n", dstreg, dstmask, arg0, arg1, arg2);
05949             break;
05950         case WINED3D_TOP_BLEND_DIFFUSE_ALPHA:
05951             arg0 = get_argreg(buffer, 0, stage, WINED3DTA_DIFFUSE);
05952             shader_addline(buffer, "LRP %s%s, %s.w, %s, %s;\n", dstreg, dstmask, arg0, arg1, arg2);
05953             break;
05954 
05955         case WINED3D_TOP_BLEND_TEXTURE_ALPHA_PM:
05956             arg0 = get_argreg(buffer, 0, stage, WINED3DTA_TEXTURE);
05957             shader_addline(buffer, "SUB arg0.w, const.x, %s.w;\n", arg0);
05958             shader_addline(buffer, "MAD_SAT %s%s, %s, arg0.w, %s;\n", dstreg, dstmask, arg2, arg1);
05959             break;
05960 
05961         /* D3DTOP_PREMODULATE ???? */
05962 
05963         case WINED3D_TOP_MODULATE_INVALPHA_ADD_COLOR:
05964             shader_addline(buffer, "SUB arg0.w, const.x, %s;\n", arg1);
05965             shader_addline(buffer, "MAD_SAT %s%s, arg0.w, %s, %s;\n", dstreg, dstmask, arg2, arg1);
05966             break;
05967         case WINED3D_TOP_MODULATE_ALPHA_ADD_COLOR:
05968             shader_addline(buffer, "MAD_SAT %s%s, %s.w, %s, %s;\n", dstreg, dstmask, arg1, arg2, arg1);
05969             break;
05970         case WINED3D_TOP_MODULATE_INVCOLOR_ADD_ALPHA:
05971             shader_addline(buffer, "SUB arg0, const.x, %s;\n", arg1);
05972             shader_addline(buffer, "MAD_SAT %s%s, arg0, %s, %s.w;\n", dstreg, dstmask, arg2, arg1);
05973             break;
05974         case WINED3D_TOP_MODULATE_COLOR_ADD_ALPHA:
05975             shader_addline(buffer, "MAD_SAT %s%s, %s, %s, %s.w;\n", dstreg, dstmask, arg1, arg2, arg1);
05976             break;
05977 
05978         case WINED3D_TOP_DOTPRODUCT3:
05979             mul = 4;
05980             if (!strcmp(dstreg, "result.color"))
05981             {
05982                 dstreg = "ret";
05983                 mul_final_dest = TRUE;
05984             }
05985             shader_addline(buffer, "SUB arg1, %s, const.w;\n", arg1);
05986             shader_addline(buffer, "SUB arg2, %s, const.w;\n", arg2);
05987             shader_addline(buffer, "DP3_SAT %s%s, arg1, arg2;\n", dstreg, dstmask);
05988             break;
05989 
05990         case WINED3D_TOP_MULTIPLY_ADD:
05991             shader_addline(buffer, "MAD_SAT %s%s, %s, %s, %s;\n", dstreg, dstmask, arg1, arg2, arg0);
05992             break;
05993 
05994         case WINED3D_TOP_LERP:
05995             /* The msdn is not quite right here */
05996             shader_addline(buffer, "LRP %s%s, %s, %s, %s;\n", dstreg, dstmask, arg0, arg1, arg2);
05997             break;
05998 
05999         case WINED3D_TOP_BUMPENVMAP:
06000         case WINED3D_TOP_BUMPENVMAP_LUMINANCE:
06001             /* Those are handled in the first pass of the shader(generation pass 1 and 2) already */
06002             break;
06003 
06004         default:
06005             FIXME("Unhandled texture op %08x\n", op);
06006     }
06007 
06008     if(mul == 2) {
06009         shader_addline(buffer, "MUL_SAT %s%s, %s, const.y;\n", mul_final_dest ? "result.color" : dstreg, dstmask, dstreg);
06010     } else if(mul == 4) {
06011         shader_addline(buffer, "MUL_SAT %s%s, %s, const.z;\n", mul_final_dest ? "result.color" : dstreg, dstmask, dstreg);
06012     }
06013 }
06014 
06015 static GLuint gen_arbfp_ffp_shader(const struct ffp_frag_settings *settings, const struct wined3d_gl_info *gl_info)
06016 {
06017     unsigned int stage;
06018     struct wined3d_shader_buffer buffer;
06019     BOOL tex_read[MAX_TEXTURES] = {FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE};
06020     BOOL bump_used[MAX_TEXTURES] = {FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE};
06021     BOOL luminance_used[MAX_TEXTURES] = {FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE};
06022     UINT lowest_disabled_stage;
06023     const char *textype;
06024     const char *instr, *sat;
06025     char colorcor_dst[8];
06026     GLuint ret;
06027     DWORD arg0, arg1, arg2;
06028     BOOL tempreg_used = FALSE, tfactor_used = FALSE;
06029     BOOL op_equal;
06030     const char *final_combiner_src = "ret";
06031     GLint pos;
06032 
06033     /* Find out which textures are read */
06034     for (stage = 0; stage < MAX_TEXTURES; ++stage)
06035     {
06036         if (settings->op[stage].cop == WINED3D_TOP_DISABLE)
06037             break;
06038         arg0 = settings->op[stage].carg0 & WINED3DTA_SELECTMASK;
06039         arg1 = settings->op[stage].carg1 & WINED3DTA_SELECTMASK;
06040         arg2 = settings->op[stage].carg2 & WINED3DTA_SELECTMASK;
06041         if(arg0 == WINED3DTA_TEXTURE) tex_read[stage] = TRUE;
06042         if(arg1 == WINED3DTA_TEXTURE) tex_read[stage] = TRUE;
06043         if(arg2 == WINED3DTA_TEXTURE) tex_read[stage] = TRUE;
06044 
06045         if (settings->op[stage].cop == WINED3D_TOP_BLEND_TEXTURE_ALPHA)
06046             tex_read[stage] = TRUE;
06047         if (settings->op[stage].cop == WINED3D_TOP_BLEND_TEXTURE_ALPHA_PM)
06048             tex_read[stage] = TRUE;
06049         if (settings->op[stage].cop == WINED3D_TOP_BUMPENVMAP)
06050         {
06051             bump_used[stage] = TRUE;
06052             tex_read[stage] = TRUE;
06053         }
06054         if (settings->op[stage].cop == WINED3D_TOP_BUMPENVMAP_LUMINANCE)
06055         {
06056             bump_used[stage] = TRUE;
06057             tex_read[stage] = TRUE;
06058             luminance_used[stage] = TRUE;
06059         }
06060         else if (settings->op[stage].cop == WINED3D_TOP_BLEND_FACTOR_ALPHA)
06061         {
06062             tfactor_used = TRUE;
06063         }
06064 
06065         if(arg0 == WINED3DTA_TFACTOR || arg1 == WINED3DTA_TFACTOR || arg2 == WINED3DTA_TFACTOR) {
06066             tfactor_used = TRUE;
06067         }
06068 
06069         if(settings->op[stage].dst == tempreg) tempreg_used = TRUE;
06070         if(arg0 == WINED3DTA_TEMP || arg1 == WINED3DTA_TEMP || arg2 == WINED3DTA_TEMP) {
06071             tempreg_used = TRUE;
06072         }
06073 
06074         if (settings->op[stage].aop == WINED3D_TOP_DISABLE)
06075             continue;
06076         arg0 = settings->op[stage].aarg0 & WINED3DTA_SELECTMASK;
06077         arg1 = settings->op[stage].aarg1 & WINED3DTA_SELECTMASK;
06078         arg2 = settings->op[stage].aarg2 & WINED3DTA_SELECTMASK;
06079         if(arg0 == WINED3DTA_TEXTURE) tex_read[stage] = TRUE;
06080         if(arg1 == WINED3DTA_TEXTURE) tex_read[stage] = TRUE;
06081         if(arg2 == WINED3DTA_TEXTURE) tex_read[stage] = TRUE;
06082 
06083         if(arg0 == WINED3DTA_TEMP || arg1 == WINED3DTA_TEMP || arg2 == WINED3DTA_TEMP) {
06084             tempreg_used = TRUE;
06085         }
06086         if(arg0 == WINED3DTA_TFACTOR || arg1 == WINED3DTA_TFACTOR || arg2 == WINED3DTA_TFACTOR) {
06087             tfactor_used = TRUE;
06088         }
06089     }
06090     lowest_disabled_stage = stage;
06091 
06092     /* Shader header */
06093     if (!shader_buffer_init(&buffer))
06094     {
06095         ERR("Failed to initialize shader buffer.\n");
06096         return 0;
06097     }
06098 
06099     shader_addline(&buffer, "!!ARBfp1.0\n");
06100 
06101     switch(settings->fog) {
06102         case FOG_OFF:                                                         break;
06103         case FOG_LINEAR: shader_addline(&buffer, "OPTION ARB_fog_linear;\n"); break;
06104         case FOG_EXP:    shader_addline(&buffer, "OPTION ARB_fog_exp;\n");    break;
06105         case FOG_EXP2:   shader_addline(&buffer, "OPTION ARB_fog_exp2;\n");   break;
06106         default: FIXME("Unexpected fog setting %d\n", settings->fog);
06107     }
06108 
06109     shader_addline(&buffer, "PARAM const = {1, 2, 4, 0.5};\n");
06110     shader_addline(&buffer, "TEMP TMP;\n");
06111     shader_addline(&buffer, "TEMP ret;\n");
06112     if(tempreg_used || settings->sRGB_write) shader_addline(&buffer, "TEMP tempreg;\n");
06113     shader_addline(&buffer, "TEMP arg0;\n");
06114     shader_addline(&buffer, "TEMP arg1;\n");
06115     shader_addline(&buffer, "TEMP arg2;\n");
06116     for(stage = 0; stage < MAX_TEXTURES; stage++) {
06117         if(!tex_read[stage]) continue;
06118         shader_addline(&buffer, "TEMP tex%u;\n", stage);
06119         if(!bump_used[stage]) continue;
06120         shader_addline(&buffer, "PARAM bumpmat%u = program.env[%u];\n", stage, ARB_FFP_CONST_BUMPMAT(stage));
06121         if(!luminance_used[stage]) continue;
06122         shader_addline(&buffer, "PARAM luminance%u = program.env[%u];\n", stage, ARB_FFP_CONST_LUMINANCE(stage));
06123     }
06124     if(tfactor_used) {
06125         shader_addline(&buffer, "PARAM tfactor = program.env[%u];\n", ARB_FFP_CONST_TFACTOR);
06126     }
06127         shader_addline(&buffer, "PARAM specular_enable = program.env[%u];\n", ARB_FFP_CONST_SPECULAR_ENABLE);
06128 
06129     if(settings->sRGB_write) {
06130         shader_addline(&buffer, "PARAM srgb_consts1 = {%f, %f, %f, %f};\n",
06131                        srgb_mul_low, srgb_cmp, srgb_pow, srgb_mul_high);
06132         shader_addline(&buffer, "PARAM srgb_consts2 = {%f, %f, %f, %f};\n",
06133                        srgb_sub_high, 0.0, 0.0, 0.0);
06134     }
06135 
06136     if (lowest_disabled_stage < 7 && settings->emul_clipplanes)
06137         shader_addline(&buffer, "KIL fragment.texcoord[7];\n");
06138 
06139     /* Generate texture sampling instructions) */
06140     for (stage = 0; stage < MAX_TEXTURES && settings->op[stage].cop != WINED3D_TOP_DISABLE; ++stage)
06141     {
06142         if (!tex_read[stage])
06143             continue;
06144 
06145         switch(settings->op[stage].tex_type) {
06146             case tex_1d:                    textype = "1D";     break;
06147             case tex_2d:                    textype = "2D";     break;
06148             case tex_3d:                    textype = "3D";     break;
06149             case tex_cube:                  textype = "CUBE";   break;
06150             case tex_rect:                  textype = "RECT";   break;
06151             default: textype = "unexpected_textype";   break;
06152         }
06153 
06154         if (settings->op[stage].cop == WINED3D_TOP_BUMPENVMAP
06155                 || settings->op[stage].cop == WINED3D_TOP_BUMPENVMAP_LUMINANCE)
06156             sat = "";
06157         else
06158             sat = "_SAT";
06159 
06160         if(settings->op[stage].projected == proj_none) {
06161             instr = "TEX";
06162         } else if(settings->op[stage].projected == proj_count4 ||
06163                   settings->op[stage].projected == proj_count3) {
06164             instr = "TXP";
06165         } else {
06166             FIXME("Unexpected projection mode %d\n", settings->op[stage].projected);
06167             instr = "TXP";
06168         }
06169 
06170         if (stage > 0
06171                 && (settings->op[stage - 1].cop == WINED3D_TOP_BUMPENVMAP
06172                 || settings->op[stage - 1].cop == WINED3D_TOP_BUMPENVMAP_LUMINANCE))
06173         {
06174             shader_addline(&buffer, "SWZ arg1, bumpmat%u, x, z, 0, 0;\n", stage - 1);
06175             shader_addline(&buffer, "DP3 ret.x, arg1, tex%u;\n", stage - 1);
06176             shader_addline(&buffer, "SWZ arg1, bumpmat%u, y, w, 0, 0;\n", stage - 1);
06177             shader_addline(&buffer, "DP3 ret.y, arg1, tex%u;\n", stage - 1);
06178 
06179             /* with projective textures, texbem only divides the static texture coord, not the displacement,
06180              * so multiply the displacement with the dividing parameter before passing it to TXP
06181              */
06182             if (settings->op[stage].projected != proj_none) {
06183                 if(settings->op[stage].projected == proj_count4) {
06184                     shader_addline(&buffer, "MOV ret.w, fragment.texcoord[%u].w;\n", stage);
06185                     shader_addline(&buffer, "MUL ret.xyz, ret, fragment.texcoord[%u].w, fragment.texcoord[%u];\n", stage, stage);
06186                 } else {
06187                     shader_addline(&buffer, "MOV ret.w, fragment.texcoord[%u].z;\n", stage);
06188                     shader_addline(&buffer, "MAD ret.xyz, ret, fragment.texcoord[%u].z, fragment.texcoord[%u];\n", stage, stage);
06189                 }
06190             } else {
06191                 shader_addline(&buffer, "ADD ret, ret, fragment.texcoord[%u];\n", stage);
06192             }
06193 
06194             shader_addline(&buffer, "%s%s tex%u, ret, texture[%u], %s;\n",
06195                     instr, sat, stage, stage, textype);
06196             if (settings->op[stage - 1].cop == WINED3D_TOP_BUMPENVMAP_LUMINANCE)
06197             {
06198                 shader_addline(&buffer, "MAD_SAT ret.x, tex%u.z, luminance%u.x, luminance%u.y;\n",
06199                                stage - 1, stage - 1, stage - 1);
06200                 shader_addline(&buffer, "MUL tex%u, tex%u, ret.x;\n", stage, stage);
06201             }
06202         } else if(settings->op[stage].projected == proj_count3) {
06203             shader_addline(&buffer, "MOV ret, fragment.texcoord[%u];\n", stage);
06204             shader_addline(&buffer, "MOV ret.w, ret.z;\n");
06205             shader_addline(&buffer, "%s%s tex%u, ret, texture[%u], %s;\n",
06206                             instr, sat, stage, stage, textype);
06207         } else {
06208             shader_addline(&buffer, "%s%s tex%u, fragment.texcoord[%u], texture[%u], %s;\n",
06209                             instr, sat, stage, stage, stage, textype);
06210         }
06211 
06212         sprintf(colorcor_dst, "tex%u", stage);
06213         gen_color_correction(&buffer, colorcor_dst, WINED3DSP_WRITEMASK_ALL, "const.x", "const.y",
06214                 settings->op[stage].color_fixup);
06215     }
06216 
06217     /* Generate the main shader */
06218     for (stage = 0; stage < MAX_TEXTURES; ++stage)
06219     {
06220         if (settings->op[stage].cop == WINED3D_TOP_DISABLE)
06221         {
06222             if (!stage)
06223                 final_combiner_src = "fragment.color.primary";
06224             break;
06225         }
06226 
06227         if (settings->op[stage].cop == WINED3D_TOP_SELECT_ARG1
06228                 && settings->op[stage].aop == WINED3D_TOP_SELECT_ARG1)
06229             op_equal = settings->op[stage].carg1 == settings->op[stage].aarg1;
06230         else if (settings->op[stage].cop == WINED3D_TOP_SELECT_ARG1
06231                 && settings->op[stage].aop == WINED3D_TOP_SELECT_ARG2)
06232             op_equal = settings->op[stage].carg1 == settings->op[stage].aarg2;
06233         else if (settings->op[stage].cop == WINED3D_TOP_SELECT_ARG2
06234                 && settings->op[stage].aop == WINED3D_TOP_SELECT_ARG1)
06235             op_equal = settings->op[stage].carg2 == settings->op[stage].aarg1;
06236         else if (settings->op[stage].cop == WINED3D_TOP_SELECT_ARG2
06237                 && settings->op[stage].aop == WINED3D_TOP_SELECT_ARG2)
06238             op_equal = settings->op[stage].carg2 == settings->op[stage].aarg2;
06239         else
06240             op_equal = settings->op[stage].aop   == settings->op[stage].cop
06241                     && settings->op[stage].carg0 == settings->op[stage].aarg0
06242                     && settings->op[stage].carg1 == settings->op[stage].aarg1
06243                     && settings->op[stage].carg2 == settings->op[stage].aarg2;
06244 
06245         if (settings->op[stage].aop == WINED3D_TOP_DISABLE)
06246         {
06247             gen_ffp_instr(&buffer, stage, TRUE, FALSE, settings->op[stage].dst,
06248                           settings->op[stage].cop, settings->op[stage].carg0,
06249                           settings->op[stage].carg1, settings->op[stage].carg2);
06250             if (!stage)
06251                 shader_addline(&buffer, "MOV ret.w, fragment.color.primary.w;\n");
06252         }
06253         else if (op_equal)
06254         {
06255             gen_ffp_instr(&buffer, stage, TRUE, TRUE, settings->op[stage].dst,
06256                           settings->op[stage].cop, settings->op[stage].carg0,
06257                           settings->op[stage].carg1, settings->op[stage].carg2);
06258         } else {
06259             gen_ffp_instr(&buffer, stage, TRUE, FALSE, settings->op[stage].dst,
06260                           settings->op[stage].cop, settings->op[stage].carg0,
06261                           settings->op[stage].carg1, settings->op[stage].carg2);
06262             gen_ffp_instr(&buffer, stage, FALSE, TRUE, settings->op[stage].dst,
06263                           settings->op[stage].aop, settings->op[stage].aarg0,
06264                           settings->op[stage].aarg1, settings->op[stage].aarg2);
06265         }
06266     }
06267 
06268     if(settings->sRGB_write) {
06269         shader_addline(&buffer, "MAD ret, fragment.color.secondary, specular_enable, %s;\n", final_combiner_src);
06270         arbfp_add_sRGB_correction(&buffer, "ret", "arg0", "arg1", "arg2", "tempreg", FALSE);
06271         shader_addline(&buffer, "MOV result.color, ret;\n");
06272     } else {
06273         shader_addline(&buffer, "MAD result.color, fragment.color.secondary, specular_enable, %s;\n", final_combiner_src);
06274     }
06275 
06276     /* Footer */
06277     shader_addline(&buffer, "END\n");
06278 
06279     /* Generate the shader */
06280     GL_EXTCALL(glGenProgramsARB(1, &ret));
06281     GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, ret));
06282     GL_EXTCALL(glProgramStringARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB,
06283             strlen(buffer.buffer), buffer.buffer));
06284     checkGLcall("glProgramStringARB()");
06285 
06286     glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &pos);
06287     if (pos != -1)
06288     {
06289         FIXME("Fragment program error at position %d: %s\n\n", pos,
06290               debugstr_a((const char *)glGetString(GL_PROGRAM_ERROR_STRING_ARB)));
06291         shader_arb_dump_program_source(buffer.buffer);
06292     }
06293     else
06294     {
06295         GLint native;
06296 
06297         GL_EXTCALL(glGetProgramivARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_UNDER_NATIVE_LIMITS_ARB, &native));
06298         checkGLcall("glGetProgramivARB()");
06299         if (!native) WARN("Program exceeds native resource limits.\n");
06300     }
06301 
06302     shader_buffer_free(&buffer);
06303     return ret;
06304 }
06305 
06306 static void fragment_prog_arbfp(struct wined3d_context *context, const struct wined3d_state *state, DWORD state_id)
06307 {
06308     const struct wined3d_device *device = context->swapchain->device;
06309     const struct wined3d_gl_info *gl_info = context->gl_info;
06310     struct shader_arb_priv *priv = device->fragment_priv;
06311     BOOL use_vshader = use_vs(state);
06312     BOOL use_pshader = use_ps(state);
06313     struct ffp_frag_settings settings;
06314     const struct arbfp_ffp_desc *desc;
06315     unsigned int i;
06316 
06317     TRACE("context %p, state %p, state_id %#x.\n", context, state, state_id);
06318 
06319     if (isStateDirty(context, STATE_RENDER(WINED3D_RS_FOGENABLE)))
06320     {
06321         if (!use_pshader && device->shader_backend == &arb_program_shader_backend && context->last_was_pshader)
06322         {
06323             /* Reload fixed function constants since they collide with the
06324              * pixel shader constants. */
06325             for (i = 0; i < MAX_TEXTURES; ++i)
06326             {
06327                 set_bumpmat_arbfp(context, state, STATE_TEXTURESTAGE(i, WINED3D_TSS_BUMPENV_MAT00));
06328             }
06329             state_texfactor_arbfp(context, state, STATE_RENDER(WINED3D_RS_TEXTUREFACTOR));
06330             state_arb_specularenable(context, state, STATE_RENDER(WINED3D_RS_SPECULARENABLE));
06331         }
06332         else if (use_pshader && !isStateDirty(context, context->state_table[STATE_VSHADER].representative))
06333         {
06334             device->shader_backend->shader_select(context, use_pshader, use_vshader);
06335         }
06336         return;
06337     }
06338 
06339     if (!use_pshader)
06340     {
06341         /* Find or create a shader implementing the fixed function pipeline
06342          * settings, then activate it. */
06343         gen_ffp_frag_op(device, state, &settings, FALSE);
06344         desc = (const struct arbfp_ffp_desc *)find_ffp_frag_shader(&priv->fragment_shaders, &settings);
06345         if(!desc) {
06346             struct arbfp_ffp_desc *new_desc = HeapAlloc(GetProcessHeap(), 0, sizeof(*new_desc));
06347             if (!new_desc)
06348             {
06349                 ERR("Out of memory\n");
06350                 return;
06351             }
06352             new_desc->num_textures_used = 0;
06353             for (i = 0; i < gl_info->limits.texture_stages; ++i)
06354             {
06355                 if (settings.op[i].cop == WINED3D_TOP_DISABLE)
06356                     break;
06357                 new_desc->num_textures_used = i;
06358             }
06359 
06360             memcpy(&new_desc->parent.settings, &settings, sizeof(settings));
06361             new_desc->shader = gen_arbfp_ffp_shader(&settings, gl_info);
06362             add_ffp_frag_shader(&priv->fragment_shaders, &new_desc->parent);
06363             TRACE("Allocated fixed function replacement shader descriptor %p\n", new_desc);
06364             desc = new_desc;
06365         }
06366 
06367         /* Now activate the replacement program. GL_FRAGMENT_PROGRAM_ARB is already active(however, note the
06368          * comment above the shader_select call below). If e.g. GLSL is active, the shader_select call will
06369          * deactivate it.
06370          */
06371         GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, desc->shader));
06372         checkGLcall("glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, desc->shader)");
06373         priv->current_fprogram_id = desc->shader;
06374 
06375         if (device->shader_backend == &arb_program_shader_backend && context->last_was_pshader)
06376         {
06377             /* Reload fixed function constants since they collide with the
06378              * pixel shader constants. */
06379             for (i = 0; i < MAX_TEXTURES; ++i)
06380             {
06381                 set_bumpmat_arbfp(context, state, STATE_TEXTURESTAGE(i, WINED3D_TSS_BUMPENV_MAT00));
06382             }
06383             state_texfactor_arbfp(context, state, STATE_RENDER(WINED3D_RS_TEXTUREFACTOR));
06384             state_arb_specularenable(context, state, STATE_RENDER(WINED3D_RS_SPECULARENABLE));
06385         }
06386         context->last_was_pshader = FALSE;
06387     } else {
06388         context->last_was_pshader = TRUE;
06389     }
06390 
06391     /* Finally, select the shader. If a pixel shader is used, it will be set and enabled by the shader backend.
06392      * If this shader backend is arbfp(most likely), then it will simply overwrite the last fixed function replace-
06393      * ment shader. If the shader backend is not ARB, it currently is important that the opengl implementation
06394      * type overwrites GL_ARB_fragment_program. This is currently the case with GLSL. If we really want to use
06395      * atifs or nvrc pixel shaders with arb fragment programs we'd have to disable GL_FRAGMENT_PROGRAM_ARB here
06396      *
06397      * Don't call shader_select if the vertex shader is dirty, because it will be called later on by the vertex
06398      * shader handler
06399      */
06400     if (!isStateDirty(context, context->state_table[STATE_VSHADER].representative))
06401     {
06402         device->shader_backend->shader_select(context, use_pshader, use_vshader);
06403 
06404         if (!isStateDirty(context, STATE_VERTEXSHADERCONSTANT) && (use_vshader || use_pshader))
06405             context_apply_state(context, state, STATE_VERTEXSHADERCONSTANT);
06406     }
06407     if (use_pshader)
06408         context_apply_state(context, state, STATE_PIXELSHADERCONSTANT);
06409 }
06410 
06411 /* We can't link the fog states to the fragment state directly since the
06412  * vertex pipeline links them to FOGENABLE. A different linking in different
06413  * pipeline parts can't be expressed in the combined state table, so we need
06414  * to handle that with a forwarding function. The other invisible side effect
06415  * is that changing the fog start and fog end (which links to FOGENABLE in
06416  * vertex) results in the fragment_prog_arbfp function being called because
06417  * FOGENABLE is dirty, which calls this function here. */
06418 static void state_arbfp_fog(struct wined3d_context *context, const struct wined3d_state *state, DWORD state_id)
06419 {
06420     enum fogsource new_source;
06421 
06422     TRACE("context %p, state %p, state_id %#x.\n", context, state, state_id);
06423 
06424     if (!isStateDirty(context, STATE_PIXELSHADER))
06425         fragment_prog_arbfp(context, state, state_id);
06426 
06427     if (!state->render_states[WINED3D_RS_FOGENABLE])
06428         return;
06429 
06430     if (state->render_states[WINED3D_RS_FOGTABLEMODE] == WINED3D_FOG_NONE)
06431     {
06432         if (use_vs(state))
06433         {
06434             new_source = FOGSOURCE_VS;
06435         }
06436         else
06437         {
06438             if (state->render_states[WINED3D_RS_FOGVERTEXMODE] == WINED3D_FOG_NONE || context->last_was_rhw)
06439                 new_source = FOGSOURCE_COORD;
06440             else
06441                 new_source = FOGSOURCE_FFP;
06442         }
06443     }
06444     else
06445     {
06446         new_source = FOGSOURCE_FFP;
06447     }
06448 
06449     if (new_source != context->fog_source)
06450     {
06451         context->fog_source = new_source;
06452         state_fogstartend(context, state, STATE_RENDER(WINED3D_RS_FOGSTART));
06453     }
06454 }
06455 
06456 static void textransform(struct wined3d_context *context, const struct wined3d_state *state, DWORD state_id)
06457 {
06458     if (!isStateDirty(context, STATE_PIXELSHADER))
06459         fragment_prog_arbfp(context, state, state_id);
06460 }
06461 
06462 static const struct StateEntryTemplate arbfp_fragmentstate_template[] =
06463 {
06464     {STATE_RENDER(WINED3D_RS_TEXTUREFACTOR),              { STATE_RENDER(WINED3D_RS_TEXTUREFACTOR),             state_texfactor_arbfp   }, WINED3D_GL_EXT_NONE             },
06465     {STATE_TEXTURESTAGE(0, WINED3D_TSS_COLOR_OP),         { STATE_PIXELSHADER,                                  NULL                    }, WINED3D_GL_EXT_NONE             },
06466     {STATE_TEXTURESTAGE(0, WINED3D_TSS_COLOR_ARG1),       { STATE_PIXELSHADER,                                  NULL                    }, WINED3D_GL_EXT_NONE             },
06467     {STATE_TEXTURESTAGE(0, WINED3D_TSS_COLOR_ARG2),       { STATE_PIXELSHADER,                                  NULL                    }, WINED3D_GL_EXT_NONE             },
06468     {STATE_TEXTURESTAGE(0, WINED3D_TSS_COLOR_ARG0),       { STATE_PIXELSHADER,                                  NULL                    }, WINED3D_GL_EXT_NONE             },
06469     {STATE_TEXTURESTAGE(0, WINED3D_TSS_ALPHA_OP),         { STATE_PIXELSHADER,                                  NULL                    }, WINED3D_GL_EXT_NONE             },
06470     {STATE_TEXTURESTAGE(0, WINED3D_TSS_ALPHA_ARG1),       { STATE_PIXELSHADER,                                  NULL                    }, WINED3D_GL_EXT_NONE             },
06471     {STATE_TEXTURESTAGE(0, WINED3D_TSS_ALPHA_ARG2),       { STATE_PIXELSHADER,                                  NULL                    }, WINED3D_GL_EXT_NONE             },
06472     {STATE_TEXTURESTAGE(0, WINED3D_TSS_ALPHA_ARG0),       { STATE_PIXELSHADER,                                  NULL                    }, WINED3D_GL_EXT_NONE             },
06473     {STATE_TEXTURESTAGE(0, WINED3D_TSS_RESULT_ARG),       { STATE_PIXELSHADER,                                  NULL                    }, WINED3D_GL_EXT_NONE             },
06474     {STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_MAT00),    { STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_MAT00),   set_bumpmat_arbfp       }, WINED3D_GL_EXT_NONE             },
06475     {STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_MAT01),    { STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_MAT00),   NULL                    }, WINED3D_GL_EXT_NONE             },
06476     {STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_MAT10),    { STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_MAT00),   NULL                    }, WINED3D_GL_EXT_NONE             },
06477     {STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_MAT11),    { STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_MAT00),   NULL                    }, WINED3D_GL_EXT_NONE             },
06478     {STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_LSCALE),   { STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_LSCALE),  tex_bumpenvlum_arbfp    }, WINED3D_GL_EXT_NONE             },
06479     {STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_LOFFSET),  { STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_LSCALE),  NULL                    }, WINED3D_GL_EXT_NONE             },
06480     {STATE_TEXTURESTAGE(1, WINED3D_TSS_COLOR_OP),         { STATE_PIXELSHADER,                                  NULL                    }, WINED3D_GL_EXT_NONE             },
06481     {STATE_TEXTURESTAGE(1, WINED3D_TSS_COLOR_ARG1),       { STATE_PIXELSHADER,                                  NULL                    }, WINED3D_GL_EXT_NONE             },
06482     {STATE_TEXTURESTAGE(1, WINED3D_TSS_COLOR_ARG2),       { STATE_PIXELSHADER,                                  NULL                    }, WINED3D_GL_EXT_NONE             },
06483     {STATE_TEXTURESTAGE(1, WINED3D_TSS_COLOR_ARG0),       { STATE_PIXELSHADER,                                  NULL                    }, WINED3D_GL_EXT_NONE             },
06484     {STATE_TEXTURESTAGE(1, WINED3D_TSS_ALPHA_OP),         { STATE_PIXELSHADER,                                  NULL                    }, WINED3D_GL_EXT_NONE             },
06485     {STATE_TEXTURESTAGE(1, WINED3D_TSS_ALPHA_ARG1),       { STATE_PIXELSHADER,                                  NULL                    }, WINED3D_GL_EXT_NONE             },
06486     {STATE_TEXTURESTAGE(1, WINED3D_TSS_ALPHA_ARG2),       { STATE_PIXELSHADER,                                  NULL                    }, WINED3D_GL_EXT_NONE             },
06487     {STATE_TEXTURESTAGE(1, WINED3D_TSS_ALPHA_ARG0),       { STATE_PIXELSHADER,                                  NULL                    }, WINED3D_GL_EXT_NONE             },
06488     {STATE_TEXTURESTAGE(1, WINED3D_TSS_RESULT_ARG),       { STATE_PIXELSHADER,                                  NULL                    }, WINED3D_GL_EXT_NONE             },
06489     {STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_MAT00),    { STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_MAT00),   set_bumpmat_arbfp       }, WINED3D_GL_EXT_NONE             },
06490     {STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_MAT01),    { STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_MAT00),   NULL                    }, WINED3D_GL_EXT_NONE             },
06491     {STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_MAT10),    { STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_MAT00),   NULL                    }, WINED3D_GL_EXT_NONE             },
06492     {STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_MAT11),    { STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_MAT00),   NULL                    }, WINED3D_GL_EXT_NONE             },
06493     {STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_LSCALE),   { STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_LSCALE),  tex_bumpenvlum_arbfp    }, WINED3D_GL_EXT_NONE             },
06494     {STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_LOFFSET),  { STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_LSCALE),  NULL                    }, WINED3D_GL_EXT_NONE             },
06495     {STATE_TEXTURESTAGE(2, WINED3D_TSS_COLOR_OP),         { STATE_PIXELSHADER,                                  NULL                    }, WINED3D_GL_EXT_NONE             },
06496     {STATE_TEXTURESTAGE(2, WINED3D_TSS_COLOR_ARG1),       { STATE_PIXELSHADER,                                  NULL                    }, WINED3D_GL_EXT_NONE             },
06497     {STATE_TEXTURESTAGE(2, WINED3D_TSS_COLOR_ARG2),       { STATE_PIXELSHADER,                                  NULL                    }, WINED3D_GL_EXT_NONE             },
06498     {STATE_TEXTURESTAGE(2, WINED3D_TSS_COLOR_ARG0),       { STATE_PIXELSHADER,                                  NULL                    }, WINED3D_GL_EXT_NONE             },
06499     {STATE_TEXTURESTAGE(2, WINED3D_TSS_ALPHA_OP),         { STATE_PIXELSHADER,                                  NULL                    }, WINED3D_GL_EXT_NONE             },
06500     {STATE_TEXTURESTAGE(2, WINED3D_TSS_ALPHA_ARG1),       { STATE_PIXELSHADER,                                  NULL                    }, WINED3D_GL_EXT_NONE             },
06501     {STATE_TEXTURESTAGE(2, WINED3D_TSS_ALPHA_ARG2),       { STATE_PIXELSHADER,                                  NULL                    }, WINED3D_GL_EXT_NONE             },
06502     {STATE_TEXTURESTAGE(2, WINED3D_TSS_ALPHA_ARG0),       { STATE_PIXELSHADER,                                  NULL                    }, WINED3D_GL_EXT_NONE             },
06503     {STATE_TEXTURESTAGE(2, WINED3D_TSS_RESULT_ARG),       { STATE_PIXELSHADER,                                  NULL                    }, WINED3D_GL_EXT_NONE             },
06504     {STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_MAT00),    { STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_MAT00),   set_bumpmat_arbfp       }, WINED3D_GL_EXT_NONE             },
06505     {STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_MAT01),    { STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_MAT00),   NULL                    }, WINED3D_GL_EXT_NONE             },
06506     {STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_MAT10),    { STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_MAT00),   NULL                    }, WINED3D_GL_EXT_NONE             },
06507     {STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_MAT11),    { STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_MAT00),   NULL                    }, WINED3D_GL_EXT_NONE             },
06508     {STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_LSCALE),   { STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_LSCALE),  tex_bumpenvlum_arbfp    }, WINED3D_GL_EXT_NONE             },
06509     {STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_LOFFSET),  { STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_LSCALE),  NULL                    }, WINED3D_GL_EXT_NONE             },
06510     {STATE_TEXTURESTAGE(3, WINED3D_TSS_COLOR_OP),         { STATE_PIXELSHADER,                                  NULL                    }, WINED3D_GL_EXT_NONE             },
06511     {STATE_TEXTURESTAGE(3, WINED3D_TSS_COLOR_ARG1),       { STATE_PIXELSHADER,                                  NULL                    }, WINED3D_GL_EXT_NONE             },
06512     {STATE_TEXTURESTAGE(3, WINED3D_TSS_COLOR_ARG2),       { STATE_PIXELSHADER,                                  NULL                    }, WINED3D_GL_EXT_NONE             },
06513     {STATE_TEXTURESTAGE(3, WINED3D_TSS_COLOR_ARG0),       { STATE_PIXELSHADER,                                  NULL                    }, WINED3D_GL_EXT_NONE             },
06514     {STATE_TEXTURESTAGE(3, WINED3D_TSS_ALPHA_OP),         { STATE_PIXELSHADER,                                  NULL                    }, WINED3D_GL_EXT_NONE             },
06515     {STATE_TEXTURESTAGE(3, WINED3D_TSS_ALPHA_ARG1),       { STATE_PIXELSHADER,                                  NULL                    }, WINED3D_GL_EXT_NONE             },
06516     {STATE_TEXTURESTAGE(3, WINED3D_TSS_ALPHA_ARG2),       { STATE_PIXELSHADER,                                  NULL                    }, WINED3D_GL_EXT_NONE             },
06517     {STATE_TEXTURESTAGE(3, WINED3D_TSS_ALPHA_ARG0),       { STATE_PIXELSHADER,                                  NULL                    }, WINED3D_GL_EXT_NONE             },
06518     {STATE_TEXTURESTAGE(3, WINED3D_TSS_RESULT_ARG),       { STATE_PIXELSHADER,                                  NULL                    }, WINED3D_GL_EXT_NONE             },
06519     {STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_MAT00),    { STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_MAT00),   set_bumpmat_arbfp       }, WINED3D_GL_EXT_NONE             },
06520     {STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_MAT01),    { STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_MAT00),   NULL                    }, WINED3D_GL_EXT_NONE             },
06521     {STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_MAT10),    { STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_MAT00),   NULL                    }, WINED3D_GL_EXT_NONE             },
06522     {STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_MAT11),    { STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_MAT00),   NULL                    }, WINED3D_GL_EXT_NONE             },
06523     {STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_LSCALE),   { STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_LSCALE),  tex_bumpenvlum_arbfp    }, WINED3D_GL_EXT_NONE             },
06524     {STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_LOFFSET),  { STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_LSCALE),  NULL                    }, WINED3D_GL_EXT_NONE             },
06525     {STATE_TEXTURESTAGE(4, WINED3D_TSS_COLOR_OP),         { STATE_PIXELSHADER,                                  NULL                    }, WINED3D_GL_EXT_NONE             },
06526     {STATE_TEXTURESTAGE(4, WINED3D_TSS_COLOR_ARG1),       { STATE_PIXELSHADER,                                  NULL                    }, WINED3D_GL_EXT_NONE             },
06527     {STATE_TEXTURESTAGE(4, WINED3D_TSS_COLOR_ARG2),       { STATE_PIXELSHADER,                                  NULL                    }, WINED3D_GL_EXT_NONE             },
06528     {STATE_TEXTURESTAGE(4, WINED3D_TSS_COLOR_ARG0),       { STATE_PIXELSHADER,                                  NULL                    }, WINED3D_GL_EXT_NONE             },
06529     {STATE_TEXTURESTAGE(4, WINED3D_TSS_ALPHA_OP),         { STATE_PIXELSHADER,                                  NULL                    }, WINED3D_GL_EXT_NONE             },
06530     {STATE_TEXTURESTAGE(4, WINED3D_TSS_ALPHA_ARG1),       { STATE_PIXELSHADER,                                  NULL                    }, WINED3D_GL_EXT_NONE             },
06531     {STATE_TEXTURESTAGE(4, WINED3D_TSS_ALPHA_ARG2),       { STATE_PIXELSHADER,                                  NULL                    }, WINED3D_GL_EXT_NONE             },
06532     {STATE_TEXTURESTAGE(4, WINED3D_TSS_ALPHA_ARG0),       { STATE_PIXELSHADER,                                  NULL                    }, WINED3D_GL_EXT_NONE             },
06533     {STATE_TEXTURESTAGE(4, WINED3D_TSS_RESULT_ARG),       { STATE_PIXELSHADER,                                  NULL                    }, WINED3D_GL_EXT_NONE             },
06534     {STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_MAT00),    { STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_MAT00),   set_bumpmat_arbfp       }, WINED3D_GL_EXT_NONE             },
06535     {STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_MAT01),    { STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_MAT00),   NULL                    }, WINED3D_GL_EXT_NONE             },
06536     {STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_MAT10),    { STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_MAT00),   NULL                    }, WINED3D_GL_EXT_NONE             },
06537     {STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_MAT11),    { STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_MAT00),   NULL                    }, WINED3D_GL_EXT_NONE             },
06538     {STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_LSCALE),   { STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_LSCALE),  tex_bumpenvlum_arbfp    }, WINED3D_GL_EXT_NONE             },
06539     {STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_LOFFSET),  { STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_LSCALE),  NULL                    }, WINED3D_GL_EXT_NONE             },
06540     {STATE_TEXTURESTAGE(5, WINED3D_TSS_COLOR_OP),         { STATE_PIXELSHADER,                                  NULL                    }, WINED3D_GL_EXT_NONE             },
06541     {STATE_TEXTURESTAGE(5, WINED3D_TSS_COLOR_ARG1),       { STATE_PIXELSHADER,                                  NULL                    }, WINED3D_GL_EXT_NONE             },
06542     {STATE_TEXTURESTAGE(5, WINED3D_TSS_COLOR_ARG2),       { STATE_PIXELSHADER,                                  NULL                    }, WINED3D_GL_EXT_NONE             },
06543     {STATE_TEXTURESTAGE(5, WINED3D_TSS_COLOR_ARG0),       { STATE_PIXELSHADER,                                  NULL                    }, WINED3D_GL_EXT_NONE             },
06544     {STATE_TEXTURESTAGE(5, WINED3D_TSS_ALPHA_OP),         { STATE_PIXELSHADER,                                  NULL                    }, WINED3D_GL_EXT_NONE             },
06545     {STATE_TEXTURESTAGE(5, WINED3D_TSS_ALPHA_ARG1),       { STATE_PIXELSHADER,                                  NULL                    }, WINED3D_GL_EXT_NONE             },
06546     {STATE_TEXTURESTAGE(5, WINED3D_TSS_ALPHA_ARG2),       { STATE_PIXELSHADER,                                  NULL                    }, WINED3D_GL_EXT_NONE             },
06547     {STATE_TEXTURESTAGE(5, WINED3D_TSS_ALPHA_ARG0),       { STATE_PIXELSHADER,                                  NULL                    }, WINED3D_GL_EXT_NONE             },
06548     {STATE_TEXTURESTAGE(5, WINED3D_TSS_RESULT_ARG),       { STATE_PIXELSHADER,                                  NULL                    }, WINED3D_GL_EXT_NONE             },
06549     {STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_MAT00),    { STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_MAT00),   set_bumpmat_arbfp       }, WINED3D_GL_EXT_NONE             },
06550     {STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_MAT01),    { STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_MAT00),   NULL                    }, WINED3D_GL_EXT_NONE             },
06551     {STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_MAT10),    { STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_MAT00),   NULL                    }, WINED3D_GL_EXT_NONE             },
06552     {STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_MAT11),    { STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_MAT00),   NULL                    }, WINED3D_GL_EXT_NONE             },
06553     {STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_LSCALE),   { STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_LSCALE),  tex_bumpenvlum_arbfp    }, WINED3D_GL_EXT_NONE             },
06554     {STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_LOFFSET),  { STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_LSCALE),  NULL                    }, WINED3D_GL_EXT_NONE             },
06555     {STATE_TEXTURESTAGE(6, WINED3D_TSS_COLOR_OP),         { STATE_PIXELSHADER,                                  NULL                    }, WINED3D_GL_EXT_NONE             },
06556     {STATE_TEXTURESTAGE(6, WINED3D_TSS_COLOR_ARG1),       { STATE_PIXELSHADER,                                  NULL                    }, WINED3D_GL_EXT_NONE             },
06557     {STATE_TEXTURESTAGE(6, WINED3D_TSS_COLOR_ARG2),       { STATE_PIXELSHADER,                                  NULL                    }, WINED3D_GL_EXT_NONE             },
06558     {STATE_TEXTURESTAGE(6, WINED3D_TSS_COLOR_ARG0),       { STATE_PIXELSHADER,                                  NULL                    }, WINED3D_GL_EXT_NONE             },
06559     {STATE_TEXTURESTAGE(6, WINED3D_TSS_ALPHA_OP),         { STATE_PIXELSHADER,                                  NULL                    }, WINED3D_GL_EXT_NONE             },
06560     {STATE_TEXTURESTAGE(6, WINED3D_TSS_ALPHA_ARG1),       { STATE_PIXELSHADER,                                  NULL                    }, WINED3D_GL_EXT_NONE             },
06561     {STATE_TEXTURESTAGE(6, WINED3D_TSS_ALPHA_ARG2),       { STATE_PIXELSHADER,                                  NULL                    }, WINED3D_GL_EXT_NONE             },
06562     {STATE_TEXTURESTAGE(6, WINED3D_TSS_ALPHA_ARG0),       { STATE_PIXELSHADER,                                  NULL                    }, WINED3D_GL_EXT_NONE             },
06563     {STATE_TEXTURESTAGE(6, WINED3D_TSS_RESULT_ARG),       { STATE_PIXELSHADER,                                  NULL                    }, WINED3D_GL_EXT_NONE             },
06564     {STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_MAT00),    { STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_MAT00),   set_bumpmat_arbfp       }, WINED3D_GL_EXT_NONE             },
06565     {STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_MAT01),    { STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_MAT00),   NULL                    }, WINED3D_GL_EXT_NONE             },
06566     {STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_MAT10),    { STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_MAT00),   NULL                    }, WINED3D_GL_EXT_NONE             },
06567     {STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_MAT11),    { STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_MAT00),   NULL                    }, WINED3D_GL_EXT_NONE             },
06568     {STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_LSCALE),   { STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_LSCALE),  tex_bumpenvlum_arbfp    }, WINED3D_GL_EXT_NONE             },
06569     {STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_LOFFSET),  { STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_LSCALE),  NULL                    }, WINED3D_GL_EXT_NONE             },
06570     {STATE_TEXTURESTAGE(7, WINED3D_TSS_COLOR_OP),         { STATE_PIXELSHADER,                                  NULL                    }, WINED3D_GL_EXT_NONE             },
06571     {STATE_TEXTURESTAGE(7, WINED3D_TSS_COLOR_ARG1),       { STATE_PIXELSHADER,                                  NULL                    }, WINED3D_GL_EXT_NONE             },
06572     {STATE_TEXTURESTAGE(7, WINED3D_TSS_COLOR_ARG2),       { STATE_PIXELSHADER,                                  NULL                    }, WINED3D_GL_EXT_NONE             },
06573     {STATE_TEXTURESTAGE(7, WINED3D_TSS_COLOR_ARG0),       { STATE_PIXELSHADER,                                  NULL                    }, WINED3D_GL_EXT_NONE             },
06574     {STATE_TEXTURESTAGE(7, WINED3D_TSS_ALPHA_OP),         { STATE_PIXELSHADER,                                  NULL                    }, WINED3D_GL_EXT_NONE             },
06575     {STATE_TEXTURESTAGE(7, WINED3D_TSS_ALPHA_ARG1),       { STATE_PIXELSHADER,                                  NULL                    }, WINED3D_GL_EXT_NONE             },
06576     {STATE_TEXTURESTAGE(7, WINED3D_TSS_ALPHA_ARG2),       { STATE_PIXELSHADER,                                  NULL                    }, WINED3D_GL_EXT_NONE             },
06577     {STATE_TEXTURESTAGE(7, WINED3D_TSS_ALPHA_ARG0),       { STATE_PIXELSHADER,                                  NULL                    }, WINED3D_GL_EXT_NONE             },
06578     {STATE_TEXTURESTAGE(7, WINED3D_TSS_RESULT_ARG),       { STATE_PIXELSHADER,                                  NULL                    }, WINED3D_GL_EXT_NONE             },
06579     {STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_MAT00),    { STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_MAT00),   set_bumpmat_arbfp       }, WINED3D_GL_EXT_NONE             },
06580     {STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_MAT01),    { STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_MAT00),   NULL                    }, WINED3D_GL_EXT_NONE             },
06581     {STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_MAT10),    { STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_MAT00),   NULL                    }, WINED3D_GL_EXT_NONE             },
06582     {STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_MAT11),    { STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_MAT00),   NULL                    }, WINED3D_GL_EXT_NONE             },
06583     {STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_LSCALE),   { STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_LSCALE),  tex_bumpenvlum_arbfp    }, WINED3D_GL_EXT_NONE             },
06584     {STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_LOFFSET),  { STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_LSCALE),  NULL                    }, WINED3D_GL_EXT_NONE             },
06585     {STATE_PIXELSHADER,                                   { STATE_PIXELSHADER,                                  fragment_prog_arbfp     }, WINED3D_GL_EXT_NONE             },
06586     {STATE_RENDER(WINED3D_RS_FOGENABLE),                  { STATE_RENDER(WINED3D_RS_FOGENABLE),                 state_arbfp_fog         }, WINED3D_GL_EXT_NONE             },
06587     {STATE_RENDER(WINED3D_RS_FOGTABLEMODE),               { STATE_RENDER(WINED3D_RS_FOGENABLE),                 NULL                    }, WINED3D_GL_EXT_NONE             },
06588     {STATE_RENDER(WINED3D_RS_FOGVERTEXMODE),              { STATE_RENDER(WINED3D_RS_FOGENABLE),                 NULL                    }, WINED3D_GL_EXT_NONE             },
06589     {STATE_RENDER(WINED3D_RS_FOGSTART),                   { STATE_RENDER(WINED3D_RS_FOGSTART),                  state_fogstartend       }, WINED3D_GL_EXT_NONE             },
06590     {STATE_RENDER(WINED3D_RS_FOGEND),                     { STATE_RENDER(WINED3D_RS_FOGSTART),                  NULL                    }, WINED3D_GL_EXT_NONE             },
06591     {STATE_RENDER(WINED3D_RS_SRGBWRITEENABLE),            { STATE_PIXELSHADER,                                  NULL                    }, WINED3D_GL_EXT_NONE             },
06592     {STATE_RENDER(WINED3D_RS_FOGCOLOR),                   { STATE_RENDER(WINED3D_RS_FOGCOLOR),                  state_fogcolor          }, WINED3D_GL_EXT_NONE             },
06593     {STATE_RENDER(WINED3D_RS_FOGDENSITY),                 { STATE_RENDER(WINED3D_RS_FOGDENSITY),                state_fogdensity        }, WINED3D_GL_EXT_NONE             },
06594     {STATE_TEXTURESTAGE(0,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), {STATE_TEXTURESTAGE(0, WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), textransform      }, WINED3D_GL_EXT_NONE             },
06595     {STATE_TEXTURESTAGE(1,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), {STATE_TEXTURESTAGE(1, WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), textransform      }, WINED3D_GL_EXT_NONE             },
06596     {STATE_TEXTURESTAGE(2,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), {STATE_TEXTURESTAGE(2, WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), textransform      }, WINED3D_GL_EXT_NONE             },
06597     {STATE_TEXTURESTAGE(3,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), {STATE_TEXTURESTAGE(3, WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), textransform      }, WINED3D_GL_EXT_NONE             },
06598     {STATE_TEXTURESTAGE(4,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), {STATE_TEXTURESTAGE(4, WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), textransform      }, WINED3D_GL_EXT_NONE             },
06599     {STATE_TEXTURESTAGE(5,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), {STATE_TEXTURESTAGE(5, WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), textransform      }, WINED3D_GL_EXT_NONE             },
06600     {STATE_TEXTURESTAGE(6,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), {STATE_TEXTURESTAGE(6, WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), textransform      }, WINED3D_GL_EXT_NONE             },
06601     {STATE_TEXTURESTAGE(7,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), {STATE_TEXTURESTAGE(7, WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), textransform      }, WINED3D_GL_EXT_NONE             },
06602     {STATE_RENDER(WINED3D_RS_SPECULARENABLE),             { STATE_RENDER(WINED3D_RS_SPECULARENABLE),            state_arb_specularenable}, WINED3D_GL_EXT_NONE             },
06603     {0 /* Terminate */,                                   { 0,                                                  0                       }, WINED3D_GL_EXT_NONE             },
06604 };
06605 
06606 const struct fragment_pipeline arbfp_fragment_pipeline = {
06607     arbfp_enable,
06608     arbfp_get_caps,
06609     arbfp_alloc,
06610     arbfp_free,
06611     shader_arb_color_fixup_supported,
06612     arbfp_fragmentstate_template,
06613     TRUE /* We can disable projected textures */
06614 };
06615 
06616 struct arbfp_blit_priv {
06617     GLenum yuy2_rect_shader, yuy2_2d_shader;
06618     GLenum uyvy_rect_shader, uyvy_2d_shader;
06619     GLenum yv12_rect_shader, yv12_2d_shader;
06620     GLenum p8_rect_shader, p8_2d_shader;
06621     GLuint palette_texture;
06622 };
06623 
06624 static HRESULT arbfp_blit_alloc(struct wined3d_device *device)
06625 {
06626     device->blit_priv = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(struct arbfp_blit_priv));
06627     if(!device->blit_priv) {
06628         ERR("Out of memory\n");
06629         return E_OUTOFMEMORY;
06630     }
06631     return WINED3D_OK;
06632 }
06633 
06634 /* Context activation is done by the caller. */
06635 static void arbfp_blit_free(struct wined3d_device *device)
06636 {
06637     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
06638     struct arbfp_blit_priv *priv = device->blit_priv;
06639 
06640     ENTER_GL();
06641     GL_EXTCALL(glDeleteProgramsARB(1, &priv->yuy2_rect_shader));
06642     GL_EXTCALL(glDeleteProgramsARB(1, &priv->yuy2_2d_shader));
06643     GL_EXTCALL(glDeleteProgramsARB(1, &priv->uyvy_rect_shader));
06644     GL_EXTCALL(glDeleteProgramsARB(1, &priv->uyvy_2d_shader));
06645     GL_EXTCALL(glDeleteProgramsARB(1, &priv->yv12_rect_shader));
06646     GL_EXTCALL(glDeleteProgramsARB(1, &priv->yv12_2d_shader));
06647     GL_EXTCALL(glDeleteProgramsARB(1, &priv->p8_rect_shader));
06648     GL_EXTCALL(glDeleteProgramsARB(1, &priv->p8_2d_shader));
06649     checkGLcall("Delete yuv and p8 programs");
06650 
06651     if(priv->palette_texture) glDeleteTextures(1, &priv->palette_texture);
06652     LEAVE_GL();
06653 
06654     HeapFree(GetProcessHeap(), 0, device->blit_priv);
06655     device->blit_priv = NULL;
06656 }
06657 
06658 static BOOL gen_planar_yuv_read(struct wined3d_shader_buffer *buffer, enum complex_fixup fixup,
06659         GLenum textype, char *luminance)
06660 {
06661     char chroma;
06662     const char *tex, *texinstr;
06663 
06664     if (fixup == COMPLEX_FIXUP_UYVY) {
06665         chroma = 'x';
06666         *luminance = 'w';
06667     } else {
06668         chroma = 'w';
06669         *luminance = 'x';
06670     }
06671     switch(textype) {
06672         case GL_TEXTURE_2D:             tex = "2D";     texinstr = "TXP"; break;
06673         case GL_TEXTURE_RECTANGLE_ARB:  tex = "RECT";   texinstr = "TEX"; break;
06674         default:
06675             /* This is more tricky than just replacing the texture type - we have to navigate
06676              * properly in the texture to find the correct chroma values
06677              */
06678             FIXME("Implement yuv correction for non-2d, non-rect textures\n");
06679             return FALSE;
06680     }
06681 
06682     /* First we have to read the chroma values. This means we need at least two pixels(no filtering),
06683      * or 4 pixels(with filtering). To get the unmodified chromas, we have to rid ourselves of the
06684      * filtering when we sample the texture.
06685      *
06686      * These are the rules for reading the chroma:
06687      *
06688      * Even pixel: Cr
06689      * Even pixel: U
06690      * Odd pixel: V
06691      *
06692      * So we have to get the sampling x position in non-normalized coordinates in integers
06693      */
06694     if(textype != GL_TEXTURE_RECTANGLE_ARB) {
06695         shader_addline(buffer, "MUL texcrd.xy, fragment.texcoord[0], size.x;\n");
06696         shader_addline(buffer, "MOV texcrd.w, size.x;\n");
06697     } else {
06698         shader_addline(buffer, "MOV texcrd, fragment.texcoord[0];\n");
06699     }
06700     /* We must not allow filtering between pixel x and x+1, this would mix U and V
06701      * Vertical filtering is ok. However, bear in mind that the pixel center is at
06702      * 0.5, so add 0.5.
06703      */
06704     shader_addline(buffer, "FLR texcrd.x, texcrd.x;\n");
06705     shader_addline(buffer, "ADD texcrd.x, texcrd.x, coef.y;\n");
06706 
06707     /* Divide the x coordinate by 0.5 and get the fraction. This gives 0.25 and 0.75 for the
06708      * even and odd pixels respectively
06709      */
06710     shader_addline(buffer, "MUL texcrd2, texcrd, coef.y;\n");
06711     shader_addline(buffer, "FRC texcrd2, texcrd2;\n");
06712 
06713     /* Sample Pixel 1 */
06714     shader_addline(buffer, "%s luminance, texcrd, texture[0], %s;\n", texinstr, tex);
06715 
06716     /* Put the value into either of the chroma values */
06717     shader_addline(buffer, "SGE temp.x, texcrd2.x, coef.y;\n");
06718     shader_addline(buffer, "MUL chroma.x, luminance.%c, temp.x;\n", chroma);
06719     shader_addline(buffer, "SLT temp.x, texcrd2.x, coef.y;\n");
06720     shader_addline(buffer, "MUL chroma.y, luminance.%c, temp.x;\n", chroma);
06721 
06722     /* Sample pixel 2. If we read an even pixel(SLT above returned 1), sample
06723      * the pixel right to the current one. Otherwise, sample the left pixel.
06724      * Bias and scale the SLT result to -1;1 and add it to the texcrd.x.
06725      */
06726     shader_addline(buffer, "MAD temp.x, temp.x, coef.z, -coef.x;\n");
06727     shader_addline(buffer, "ADD texcrd.x, texcrd, temp.x;\n");
06728     shader_addline(buffer, "%s luminance, texcrd, texture[0], %s;\n", texinstr, tex);
06729 
06730     /* Put the value into the other chroma */
06731     shader_addline(buffer, "SGE temp.x, texcrd2.x, coef.y;\n");
06732     shader_addline(buffer, "MAD chroma.y, luminance.%c, temp.x, chroma.y;\n", chroma);
06733     shader_addline(buffer, "SLT temp.x, texcrd2.x, coef.y;\n");
06734     shader_addline(buffer, "MAD chroma.x, luminance.%c, temp.x, chroma.x;\n", chroma);
06735 
06736     /* TODO: If filtering is enabled, sample a 2nd pair of pixels left or right of
06737      * the current one and lerp the two U and V values
06738      */
06739 
06740     /* This gives the correctly filtered luminance value */
06741     shader_addline(buffer, "TEX luminance, fragment.texcoord[0], texture[0], %s;\n", tex);
06742 
06743     return TRUE;
06744 }
06745 
06746 static BOOL gen_yv12_read(struct wined3d_shader_buffer *buffer, GLenum textype, char *luminance)
06747 {
06748     const char *tex;
06749 
06750     switch(textype) {
06751         case GL_TEXTURE_2D:             tex = "2D";     break;
06752         case GL_TEXTURE_RECTANGLE_ARB:  tex = "RECT";   break;
06753         default:
06754             FIXME("Implement yv12 correction for non-2d, non-rect textures\n");
06755             return FALSE;
06756     }
06757 
06758     /* YV12 surfaces contain a WxH sized luminance plane, followed by a (W/2)x(H/2)
06759      * V and a (W/2)x(H/2) U plane, each with 8 bit per pixel. So the effective
06760      * bitdepth is 12 bits per pixel. Since the U and V planes have only half the
06761      * pitch of the luminance plane, the packing into the gl texture is a bit
06762      * unfortunate. If the whole texture is interpreted as luminance data it looks
06763      * approximately like this:
06764      *
06765      *        +----------------------------------+----
06766      *        |                                  |
06767      *        |                                  |
06768      *        |                                  |
06769      *        |                                  |
06770      *        |                                  |   2
06771      *        |            LUMINANCE             |   -
06772      *        |                                  |   3
06773      *        |                                  |
06774      *        |                                  |
06775      *        |                                  |
06776      *        |                                  |
06777      *        +----------------+-----------------+----
06778      *        |                |                 |
06779      *        |  U even rows   |  U odd rows     |
06780      *        |                |                 |   1
06781      *        +----------------+------------------   -
06782      *        |                |                 |   3
06783      *        |  V even rows   |  V odd rows     |
06784      *        |                |                 |
06785      *        +----------------+-----------------+----
06786      *        |                |                 |
06787      *        |     0.5        |       0.5       |
06788      *
06789      * So it appears as if there are 4 chroma images, but in fact the odd rows
06790      * in the chroma images are in the same row as the even ones. So its is
06791      * kinda tricky to read
06792      *
06793      * When reading from rectangle textures, keep in mind that the input y coordinates
06794      * go from 0 to d3d_height, whereas the opengl texture height is 1.5 * d3d_height
06795      */
06796     shader_addline(buffer, "PARAM yv12_coef = {%f, %f, %f, %f};\n",
06797             2.0f / 3.0f, 1.0f / 6.0f, (2.0f / 3.0f) + (1.0f / 6.0f), 1.0f / 3.0f);
06798 
06799     shader_addline(buffer, "MOV texcrd, fragment.texcoord[0];\n");
06800     /* the chroma planes have only half the width */
06801     shader_addline(buffer, "MUL texcrd.x, texcrd.x, coef.y;\n");
06802 
06803     /* The first value is between 2/3 and 5/6th of the texture's height, so scale+bias
06804      * the coordinate. Also read the right side of the image when reading odd lines
06805      *
06806      * Don't forget to clamp the y values in into the range, otherwise we'll get filtering
06807      * bleeding
06808      */
06809     if(textype == GL_TEXTURE_2D) {
06810 
06811         shader_addline(buffer, "RCP chroma.w, size.y;\n");
06812 
06813         shader_addline(buffer, "MUL texcrd2.y, texcrd.y, size.y;\n");
06814 
06815         shader_addline(buffer, "FLR texcrd2.y, texcrd2.y;\n");
06816         shader_addline(buffer, "MAD texcrd.y, texcrd.y, yv12_coef.y, yv12_coef.x;\n");
06817 
06818         /* Read odd lines from the right side(add size * 0.5 to the x coordinate */
06819         shader_addline(buffer, "ADD texcrd2.x, texcrd2.y, yv12_coef.y;\n"); /* To avoid 0.5 == 0.5 comparisons */
06820         shader_addline(buffer, "FRC texcrd2.x, texcrd2.x;\n");
06821         shader_addline(buffer, "SGE texcrd2.x, texcrd2.x, coef.y;\n");
06822         shader_addline(buffer, "MAD texcrd.x, texcrd2.x, coef.y, texcrd.x;\n");
06823 
06824         /* clamp, keep the half pixel origin in mind */
06825         shader_addline(buffer, "MAD temp.y, coef.y, chroma.w, yv12_coef.x;\n");
06826         shader_addline(buffer, "MAX texcrd.y, temp.y, texcrd.y;\n");
06827         shader_addline(buffer, "MAD temp.y, -coef.y, chroma.w, yv12_coef.z;\n");
06828         shader_addline(buffer, "MIN texcrd.y, temp.y, texcrd.y;\n");
06829     } else {
06830         /* Read from [size - size+size/4] */
06831         shader_addline(buffer, "FLR texcrd.y, texcrd.y;\n");
06832         shader_addline(buffer, "MAD texcrd.y, texcrd.y, coef.w, size.y;\n");
06833 
06834         /* Read odd lines from the right side(add size * 0.5 to the x coordinate */
06835         shader_addline(buffer, "ADD texcrd2.x, texcrd.y, yv12_coef.y;\n"); /* To avoid 0.5 == 0.5 comparisons */
06836         shader_addline(buffer, "FRC texcrd2.x, texcrd2.x;\n");
06837         shader_addline(buffer, "SGE texcrd2.x, texcrd2.x, coef.y;\n");
06838         shader_addline(buffer, "MUL texcrd2.x, texcrd2.x, size.x;\n");
06839         shader_addline(buffer, "MAD texcrd.x, texcrd2.x, coef.y, texcrd.x;\n");
06840 
06841         /* Make sure to read exactly from the pixel center */
06842         shader_addline(buffer, "FLR texcrd.y, texcrd.y;\n");
06843         shader_addline(buffer, "ADD texcrd.y, texcrd.y, coef.y;\n");
06844 
06845         /* Clamp */
06846         shader_addline(buffer, "MAD temp.y, size.y, coef.w, size.y;\n");
06847         shader_addline(buffer, "ADD temp.y, temp.y, -coef.y;\n");
06848         shader_addline(buffer, "MIN texcrd.y, temp.y, texcrd.y;\n");
06849         shader_addline(buffer, "ADD temp.y, size.y, -coef.y;\n");
06850         shader_addline(buffer, "MAX texcrd.y, temp.y, texcrd.y;\n");
06851     }
06852     /* Read the texture, put the result into the output register */
06853     shader_addline(buffer, "TEX temp, texcrd, texture[0], %s;\n", tex);
06854     shader_addline(buffer, "MOV chroma.x, temp.w;\n");
06855 
06856     /* The other chroma value is 1/6th of the texture lower, from 5/6th to 6/6th
06857      * No need to clamp because we're just reusing the already clamped value from above
06858      */
06859     if(textype == GL_TEXTURE_2D) {
06860         shader_addline(buffer, "ADD texcrd.y, texcrd.y, yv12_coef.y;\n");
06861     } else {
06862         shader_addline(buffer, "MAD texcrd.y, size.y, coef.w, texcrd.y;\n");
06863     }
06864     shader_addline(buffer, "TEX temp, texcrd, texture[0], %s;\n", tex);
06865     shader_addline(buffer, "MOV chroma.y, temp.w;\n");
06866 
06867     /* Sample the luminance value. It is in the top 2/3rd of the texture, so scale the y coordinate.
06868      * Clamp the y coordinate to prevent the chroma values from bleeding into the sampled luminance
06869      * values due to filtering
06870      */
06871     shader_addline(buffer, "MOV texcrd, fragment.texcoord[0];\n");
06872     if(textype == GL_TEXTURE_2D) {
06873         /* Multiply the y coordinate by 2/3 and clamp it */
06874         shader_addline(buffer, "MUL texcrd.y, texcrd.y, yv12_coef.x;\n");
06875         shader_addline(buffer, "MAD temp.y, -coef.y, chroma.w, yv12_coef.x;\n");
06876         shader_addline(buffer, "MIN texcrd.y, temp.y, texcrd.y;\n");
06877         shader_addline(buffer, "TEX luminance, texcrd, texture[0], %s;\n", tex);
06878     } else {
06879         /* Reading from texture_rectangles is pretty straightforward, just use the unmodified
06880          * texture coordinate. It is still a good idea to clamp it though, since the opengl texture
06881          * is bigger
06882          */
06883         shader_addline(buffer, "ADD temp.x, size.y, -coef.y;\n");
06884         shader_addline(buffer, "MIN texcrd.y, texcrd.y, size.x;\n");
06885         shader_addline(buffer, "TEX luminance, texcrd, texture[0], %s;\n", tex);
06886     }
06887     *luminance = 'a';
06888 
06889     return TRUE;
06890 }
06891 
06892 static GLuint gen_p8_shader(struct arbfp_blit_priv *priv,
06893         const struct wined3d_gl_info *gl_info, GLenum textype)
06894 {
06895     GLenum shader;
06896     struct wined3d_shader_buffer buffer;
06897     GLint pos;
06898 
06899     /* Shader header */
06900     if (!shader_buffer_init(&buffer))
06901     {
06902         ERR("Failed to initialize shader buffer.\n");
06903         return 0;
06904     }
06905 
06906     ENTER_GL();
06907     GL_EXTCALL(glGenProgramsARB(1, &shader));
06908     GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, shader));
06909     LEAVE_GL();
06910     if(!shader) {
06911         shader_buffer_free(&buffer);
06912         return 0;
06913     }
06914 
06915     shader_addline(&buffer, "!!ARBfp1.0\n");
06916     shader_addline(&buffer, "TEMP index;\n");
06917 
06918     /* { 255/256, 0.5/255*255/256, 0, 0 } */
06919     shader_addline(&buffer, "PARAM constants = { 0.996, 0.00195, 0, 0 };\n");
06920 
06921     /* The alpha-component contains the palette index */
06922     if(textype == GL_TEXTURE_RECTANGLE_ARB)
06923         shader_addline(&buffer, "TXP index, fragment.texcoord[0], texture[0], RECT;\n");
06924     else
06925         shader_addline(&buffer, "TEX index, fragment.texcoord[0], texture[0], 2D;\n");
06926 
06927     /* Scale the index by 255/256 and add a bias of '0.5' in order to sample in the middle */
06928     shader_addline(&buffer, "MAD index.a, index.a, constants.x, constants.y;\n");
06929 
06930     /* Use the alpha-component as an index in the palette to get the final color */
06931     shader_addline(&buffer, "TEX result.color, index.a, texture[1], 1D;\n");
06932     shader_addline(&buffer, "END\n");
06933 
06934     ENTER_GL();
06935     GL_EXTCALL(glProgramStringARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB,
06936             strlen(buffer.buffer), buffer.buffer));
06937     checkGLcall("glProgramStringARB()");
06938 
06939     glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &pos);
06940     if (pos != -1)
06941     {
06942         FIXME("Fragment program error at position %d: %s\n\n", pos,
06943               debugstr_a((const char *)glGetString(GL_PROGRAM_ERROR_STRING_ARB)));
06944         shader_arb_dump_program_source(buffer.buffer);
06945     }
06946 
06947     if (textype == GL_TEXTURE_RECTANGLE_ARB)
06948         priv->p8_rect_shader = shader;
06949     else
06950         priv->p8_2d_shader = shader;
06951 
06952     shader_buffer_free(&buffer);
06953     LEAVE_GL();
06954 
06955     return shader;
06956 }
06957 
06958 /* Context activation is done by the caller. */
06959 static void upload_palette(const struct wined3d_surface *surface, struct wined3d_context *context)
06960 {
06961     BYTE table[256][4];
06962     struct wined3d_device *device = surface->resource.device;
06963     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
06964     struct arbfp_blit_priv *priv = device->blit_priv;
06965     BOOL colorkey = (surface->CKeyFlags & WINEDDSD_CKSRCBLT) ? TRUE : FALSE;
06966 
06967     d3dfmt_p8_init_palette(surface, table, colorkey);
06968 
06969     ENTER_GL();
06970 
06971     if (gl_info->supported[APPLE_CLIENT_STORAGE])
06972     {
06973         glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE);
06974         checkGLcall("glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE)");
06975     }
06976 
06977     if (!priv->palette_texture)
06978         glGenTextures(1, &priv->palette_texture);
06979 
06980     GL_EXTCALL(glActiveTextureARB(GL_TEXTURE1));
06981     glBindTexture(GL_TEXTURE_1D, priv->palette_texture);
06982 
06983     glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE);
06984 
06985     glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
06986     /* Make sure we have discrete color levels. */
06987     glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
06988     glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
06989     /* Upload the palette */
06990     /* TODO: avoid unneeded uploads in the future by adding some SFLAG_PALETTE_DIRTY mechanism */
06991     glTexImage1D(GL_TEXTURE_1D, 0, GL_RGBA, 256, 0, GL_RGBA, GL_UNSIGNED_BYTE, table);
06992 
06993     if (gl_info->supported[APPLE_CLIENT_STORAGE])
06994     {
06995         glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE);
06996         checkGLcall("glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE)");
06997     }
06998 
06999     /* Switch back to unit 0 in which the 2D texture will be stored. */
07000     context_active_texture(context, gl_info, 0);
07001     LEAVE_GL();
07002 }
07003 
07004 /* Context activation is done by the caller. */
07005 static GLuint gen_yuv_shader(struct arbfp_blit_priv *priv, const struct wined3d_gl_info *gl_info,
07006         enum complex_fixup yuv_fixup, GLenum textype)
07007 {
07008     GLenum shader;
07009     struct wined3d_shader_buffer buffer;
07010     char luminance_component;
07011     GLint pos;
07012 
07013     /* Shader header */
07014     if (!shader_buffer_init(&buffer))
07015     {
07016         ERR("Failed to initialize shader buffer.\n");
07017         return 0;
07018     }
07019 
07020     ENTER_GL();
07021     GL_EXTCALL(glGenProgramsARB(1, &shader));
07022     checkGLcall("GL_EXTCALL(glGenProgramsARB(1, &shader))");
07023     GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, shader));
07024     checkGLcall("glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, shader)");
07025     LEAVE_GL();
07026     if(!shader) {
07027         shader_buffer_free(&buffer);
07028         return 0;
07029     }
07030 
07031     /* The YUY2 and UYVY formats contain two pixels packed into a 32 bit macropixel,
07032      * giving effectively 16 bit per pixel. The color consists of a luminance(Y) and
07033      * two chroma(U and V) values. Each macropixel has two luminance values, one for
07034      * each single pixel it contains, and one U and one V value shared between both
07035      * pixels.
07036      *
07037      * The data is loaded into an A8L8 texture. With YUY2, the luminance component
07038      * contains the luminance and alpha the chroma. With UYVY it is vice versa. Thus
07039      * take the format into account when generating the read swizzles
07040      *
07041      * Reading the Y value is straightforward - just sample the texture. The hardware
07042      * takes care of filtering in the horizontal and vertical direction.
07043      *
07044      * Reading the U and V values is harder. We have to avoid filtering horizontally,
07045      * because that would mix the U and V values of one pixel or two adjacent pixels.
07046      * Thus floor the texture coordinate and add 0.5 to get an unfiltered read,
07047      * regardless of the filtering setting. Vertical filtering works automatically
07048      * though - the U and V values of two rows are mixed nicely.
07049      *
07050      * Appart of avoiding filtering issues, the code has to know which value it just
07051      * read, and where it can find the other one. To determine this, it checks if
07052      * it sampled an even or odd pixel, and shifts the 2nd read accordingly.
07053      *
07054      * Handling horizontal filtering of U and V values requires reading a 2nd pair
07055      * of pixels, extracting U and V and mixing them. This is not implemented yet.
07056      *
07057      * An alternative implementation idea is to load the texture as A8R8G8B8 texture,
07058      * with width / 2. This way one read gives all 3 values, finding U and V is easy
07059      * in an unfiltered situation. Finding the luminance on the other hand requires
07060      * finding out if it is an odd or even pixel. The real drawback of this approach
07061      * is filtering. This would have to be emulated completely in the shader, reading
07062      * up two 2 packed pixels in up to 2 rows and interpolating both horizontally and
07063      * vertically. Beyond that it would require adjustments to the texture handling
07064      * code to deal with the width scaling
07065      */
07066     shader_addline(&buffer, "!!ARBfp1.0\n");
07067     shader_addline(&buffer, "TEMP luminance;\n");
07068     shader_addline(&buffer, "TEMP temp;\n");
07069     shader_addline(&buffer, "TEMP chroma;\n");
07070     shader_addline(&buffer, "TEMP texcrd;\n");
07071     shader_addline(&buffer, "TEMP texcrd2;\n");
07072     shader_addline(&buffer, "PARAM coef = {1.0, 0.5, 2.0, 0.25};\n");
07073     shader_addline(&buffer, "PARAM yuv_coef = {1.403, 0.344, 0.714, 1.770};\n");
07074     shader_addline(&buffer, "PARAM size = program.local[0];\n");
07075 
07076     switch (yuv_fixup)
07077     {
07078         case COMPLEX_FIXUP_UYVY:
07079         case COMPLEX_FIXUP_YUY2:
07080             if (!gen_planar_yuv_read(&buffer, yuv_fixup, textype, &luminance_component))
07081             {
07082                 shader_buffer_free(&buffer);
07083                 return 0;
07084             }
07085             break;
07086 
07087         case COMPLEX_FIXUP_YV12:
07088             if (!gen_yv12_read(&buffer, textype, &luminance_component))
07089             {
07090                 shader_buffer_free(&buffer);
07091                 return 0;
07092             }
07093             break;
07094 
07095         default:
07096             FIXME("Unsupported YUV fixup %#x\n", yuv_fixup);
07097             shader_buffer_free(&buffer);
07098             return 0;
07099     }
07100 
07101     /* Calculate the final result. Formula is taken from
07102      * http://www.fourcc.org/fccyvrgb.php. Note that the chroma
07103      * ranges from -0.5 to 0.5
07104      */
07105     shader_addline(&buffer, "SUB chroma.xy, chroma, coef.y;\n");
07106 
07107     shader_addline(&buffer, "MAD result.color.x, chroma.x, yuv_coef.x, luminance.%c;\n", luminance_component);
07108     shader_addline(&buffer, "MAD temp.x, -chroma.y, yuv_coef.y, luminance.%c;\n", luminance_component);
07109     shader_addline(&buffer, "MAD result.color.y, -chroma.x, yuv_coef.z, temp.x;\n");
07110     shader_addline(&buffer, "MAD result.color.z, chroma.y, yuv_coef.w, luminance.%c;\n", luminance_component);
07111     shader_addline(&buffer, "END\n");
07112 
07113     ENTER_GL();
07114     GL_EXTCALL(glProgramStringARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB,
07115             strlen(buffer.buffer), buffer.buffer));
07116     checkGLcall("glProgramStringARB()");
07117 
07118     glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &pos);
07119     if (pos != -1)
07120     {
07121         FIXME("Fragment program error at position %d: %s\n\n", pos,
07122               debugstr_a((const char *)glGetString(GL_PROGRAM_ERROR_STRING_ARB)));
07123         shader_arb_dump_program_source(buffer.buffer);
07124     }
07125     else
07126     {
07127         GLint native;
07128 
07129         GL_EXTCALL(glGetProgramivARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_UNDER_NATIVE_LIMITS_ARB, &native));
07130         checkGLcall("glGetProgramivARB()");
07131         if (!native) WARN("Program exceeds native resource limits.\n");
07132     }
07133 
07134     shader_buffer_free(&buffer);
07135     LEAVE_GL();
07136 
07137     switch (yuv_fixup)
07138     {
07139         case COMPLEX_FIXUP_YUY2:
07140             if (textype == GL_TEXTURE_RECTANGLE_ARB) priv->yuy2_rect_shader = shader;
07141             else priv->yuy2_2d_shader = shader;
07142             break;
07143 
07144         case COMPLEX_FIXUP_UYVY:
07145             if (textype == GL_TEXTURE_RECTANGLE_ARB) priv->uyvy_rect_shader = shader;
07146             else priv->uyvy_2d_shader = shader;
07147             break;
07148 
07149         case COMPLEX_FIXUP_YV12:
07150             if (textype == GL_TEXTURE_RECTANGLE_ARB) priv->yv12_rect_shader = shader;
07151             else priv->yv12_2d_shader = shader;
07152             break;
07153         default:
07154             ERR("Unsupported complex fixup: %d\n", yuv_fixup);
07155     }
07156 
07157     return shader;
07158 }
07159 
07160 /* Context activation is done by the caller. */
07161 static HRESULT arbfp_blit_set(void *blit_priv, struct wined3d_context *context, const struct wined3d_surface *surface)
07162 {
07163     GLenum shader;
07164     float size[4] = {(float) surface->pow2Width, (float) surface->pow2Height, 1.0f, 1.0f};
07165     struct arbfp_blit_priv *priv = blit_priv;
07166     enum complex_fixup fixup;
07167     GLenum textype = surface->texture_target;
07168     const struct wined3d_gl_info *gl_info = context->gl_info;
07169 
07170     if (surface->flags & SFLAG_CONVERTED)
07171     {
07172         ENTER_GL();
07173         glEnable(textype);
07174         checkGLcall("glEnable(textype)");
07175         LEAVE_GL();
07176         return WINED3D_OK;
07177     }
07178 
07179     if (!is_complex_fixup(surface->resource.format->color_fixup))
07180     {
07181         TRACE("Fixup:\n");
07182         dump_color_fixup_desc(surface->resource.format->color_fixup);
07183         /* Don't bother setting up a shader for unconverted formats */
07184         ENTER_GL();
07185         glEnable(textype);
07186         checkGLcall("glEnable(textype)");
07187         LEAVE_GL();
07188         return WINED3D_OK;
07189     }
07190 
07191     fixup = get_complex_fixup(surface->resource.format->color_fixup);
07192 
07193     switch(fixup)
07194     {
07195         case COMPLEX_FIXUP_YUY2:
07196             shader = textype == GL_TEXTURE_RECTANGLE_ARB ? priv->yuy2_rect_shader : priv->yuy2_2d_shader;
07197             break;
07198 
07199         case COMPLEX_FIXUP_UYVY:
07200             shader = textype == GL_TEXTURE_RECTANGLE_ARB ? priv->uyvy_rect_shader : priv->uyvy_2d_shader;
07201             break;
07202 
07203         case COMPLEX_FIXUP_YV12:
07204             shader = textype == GL_TEXTURE_RECTANGLE_ARB ? priv->yv12_rect_shader : priv->yv12_2d_shader;
07205             break;
07206 
07207         case COMPLEX_FIXUP_P8:
07208             shader = textype == GL_TEXTURE_RECTANGLE_ARB ? priv->p8_rect_shader : priv->p8_2d_shader;
07209             if (!shader) shader = gen_p8_shader(priv, gl_info, textype);
07210 
07211             upload_palette(surface, context);
07212             break;
07213 
07214         default:
07215             FIXME("Unsupported complex fixup %#x, not setting a shader\n", fixup);
07216             ENTER_GL();
07217             glEnable(textype);
07218             checkGLcall("glEnable(textype)");
07219             LEAVE_GL();
07220             return E_NOTIMPL;
07221     }
07222 
07223     if (!shader) shader = gen_yuv_shader(priv, gl_info, fixup, textype);
07224 
07225     ENTER_GL();
07226     glEnable(GL_FRAGMENT_PROGRAM_ARB);
07227     checkGLcall("glEnable(GL_FRAGMENT_PROGRAM_ARB)");
07228     GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, shader));
07229     checkGLcall("glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, shader)");
07230     GL_EXTCALL(glProgramLocalParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, 0, size));
07231     checkGLcall("glProgramLocalParameter4fvARB");
07232     LEAVE_GL();
07233 
07234     return WINED3D_OK;
07235 }
07236 
07237 /* Context activation is done by the caller. */
07238 static void arbfp_blit_unset(const struct wined3d_gl_info *gl_info)
07239 {
07240     ENTER_GL();
07241     glDisable(GL_FRAGMENT_PROGRAM_ARB);
07242     checkGLcall("glDisable(GL_FRAGMENT_PROGRAM_ARB)");
07243     glDisable(GL_TEXTURE_2D);
07244     checkGLcall("glDisable(GL_TEXTURE_2D)");
07245     if (gl_info->supported[ARB_TEXTURE_CUBE_MAP])
07246     {
07247         glDisable(GL_TEXTURE_CUBE_MAP_ARB);
07248         checkGLcall("glDisable(GL_TEXTURE_CUBE_MAP_ARB)");
07249     }
07250     if (gl_info->supported[ARB_TEXTURE_RECTANGLE])
07251     {
07252         glDisable(GL_TEXTURE_RECTANGLE_ARB);
07253         checkGLcall("glDisable(GL_TEXTURE_RECTANGLE_ARB)");
07254     }
07255     LEAVE_GL();
07256 }
07257 
07258 static BOOL arbfp_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op,
07259         const RECT *src_rect, DWORD src_usage, enum wined3d_pool src_pool, const struct wined3d_format *src_format,
07260         const RECT *dst_rect, DWORD dst_usage, enum wined3d_pool dst_pool, const struct wined3d_format *dst_format)
07261 {
07262     enum complex_fixup src_fixup;
07263 
07264     if (!gl_info->supported[ARB_FRAGMENT_PROGRAM])
07265         return FALSE;
07266 
07267     if (blit_op != WINED3D_BLIT_OP_COLOR_BLIT)
07268     {
07269         TRACE("Unsupported blit_op=%d\n", blit_op);
07270         return FALSE;
07271     }
07272 
07273     if (src_pool == WINED3D_POOL_SYSTEM_MEM || dst_pool == WINED3D_POOL_SYSTEM_MEM)
07274         return FALSE;
07275 
07276     src_fixup = get_complex_fixup(src_format->color_fixup);
07277     if (TRACE_ON(d3d_shader) && TRACE_ON(d3d))
07278     {
07279         TRACE("Checking support for fixup:\n");
07280         dump_color_fixup_desc(src_format->color_fixup);
07281     }
07282 
07283     if (!is_identity_fixup(dst_format->color_fixup))
07284     {
07285         TRACE("Destination fixups are not supported\n");
07286         return FALSE;
07287     }
07288 
07289     if (is_identity_fixup(src_format->color_fixup))
07290     {
07291         TRACE("[OK]\n");
07292         return TRUE;
07293     }
07294 
07295      /* We only support YUV conversions. */
07296     if (!is_complex_fixup(src_format->color_fixup))
07297     {
07298         TRACE("[FAILED]\n");
07299         return FALSE;
07300     }
07301 
07302     switch(src_fixup)
07303     {
07304         case COMPLEX_FIXUP_YUY2:
07305         case COMPLEX_FIXUP_UYVY:
07306         case COMPLEX_FIXUP_YV12:
07307         case COMPLEX_FIXUP_P8:
07308             TRACE("[OK]\n");
07309             return TRUE;
07310 
07311         default:
07312             FIXME("Unsupported YUV fixup %#x\n", src_fixup);
07313             TRACE("[FAILED]\n");
07314             return FALSE;
07315     }
07316 }
07317 
07318 HRESULT arbfp_blit_surface(struct wined3d_device *device, DWORD filter,
07319         struct wined3d_surface *src_surface, const RECT *src_rect_in,
07320         struct wined3d_surface *dst_surface, const RECT *dst_rect_in)
07321 {
07322     struct wined3d_context *context;
07323     RECT src_rect = *src_rect_in;
07324     RECT dst_rect = *dst_rect_in;
07325 
07326     /* Now load the surface */
07327     if (wined3d_settings.offscreen_rendering_mode != ORM_FBO
07328             && (src_surface->flags & (SFLAG_INTEXTURE | SFLAG_INDRAWABLE)) == SFLAG_INDRAWABLE)
07329     {
07330         /* Without FBO blits transferring from the drawable to the texture is
07331          * expensive, because we have to flip the data in sysmem. Since we can
07332          * flip in the blitter, we don't actually need that flip anyway. So we
07333          * use the surface's texture as scratch texture, and flip the source
07334          * rectangle instead. */
07335         surface_load_fb_texture(src_surface, FALSE);
07336 
07337         src_rect.top = src_surface->resource.height - src_rect.top;
07338         src_rect.bottom = src_surface->resource.height - src_rect.bottom;
07339     }
07340     else
07341         surface_internal_preload(src_surface, SRGB_RGB);
07342 
07343     /* Activate the destination context, set it up for blitting */
07344     context = context_acquire(device, dst_surface);
07345     context_apply_blit_state(context, device);
07346 
07347     if (!surface_is_offscreen(dst_surface))
07348         surface_translate_drawable_coords(dst_surface, context->win_handle, &dst_rect);
07349 
07350     arbfp_blit_set(device->blit_priv, context, src_surface);
07351 
07352     ENTER_GL();
07353 
07354     /* Draw a textured quad */
07355     draw_textured_quad(src_surface, context, &src_rect, &dst_rect, filter);
07356 
07357     LEAVE_GL();
07358 
07359     /* Leave the opengl state valid for blitting */
07360     arbfp_blit_unset(context->gl_info);
07361 
07362     if (wined3d_settings.strict_draw_ordering
07363             || (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN
07364             && (dst_surface->container.u.swapchain->front_buffer == dst_surface)))
07365         wglFlush(); /* Flush to ensure ordering across contexts. */
07366 
07367     context_release(context);
07368 
07369     surface_modify_location(dst_surface, dst_surface->draw_binding, TRUE);
07370     return WINED3D_OK;
07371 }
07372 
07373 /* Do not call while under the GL lock. */
07374 static HRESULT arbfp_blit_color_fill(struct wined3d_device *device, struct wined3d_surface *dst_surface,
07375         const RECT *dst_rect, const struct wined3d_color *color)
07376 {
07377     FIXME("Color filling not implemented by arbfp_blit\n");
07378     return WINED3DERR_INVALIDCALL;
07379 }
07380 
07381 /* Do not call while under the GL lock. */
07382 static HRESULT arbfp_blit_depth_fill(struct wined3d_device *device,
07383         struct wined3d_surface *surface, const RECT *rect, float depth)
07384 {
07385     FIXME("Depth filling not implemented by arbfp_blit.\n");
07386     return WINED3DERR_INVALIDCALL;
07387 }
07388 
07389 const struct blit_shader arbfp_blit = {
07390     arbfp_blit_alloc,
07391     arbfp_blit_free,
07392     arbfp_blit_set,
07393     arbfp_blit_unset,
07394     arbfp_blit_supported,
07395     arbfp_blit_color_fill,
07396     arbfp_blit_depth_fill,
07397 };

Generated on Fri May 25 2012 04:20:12 for ReactOS by doxygen 1.7.6.1

ReactOS is a registered trademark or a trademark of ReactOS Foundation in the United States and other countries.