ReactOS Fundraising Campaign 2012
 
€ 4,410 / € 30,000

Information | Donate

Home | Info | Community | Development | myReactOS | Contact Us

  1. Home
  2. Community
  3. Development
  4. myReactOS
  5. Fundraiser 2012

  1. Main Page
  2. Alphabetical List
  3. Data Structures
  4. Directories
  5. File List
  6. Data Fields
  7. Globals
  8. Related Pages

ReactOS Development > Doxygen

glsl_shader.c
Go to the documentation of this file.
00001 /*
00002  * GLSL pixel and vertex shader implementation
00003  *
00004  * Copyright 2006 Jason Green
00005  * Copyright 2006-2007 Henri Verbeet
00006  * Copyright 2007-2008 Stefan Dösinger for CodeWeavers
00007  * Copyright 2009-2011 Henri Verbeet for CodeWeavers
00008  *
00009  * This library is free software; you can redistribute it and/or
00010  * modify it under the terms of the GNU Lesser General Public
00011  * License as published by the Free Software Foundation; either
00012  * version 2.1 of the License, or (at your option) any later version.
00013  *
00014  * This library is distributed in the hope that it will be useful,
00015  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00016  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00017  * Lesser General Public License for more details.
00018  *
00019  * You should have received a copy of the GNU Lesser General Public
00020  * License along with this library; if not, write to the Free Software
00021  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
00022  */
00023 
00024 /*
00025  * D3D shader asm has swizzles on source parameters, and write masks for
00026  * destination parameters. GLSL uses swizzles for both. The result of this is
00027  * that for example "mov dst.xw, src.zyxw" becomes "dst.xw = src.zw" in GLSL.
00028  * I.e., to generate a proper GLSL source swizzle, we need to take the D3D write
00029  * mask for the destination parameter into account.
00030  */
00031 
00032 #include "config.h"
00033 #include <limits.h>
00034 #include <stdio.h>
00035 #include "wined3d_private.h"
00036 
00037 WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader);
00038 WINE_DECLARE_DEBUG_CHANNEL(d3d_constants);
00039 WINE_DECLARE_DEBUG_CHANNEL(d3d_caps);
00040 WINE_DECLARE_DEBUG_CHANNEL(d3d);
00041 
/* Bit flags; every value is a distinct bit, so they can be OR'ed together.
 * NOTE(review): presumably these select the variant of a GLSL texture sample
 * function (projected / rectangle / explicit LOD / explicit gradients); the
 * consumers are outside this chunk - confirm. */
00042 #define WINED3D_GLSL_SAMPLE_PROJECTED   0x1
00043 #define WINED3D_GLSL_SAMPLE_RECT        0x2
00044 #define WINED3D_GLSL_SAMPLE_LOD         0x4
00045 #define WINED3D_GLSL_SAMPLE_GRAD        0x8
00046 
/* Textual form of a destination parameter.
 * NOTE(review): presumably reg_name is the GLSL register name and mask_str the
 * GLSL write mask suffix; the code filling these in is outside this chunk -
 * confirm. */
00047 struct glsl_dst_param
00048 {
00049     char reg_name[150];
00050     char mask_str[6];
00051 };
00052 
/* Textual form of a source parameter.
 * NOTE(review): presumably reg_name is the bare GLSL register name and
 * param_str the complete GLSL expression (modifiers and swizzle applied); the
 * code filling these in is outside this chunk - confirm. */
00053 struct glsl_src_param
00054 {
00055     char reg_name[150];
00056     char param_str[200];
00057 };
00058 
/* Description of a texture sample function.
 * NOTE(review): presumably name is the GLSL function name and coord_mask the
 * write mask selecting the coordinate components it consumes; not used within
 * this chunk - confirm. */
00059 struct glsl_sample_function
00060 {
00061     const char *name;
00062     DWORD coord_mask;
00063 };
00064 
/* Per-level state stored on the explicit stack used by walk_constant_heap()
 * and walk_constant_heap_clamped() while they traverse a constant heap. */
00065 enum heap_node_op
00066 {
      /* The left child of the current node still has to be tried. */
00067     HEAP_NODE_TRAVERSE_LEFT,
      /* The left side is done; the right child still has to be tried. */
00068     HEAP_NODE_TRAVERSE_RIGHT,
      /* Both sides are done; go back up to the parent node. */
00069     HEAP_NODE_POP,
00070 };
00071 
/* One node of a constant_heap. */
00072 struct constant_entry
00073 {
      /* Constant index; the heap walkers use it to index both the constant
       * data (four floats per constant) and the uniform location array. */
00074     unsigned int idx;
      /* Compared against the version passed to the heap walkers: the
       * constant is only uploaded if this is greater. */
00075     unsigned int version;
00076 };
00077 
/* Set of constant_entry nodes walked as an implicit binary tree. */
00078 struct constant_heap
00079 {
      /* Node array. The walkers treat index 1 as the root and 2 * i and
       * 2 * i + 1 as the children of node i. */
00080     struct constant_entry *entries;
      /* NOTE(review): not referenced in this chunk; presumably maps a
       * constant index to its position in entries - confirm. */
00081     unsigned int *positions;
      /* Exclusive upper bound for valid indices into entries. */
00082     unsigned int size;
00083 };
00084 
00085 /* GLSL shader private data */
00086 struct shader_glsl_priv {
00087     struct wined3d_shader_buffer shader_buffer;
00088     struct wine_rb_tree program_lookup;
      /* Program the constant loaders operate on; NULL when no GLSL program
       * is set, in which case the loaders return without doing anything. */
00089     struct glsl_shader_prog_link *glsl_program;
      /* Heaps handed to shader_glsl_load_constantsF() for the vertex and
       * the pixel shader float constants respectively. */
00090     struct constant_heap vconst_heap;
00091     struct constant_heap pconst_heap;
      /* Scratch stack for the constant heap walkers; the same buffer is
       * passed for both heaps. */
00092     unsigned char *stack;
      /* NOTE(review): presumably programs used for depth blits, one per
       * texture type - not used in this chunk, confirm. */
00093     GLhandleARB depth_blt_program_full[tex_type_count];
00094     GLhandleARB depth_blt_program_masked[tex_type_count];
00095     UINT next_constant_version;
00096 };
00097 
00098 /* Struct to maintain data about a linked GLSL program */
00099 struct glsl_shader_prog_link {
      /* Node used to recover this structure from a wine_rb_entry, see
       * reset_program_constant_version(). */
00100     struct wine_rb_entry        program_lookup_entry;
00101     struct list                 vshader_entry;
00102     struct list                 pshader_entry;
      /* GL handle of the program object. */
00103     GLhandleARB                 programId;
      /* Uniform locations. Several loaders in this file test a location
       * against -1 and skip the upload in that case. The float location
       * arrays are indexed by constant index. */
00104     GLint                       *vuniformF_locations;
00105     GLint                       *puniformF_locations;
00106     GLint                       vuniformI_locations[MAX_CONST_I];
00107     GLint                       puniformI_locations[MAX_CONST_I];
00108     GLint                       posFixup_location;
00109     GLint                       np2Fixup_location;
00110     GLint                       bumpenvmat_location[MAX_TEXTURES];
00111     GLint                       luminancescale_location[MAX_TEXTURES];
00112     GLint                       luminanceoffset_location[MAX_TEXTURES];
00113     GLint                       ycorrection_location;
00114     GLenum                      vertex_color_clamp;
00115     const struct wined3d_shader *vshader;
00116     const struct wined3d_shader *pshader;
00117     struct vs_compile_args      vs_args;
00118     struct ps_compile_args      ps_args;
      /* Version handed to the constant heap walkers by
       * shader_glsl_load_constants(); reset_program_constant_version()
       * sets it back to 0. */
00119     UINT                        constant_version;
      /* Read by shader_glsl_load_np2fixup_constants() (idx[] and
       * num_consts). */
00120     const struct ps_np2fixup_info *np2Fixup_info;
00121 };
00122 
/* Combination of shaders and their compile arguments.
 * NOTE(review): presumably the key used to find a glsl_shader_prog_link in
 * shader_glsl_priv.program_lookup; the lookup code is outside this chunk -
 * confirm. */
00123 struct glsl_program_key
00124 {
00125     const struct wined3d_shader *vshader;
00126     const struct wined3d_shader *pshader;
00127     struct ps_compile_args      ps_args;
00128     struct vs_compile_args      vs_args;
00129 };
00130 
/* NOTE(review): presumably per-compilation context handed to the instruction
 * handlers (current compile arguments and the NP2 fixup info being built);
 * not used within this chunk - confirm. */
00131 struct shader_glsl_ctx_priv {
00132     const struct vs_compile_args    *cur_vs_args;
00133     const struct ps_compile_args    *cur_ps_args;
00134     struct ps_np2fixup_info         *cur_np2fixup_info;
00135 };
00136 
/* NOTE(review): presumably one compiled GL variant of a pixel shader: the
 * compile arguments it was built for, its NP2 fixup info and the GL shader
 * object handle - not used within this chunk, confirm. */
00137 struct glsl_ps_compiled_shader
00138 {
00139     struct ps_compile_args          args;
00140     struct ps_np2fixup_info         np2fixup;
00141     GLhandleARB                     prgId;
00142 };
00143 
/* NOTE(review): presumably the per-pixel-shader backend data: a growable
 * array of compiled variants with its used count (num_gl_shaders) and
 * capacity (shader_array_size) - not used within this chunk, confirm. */
00144 struct glsl_pshader_private
00145 {
00146     struct glsl_ps_compiled_shader  *gl_shaders;
00147     UINT                            num_gl_shaders, shader_array_size;
00148 };
00149 
/* NOTE(review): presumably one compiled GL variant of a vertex shader: the
 * compile arguments it was built for and the GL shader object handle - not
 * used within this chunk, confirm. */
00150 struct glsl_vs_compiled_shader
00151 {
00152     struct vs_compile_args          args;
00153     GLhandleARB                     prgId;
00154 };
00155 
/* NOTE(review): presumably the per-vertex-shader backend data: a growable
 * array of compiled variants with its used count (num_gl_shaders) and
 * capacity (shader_array_size) - not used within this chunk, confirm. */
00156 struct glsl_vshader_private
00157 {
00158     struct glsl_vs_compiled_shader  *gl_shaders;
00159     UINT                            num_gl_shaders, shader_array_size;
00160 };
00161 
00162 static const char *debug_gl_shader_type(GLenum type)
00163 {
00164     switch (type)
00165     {
00166 #define WINED3D_TO_STR(u) case u: return #u
00167         WINED3D_TO_STR(GL_VERTEX_SHADER_ARB);
00168         WINED3D_TO_STR(GL_GEOMETRY_SHADER_ARB);
00169         WINED3D_TO_STR(GL_FRAGMENT_SHADER_ARB);
00170 #undef WINED3D_TO_STR
00171         default:
00172             return wine_dbg_sprintf("UNKNOWN(%#x)", type);
00173     }
00174 }
00175 
/* Split the next line off an info log.
 *
 * *ptr points at the unread remainder of the log. The return value is the
 * start of the next line; the newline ending it is overwritten with a
 * terminator (so the source string is modified) and *ptr is moved past it.
 * A final line without a trailing newline is returned unchanged, with *ptr
 * moved onto the string terminator. Returns NULL once nothing is left. */
static char *get_info_log_line(char **ptr)
{
    char *line = *ptr;
    char *eol = strchr(line, '\n');

    if (eol)
    {
        *eol = '\0';
        *ptr = eol + 1;
        return line;
    }

    /* No newline left: either the last, unterminated line or the end. */
    if (*line == '\0')
        return NULL;

    *ptr = line + strlen(line);
    return line;
}
00194 
00196 /* GL locking is done by the caller */
00197 static void print_glsl_info_log(const struct wined3d_gl_info *gl_info, GLhandleARB obj)
00198 {
00199     int infologLength = 0;
00200     char *infoLog;
00201 
00202     if (!WARN_ON(d3d_shader) && !FIXME_ON(d3d_shader))
00203         return;
00204 
00205     GL_EXTCALL(glGetObjectParameterivARB(obj,
00206                GL_OBJECT_INFO_LOG_LENGTH_ARB,
00207                &infologLength));
00208 
00209     /* A size of 1 is just a null-terminated string, so the log should be bigger than
00210      * that if there are errors. */
00211     if (infologLength > 1)
00212     {
00213         char *ptr, *line;
00214 
00215         infoLog = HeapAlloc(GetProcessHeap(), 0, infologLength);
00216         /* The info log is supposed to be zero-terminated, but at least some
00217          * versions of fglrx don't terminate the string properly. The reported
00218          * length does include the terminator, so explicitly set it to zero
00219          * here. */
00220         infoLog[infologLength - 1] = 0;
00221         GL_EXTCALL(glGetInfoLogARB(obj, infologLength, NULL, infoLog));
00222 
00223         ptr = infoLog;
00224         if (gl_info->quirks & WINED3D_QUIRK_INFO_LOG_SPAM)
00225         {
00226             WARN("Info log received from GLSL shader #%u:\n", obj);
00227             while ((line = get_info_log_line(&ptr))) WARN("    %s\n", line);
00228         }
00229         else
00230         {
00231             FIXME("Info log received from GLSL shader #%u:\n", obj);
00232             while ((line = get_info_log_line(&ptr))) FIXME("    %s\n", line);
00233         }
00234         HeapFree(GetProcessHeap(), 0, infoLog);
00235     }
00236 }
00237 
/* Set 'src' as the only source string of the GL shader object 'shader',
 * compile it and print the resulting info log (if any). The compile status
 * itself is not queried here. */
00238 /* GL locking is done by the caller. */
00239 static void shader_glsl_compile(const struct wined3d_gl_info *gl_info, GLhandleARB shader, const char *src)
00240 {
00241     TRACE("Compiling shader object %u.\n", shader);
      /* One string; the NULL length array makes GL treat it as
       * null-terminated. */
00242     GL_EXTCALL(glShaderSourceARB(shader, 1, &src, NULL));
00243     checkGLcall("glShaderSourceARB");
00244     GL_EXTCALL(glCompileShaderARB(shader));
00245     checkGLcall("glCompileShaderARB");
00246     print_glsl_info_log(gl_info, shader);
00247 }
00248 
00249 /* GL locking is done by the caller. */
00250 static void shader_glsl_dump_program_source(const struct wined3d_gl_info *gl_info, GLhandleARB program)
00251 {
00252     GLint i, object_count, source_size = -1;
00253     GLhandleARB *objects;
00254     char *source = NULL;
00255 
00256     GL_EXTCALL(glGetObjectParameterivARB(program, GL_OBJECT_ATTACHED_OBJECTS_ARB, &object_count));
00257     objects = HeapAlloc(GetProcessHeap(), 0, object_count * sizeof(*objects));
00258     if (!objects)
00259     {
00260         ERR("Failed to allocate object array memory.\n");
00261         return;
00262     }
00263 
00264     GL_EXTCALL(glGetAttachedObjectsARB(program, object_count, NULL, objects));
00265     for (i = 0; i < object_count; ++i)
00266     {
00267         char *ptr, *line;
00268         GLint tmp;
00269 
00270         GL_EXTCALL(glGetObjectParameterivARB(objects[i], GL_OBJECT_SHADER_SOURCE_LENGTH_ARB, &tmp));
00271 
00272         if (source_size < tmp)
00273         {
00274             HeapFree(GetProcessHeap(), 0, source);
00275 
00276             source = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, tmp);
00277             if (!source)
00278             {
00279                 ERR("Failed to allocate %d bytes for shader source.\n", tmp);
00280                 HeapFree(GetProcessHeap(), 0, objects);
00281                 return;
00282             }
00283             source_size = tmp;
00284         }
00285 
00286         FIXME("Object %u:\n", objects[i]);
00287         GL_EXTCALL(glGetObjectParameterivARB(objects[i], GL_OBJECT_SUBTYPE_ARB, &tmp));
00288         FIXME("    GL_OBJECT_SUBTYPE_ARB: %s.\n", debug_gl_shader_type(tmp));
00289         GL_EXTCALL(glGetObjectParameterivARB(objects[i], GL_OBJECT_COMPILE_STATUS_ARB, &tmp));
00290         FIXME("    GL_OBJECT_COMPILE_STATUS_ARB: %d.\n", tmp);
00291         FIXME("\n");
00292 
00293         ptr = source;
00294         GL_EXTCALL(glGetShaderSourceARB(objects[i], source_size, NULL, source));
00295         while ((line = get_info_log_line(&ptr))) FIXME("    %s\n", line);
00296         FIXME("\n");
00297     }
00298 
00299     HeapFree(GetProcessHeap(), 0, source);
00300     HeapFree(GetProcessHeap(), 0, objects);
00301 }
00302 
00303 /* GL locking is done by the caller. */
00304 static void shader_glsl_validate_link(const struct wined3d_gl_info *gl_info, GLhandleARB program)
00305 {
00306     GLint tmp;
00307 
00308     if (!TRACE_ON(d3d_shader) && !FIXME_ON(d3d_shader)) return;
00309 
00310     GL_EXTCALL(glGetObjectParameterivARB(program, GL_OBJECT_TYPE_ARB, &tmp));
00311     if (tmp == GL_PROGRAM_OBJECT_ARB)
00312     {
00313         GL_EXTCALL(glGetObjectParameterivARB(program, GL_OBJECT_LINK_STATUS_ARB, &tmp));
00314         if (!tmp)
00315         {
00316             FIXME("Program %u link status invalid.\n", program);
00317             shader_glsl_dump_program_source(gl_info, program);
00318         }
00319     }
00320 
00321     print_glsl_info_log(gl_info, program);
00322 }
00323 
00327 /* GL locking is done by the caller */
00328 static void shader_glsl_load_psamplers(const struct wined3d_gl_info *gl_info,
00329         const DWORD *tex_unit_map, GLhandleARB programId)
00330 {
00331     GLint name_loc;
00332     int i;
00333     char sampler_name[20];
00334 
00335     for (i = 0; i < MAX_FRAGMENT_SAMPLERS; ++i) {
00336         snprintf(sampler_name, sizeof(sampler_name), "Psampler%d", i);
00337         name_loc = GL_EXTCALL(glGetUniformLocationARB(programId, sampler_name));
00338         if (name_loc != -1) {
00339             DWORD mapped_unit = tex_unit_map[i];
00340             if (mapped_unit != WINED3D_UNMAPPED_STAGE && mapped_unit < gl_info->limits.fragment_samplers)
00341             {
00342                 TRACE("Loading %s for texture %d\n", sampler_name, mapped_unit);
00343                 GL_EXTCALL(glUniform1iARB(name_loc, mapped_unit));
00344                 checkGLcall("glUniform1iARB");
00345             } else {
00346                 ERR("Trying to load sampler %s on unsupported unit %d\n", sampler_name, mapped_unit);
00347             }
00348         }
00349     }
00350 }
00351 
00352 /* GL locking is done by the caller */
00353 static void shader_glsl_load_vsamplers(const struct wined3d_gl_info *gl_info,
00354         const DWORD *tex_unit_map, GLhandleARB programId)
00355 {
00356     GLint name_loc;
00357     char sampler_name[20];
00358     int i;
00359 
00360     for (i = 0; i < MAX_VERTEX_SAMPLERS; ++i) {
00361         snprintf(sampler_name, sizeof(sampler_name), "Vsampler%d", i);
00362         name_loc = GL_EXTCALL(glGetUniformLocationARB(programId, sampler_name));
00363         if (name_loc != -1) {
00364             DWORD mapped_unit = tex_unit_map[MAX_FRAGMENT_SAMPLERS + i];
00365             if (mapped_unit != WINED3D_UNMAPPED_STAGE && mapped_unit < gl_info->limits.combined_samplers)
00366             {
00367                 TRACE("Loading %s for texture %d\n", sampler_name, mapped_unit);
00368                 GL_EXTCALL(glUniform1iARB(name_loc, mapped_unit));
00369                 checkGLcall("glUniform1iARB");
00370             } else {
00371                 ERR("Trying to load sampler %s on unsupported unit %d\n", sampler_name, mapped_unit);
00372             }
00373         }
00374     }
00375 }
00376 
/* Upload every float constant whose heap entry is newer than 'version'.
 *
 * The entries of 'heap' are walked as an implicit binary tree rooted at
 * index 1; the children of node i are 2 * i and 2 * i + 1. A child (and with
 * it everything below) is skipped when its version is not greater than
 * 'version'.
 * NOTE(review): that pruning only finds every changed constant if no node is
 * newer than its parent; the code maintaining the heap is outside this chunk
 * - confirm.
 *
 * Instead of recursing, one heap_node_op per tree level is kept in 'stack'.
 * NOTE(review): 'stack' is not bounds-checked here, the caller presumably
 * sizes it for the depth of the tree - confirm.
 *
 * constants:          float data, four components per constant
 * constant_locations: uniform location per constant index; constants with a
 *                     location of -1 are visited but not uploaded */
00377 /* GL locking is done by the caller */
00378 static inline void walk_constant_heap(const struct wined3d_gl_info *gl_info, const float *constants,
00379         const GLint *constant_locations, const struct constant_heap *heap, unsigned char *stack, DWORD version)
00380 {
00381     int stack_idx = 0;
00382     unsigned int heap_idx = 1;
00383     unsigned int idx;
00384 
      /* The root is not newer than 'version': nothing is uploaded. */
00385     if (heap->entries[heap_idx].version <= version) return;
00386 
      /* Handle the root, then start descending to the left. */
00387     idx = heap->entries[heap_idx].idx;
00388     if (constant_locations[idx] != -1) GL_EXTCALL(glUniform4fvARB(constant_locations[idx], 1, &constants[idx * 4]));
00389     stack[stack_idx] = HEAP_NODE_TRAVERSE_LEFT;
00390 
      /* Popping the root's entry makes stack_idx negative and ends the walk. */
00391     while (stack_idx >= 0)
00392     {
00393         /* Note that we fall through to the next case statement. */
00394         switch(stack[stack_idx])
00395         {
00396             case HEAP_NODE_TRAVERSE_LEFT:
00397             {
00398                 unsigned int left_idx = heap_idx << 1;
00399                 if (left_idx < heap->size && heap->entries[left_idx].version > version)
00400                 {
00401                     heap_idx = left_idx;
00402                     idx = heap->entries[heap_idx].idx;
00403                     if (constant_locations[idx] != -1)
00404                         GL_EXTCALL(glUniform4fvARB(constant_locations[idx], 1, &constants[idx * 4]));
00405 
                      /* Remember to try the parent's right child on the
                       * way back, then push a new level for the child. */
00406                     stack[stack_idx++] = HEAP_NODE_TRAVERSE_RIGHT;
00407                     stack[stack_idx] = HEAP_NODE_TRAVERSE_LEFT;
00408                     break;
00409                 }
              /* No left child to visit: fall through and try the right one. */
00410             }
00411 
00412             case HEAP_NODE_TRAVERSE_RIGHT:
00413             {
00414                 unsigned int right_idx = (heap_idx << 1) + 1;
00415                 if (right_idx < heap->size && heap->entries[right_idx].version > version)
00416                 {
00417                     heap_idx = right_idx;
00418                     idx = heap->entries[heap_idx].idx;
00419                     if (constant_locations[idx] != -1)
00420                         GL_EXTCALL(glUniform4fvARB(constant_locations[idx], 1, &constants[idx * 4]));
00421 
                      /* Both children of the parent are handled once this
                       * subtree is done, so the parent only needs popping. */
00422                     stack[stack_idx++] = HEAP_NODE_POP;
00423                     stack[stack_idx] = HEAP_NODE_TRAVERSE_LEFT;
00424                     break;
00425                 }
              /* No right child to visit either: fall through and pop. */
00426             }
00427 
00428             case HEAP_NODE_POP:
                  /* Halving the index moves from a child to its parent. */
00429                 heap_idx >>= 1;
00430                 --stack_idx;
00431                 break;
00432         }
00433     }
00434     checkGLcall("walk_constant_heap()");
00435 }
00436 
00437 /* GL locking is done by the caller */
00438 static inline void apply_clamped_constant(const struct wined3d_gl_info *gl_info, GLint location, const GLfloat *data)
00439 {
00440     GLfloat clamped_constant[4];
00441 
00442     if (location == -1) return;
00443 
00444     clamped_constant[0] = data[0] < -1.0f ? -1.0f : data[0] > 1.0f ? 1.0f : data[0];
00445     clamped_constant[1] = data[1] < -1.0f ? -1.0f : data[1] > 1.0f ? 1.0f : data[1];
00446     clamped_constant[2] = data[2] < -1.0f ? -1.0f : data[2] > 1.0f ? 1.0f : data[2];
00447     clamped_constant[3] = data[3] < -1.0f ? -1.0f : data[3] > 1.0f ? 1.0f : data[3];
00448 
00449     GL_EXTCALL(glUniform4fvARB(location, 1, clamped_constant));
00450 }
00451 
/* Same traversal as walk_constant_heap(), but every visited constant is
 * uploaded through apply_clamped_constant(), i.e. with its components clamped
 * to [-1.0, 1.0]. Constants with a location of -1 are skipped inside
 * apply_clamped_constant().
 *
 * The entries of 'heap' are walked as an implicit binary tree rooted at
 * index 1; the children of node i are 2 * i and 2 * i + 1. A child (and with
 * it everything below) is skipped when its version is not greater than
 * 'version'. One heap_node_op per tree level is kept in 'stack'.
 * NOTE(review): 'stack' is not bounds-checked here, the caller presumably
 * sizes it for the depth of the tree - confirm. */
00452 /* GL locking is done by the caller */
00453 static inline void walk_constant_heap_clamped(const struct wined3d_gl_info *gl_info, const float *constants,
00454         const GLint *constant_locations, const struct constant_heap *heap, unsigned char *stack, DWORD version)
00455 {
00456     int stack_idx = 0;
00457     unsigned int heap_idx = 1;
00458     unsigned int idx;
00459 
      /* The root is not newer than 'version': nothing is uploaded. */
00460     if (heap->entries[heap_idx].version <= version) return;
00461 
      /* Handle the root, then start descending to the left. */
00462     idx = heap->entries[heap_idx].idx;
00463     apply_clamped_constant(gl_info, constant_locations[idx], &constants[idx * 4]);
00464     stack[stack_idx] = HEAP_NODE_TRAVERSE_LEFT;
00465 
      /* Popping the root's entry makes stack_idx negative and ends the walk. */
00466     while (stack_idx >= 0)
00467     {
00468         /* Note that we fall through to the next case statement. */
00469         switch(stack[stack_idx])
00470         {
00471             case HEAP_NODE_TRAVERSE_LEFT:
00472             {
00473                 unsigned int left_idx = heap_idx << 1;
00474                 if (left_idx < heap->size && heap->entries[left_idx].version > version)
00475                 {
00476                     heap_idx = left_idx;
00477                     idx = heap->entries[heap_idx].idx;
00478                     apply_clamped_constant(gl_info, constant_locations[idx], &constants[idx * 4]);
00479 
                      /* Remember to try the parent's right child on the
                       * way back, then push a new level for the child. */
00480                     stack[stack_idx++] = HEAP_NODE_TRAVERSE_RIGHT;
00481                     stack[stack_idx] = HEAP_NODE_TRAVERSE_LEFT;
00482                     break;
00483                 }
              /* No left child to visit: fall through and try the right one. */
00484             }
00485 
00486             case HEAP_NODE_TRAVERSE_RIGHT:
00487             {
00488                 unsigned int right_idx = (heap_idx << 1) + 1;
00489                 if (right_idx < heap->size && heap->entries[right_idx].version > version)
00490                 {
00491                     heap_idx = right_idx;
00492                     idx = heap->entries[heap_idx].idx;
00493                     apply_clamped_constant(gl_info, constant_locations[idx], &constants[idx * 4]);
00494 
                      /* Both children of the parent are handled once this
                       * subtree is done, so the parent only needs popping. */
00495                     stack[stack_idx++] = HEAP_NODE_POP;
00496                     stack[stack_idx] = HEAP_NODE_TRAVERSE_LEFT;
00497                     break;
00498                 }
              /* No right child to visit either: fall through and pop. */
00499             }
00500 
00501             case HEAP_NODE_POP:
                  /* Halving the index moves from a child to its parent. */
00502                 heap_idx >>= 1;
00503                 --stack_idx;
00504                 break;
00505         }
00506     }
00507     checkGLcall("walk_constant_heap_clamped()");
00508 }
00509 
00510 /* Loads floating point constants (aka uniforms) into the currently set GLSL program. */
00511 /* GL locking is done by the caller */
00512 static void shader_glsl_load_constantsF(const struct wined3d_shader *shader, const struct wined3d_gl_info *gl_info,
00513         const float *constants, const GLint *constant_locations, const struct constant_heap *heap,
00514         unsigned char *stack, UINT version)
00515 {
00516     const struct wined3d_shader_lconst *lconst;
00517 
00518     /* 1.X pshaders have the constants clamped to [-1;1] implicitly. */
00519     if (shader->reg_maps.shader_version.major == 1
00520             && shader_is_pshader_version(shader->reg_maps.shader_version.type))
00521         walk_constant_heap_clamped(gl_info, constants, constant_locations, heap, stack, version);
00522     else
00523         walk_constant_heap(gl_info, constants, constant_locations, heap, stack, version);
00524 
00525     if (!shader->load_local_constsF)
00526     {
00527         TRACE("No need to load local float constants for this shader\n");
00528         return;
00529     }
00530 
00531     /* Immediate constants are clamped to [-1;1] at shader creation time if needed */
00532     LIST_FOR_EACH_ENTRY(lconst, &shader->constantsF, struct wined3d_shader_lconst, entry)
00533     {
00534         GLint location = constant_locations[lconst->idx];
00535         /* We found this uniform name in the program - go ahead and send the data */
00536         if (location != -1) GL_EXTCALL(glUniform4fvARB(location, 1, (const GLfloat *)lconst->value));
00537     }
00538     checkGLcall("glUniform4fvARB()");
00539 }
00540 
00541 /* Loads integer constants (aka uniforms) into the currently set GLSL program. */
00542 /* GL locking is done by the caller */
00543 static void shader_glsl_load_constantsI(const struct wined3d_shader *shader, const struct wined3d_gl_info *gl_info,
00544         const GLint locations[MAX_CONST_I], const int *constants, WORD constants_set)
00545 {
00546     unsigned int i;
00547     struct list* ptr;
00548 
00549     for (i = 0; constants_set; constants_set >>= 1, ++i)
00550     {
00551         if (!(constants_set & 1)) continue;
00552 
00553         TRACE_(d3d_constants)("Loading constants %u: %i, %i, %i, %i\n",
00554                 i, constants[i*4], constants[i*4+1], constants[i*4+2], constants[i*4+3]);
00555 
00556         /* We found this uniform name in the program - go ahead and send the data */
00557         GL_EXTCALL(glUniform4ivARB(locations[i], 1, &constants[i*4]));
00558         checkGLcall("glUniform4ivARB");
00559     }
00560 
00561     /* Load immediate constants */
00562     ptr = list_head(&shader->constantsI);
00563     while (ptr)
00564     {
00565         const struct wined3d_shader_lconst *lconst = LIST_ENTRY(ptr, const struct wined3d_shader_lconst, entry);
00566         unsigned int idx = lconst->idx;
00567         const GLint *values = (const GLint *)lconst->value;
00568 
00569         TRACE_(d3d_constants)("Loading local constants %i: %i, %i, %i, %i\n", idx,
00570             values[0], values[1], values[2], values[3]);
00571 
00572         /* We found this uniform name in the program - go ahead and send the data */
00573         GL_EXTCALL(glUniform4ivARB(locations[idx], 1, values));
00574         checkGLcall("glUniform4ivARB");
00575         ptr = list_next(&shader->constantsI, ptr);
00576     }
00577 }
00578 
00579 /* Loads boolean constants (aka uniforms) into the currently set GLSL program. */
00580 /* GL locking is done by the caller */
00581 static void shader_glsl_load_constantsB(const struct wined3d_shader *shader, const struct wined3d_gl_info *gl_info,
00582         GLhandleARB programId, const BOOL *constants, WORD constants_set)
00583 {
00584     GLint tmp_loc;
00585     unsigned int i;
00586     char tmp_name[8];
00587     const char *prefix;
00588     struct list* ptr;
00589 
00590     switch (shader->reg_maps.shader_version.type)
00591     {
00592         case WINED3D_SHADER_TYPE_VERTEX:
00593             prefix = "VB";
00594             break;
00595 
00596         case WINED3D_SHADER_TYPE_GEOMETRY:
00597             prefix = "GB";
00598             break;
00599 
00600         case WINED3D_SHADER_TYPE_PIXEL:
00601             prefix = "PB";
00602             break;
00603 
00604         default:
00605             FIXME("Unknown shader type %#x.\n",
00606                     shader->reg_maps.shader_version.type);
00607             prefix = "UB";
00608             break;
00609     }
00610 
00611     /* TODO: Benchmark and see if it would be beneficial to store the
00612      * locations of the constants to avoid looking up each time */
00613     for (i = 0; constants_set; constants_set >>= 1, ++i)
00614     {
00615         if (!(constants_set & 1)) continue;
00616 
00617         TRACE_(d3d_constants)("Loading constants %i: %i;\n", i, constants[i]);
00618 
00619         /* TODO: Benchmark and see if it would be beneficial to store the
00620          * locations of the constants to avoid looking up each time */
00621         snprintf(tmp_name, sizeof(tmp_name), "%s[%i]", prefix, i);
00622         tmp_loc = GL_EXTCALL(glGetUniformLocationARB(programId, tmp_name));
00623         if (tmp_loc != -1)
00624         {
00625             /* We found this uniform name in the program - go ahead and send the data */
00626             GL_EXTCALL(glUniform1ivARB(tmp_loc, 1, &constants[i]));
00627             checkGLcall("glUniform1ivARB");
00628         }
00629     }
00630 
00631     /* Load immediate constants */
00632     ptr = list_head(&shader->constantsB);
00633     while (ptr)
00634     {
00635         const struct wined3d_shader_lconst *lconst = LIST_ENTRY(ptr, const struct wined3d_shader_lconst, entry);
00636         unsigned int idx = lconst->idx;
00637         const GLint *values = (const GLint *)lconst->value;
00638 
00639         TRACE_(d3d_constants)("Loading local constants %i: %i\n", idx, values[0]);
00640 
00641         snprintf(tmp_name, sizeof(tmp_name), "%s[%i]", prefix, idx);
00642         tmp_loc = GL_EXTCALL(glGetUniformLocationARB(programId, tmp_name));
00643         if (tmp_loc != -1) {
00644             /* We found this uniform name in the program - go ahead and send the data */
00645             GL_EXTCALL(glUniform1ivARB(tmp_loc, 1, values));
00646             checkGLcall("glUniform1ivARB");
00647         }
00648         ptr = list_next(&shader->constantsB, ptr);
00649     }
00650 }
00651 
00652 static void reset_program_constant_version(struct wine_rb_entry *entry, void *context)
00653 {
00654     WINE_RB_ENTRY_VALUE(entry, struct glsl_shader_prog_link, program_lookup_entry)->constant_version = 0;
00655 }
00656 
00660 /* GL locking is done by the caller (state handler) */
00661 static void shader_glsl_load_np2fixup_constants(void *shader_priv,
00662         const struct wined3d_gl_info *gl_info, const struct wined3d_state *state)
00663 {
00664     struct shader_glsl_priv *glsl_priv = shader_priv;
00665     const struct glsl_shader_prog_link *prog = glsl_priv->glsl_program;
00666 
00667     /* No GLSL program set - nothing to do. */
00668     if (!prog) return;
00669 
00670     /* NP2 texcoord fixup is (currently) only done for pixelshaders. */
00671     if (!use_ps(state)) return;
00672 
00673     if (prog->ps_args.np2_fixup && prog->np2Fixup_location != -1)
00674     {
00675         UINT i;
00676         UINT fixup = prog->ps_args.np2_fixup;
00677         GLfloat np2fixup_constants[4 * MAX_FRAGMENT_SAMPLERS];
00678 
00679         for (i = 0; fixup; fixup >>= 1, ++i)
00680         {
00681             const struct wined3d_texture *tex = state->textures[i];
00682             const unsigned char idx = prog->np2Fixup_info->idx[i];
00683             GLfloat *tex_dim = &np2fixup_constants[(idx >> 1) * 4];
00684 
00685             if (!tex)
00686             {
00687                 ERR("Nonexistent texture is flagged for NP2 texcoord fixup.\n");
00688                 continue;
00689             }
00690 
00691             if (idx % 2)
00692             {
00693                 tex_dim[2] = tex->pow2_matrix[0];
00694                 tex_dim[3] = tex->pow2_matrix[5];
00695             }
00696             else
00697             {
00698                 tex_dim[0] = tex->pow2_matrix[0];
00699                 tex_dim[1] = tex->pow2_matrix[5];
00700             }
00701         }
00702 
00703         GL_EXTCALL(glUniform4fvARB(prog->np2Fixup_location, prog->np2Fixup_info->num_consts, np2fixup_constants));
00704     }
00705 }
00706 
00710 /* GL locking is done by the caller (state handler) */
00711 static void shader_glsl_load_constants(const struct wined3d_context *context,
00712         char usePixelShader, char useVertexShader)
00713 {
00714     const struct wined3d_gl_info *gl_info = context->gl_info;
00715     struct wined3d_device *device = context->swapchain->device;
00716     struct wined3d_stateblock *stateBlock = device->stateBlock;
00717     const struct wined3d_state *state = &stateBlock->state;
00718     struct shader_glsl_priv *priv = device->shader_priv;
00719     float position_fixup[4];
00720 
00721     GLhandleARB programId;
00722     struct glsl_shader_prog_link *prog = priv->glsl_program;
00723     UINT constant_version;
00724     int i;
00725 
00726     if (!prog) {
00727         /* No GLSL program set - nothing to do. */
00728         return;
00729     }
00730     programId = prog->programId;
00731     constant_version = prog->constant_version;
00732 
00733     if (useVertexShader)
00734     {
00735         const struct wined3d_shader *vshader = state->vertex_shader;
00736 
00737         /* Load DirectX 9 float constants/uniforms for vertex shader */
00738         shader_glsl_load_constantsF(vshader, gl_info, state->vs_consts_f,
00739                 prog->vuniformF_locations, &priv->vconst_heap, priv->stack, constant_version);
00740 
00741         /* Load DirectX 9 integer constants/uniforms for vertex shader */
00742         shader_glsl_load_constantsI(vshader, gl_info, prog->vuniformI_locations, state->vs_consts_i,
00743                 stateBlock->changed.vertexShaderConstantsI & vshader->reg_maps.integer_constants);
00744 
00745         /* Load DirectX 9 boolean constants/uniforms for vertex shader */
00746         shader_glsl_load_constantsB(vshader, gl_info, programId, state->vs_consts_b,
00747                 stateBlock->changed.vertexShaderConstantsB & vshader->reg_maps.boolean_constants);
00748 
00749         /* Upload the position fixup params */
00750         shader_get_position_fixup(context, state, position_fixup);
00751         GL_EXTCALL(glUniform4fvARB(prog->posFixup_location, 1, position_fixup));
00752         checkGLcall("glUniform4fvARB");
00753     }
00754 
00755     if (usePixelShader)
00756     {
00757         const struct wined3d_shader *pshader = state->pixel_shader;
00758 
00759         /* Load DirectX 9 float constants/uniforms for pixel shader */
00760         shader_glsl_load_constantsF(pshader, gl_info, state->ps_consts_f,
00761                 prog->puniformF_locations, &priv->pconst_heap, priv->stack, constant_version);
00762 
00763         /* Load DirectX 9 integer constants/uniforms for pixel shader */
00764         shader_glsl_load_constantsI(pshader, gl_info, prog->puniformI_locations, state->ps_consts_i,
00765                 stateBlock->changed.pixelShaderConstantsI & pshader->reg_maps.integer_constants);
00766 
00767         /* Load DirectX 9 boolean constants/uniforms for pixel shader */
00768         shader_glsl_load_constantsB(pshader, gl_info, programId, state->ps_consts_b,
00769                 stateBlock->changed.pixelShaderConstantsB & pshader->reg_maps.boolean_constants);
00770 
00771         /* Upload the environment bump map matrix if needed. The needsbumpmat member specifies the texture stage to load the matrix from.
00772          * It can't be 0 for a valid texbem instruction.
00773          */
00774         for(i = 0; i < MAX_TEXTURES; i++) {
00775             const float *data;
00776 
00777             if(prog->bumpenvmat_location[i] == -1) continue;
00778 
00779             data = (const float *)&state->texture_states[i][WINED3D_TSS_BUMPENV_MAT00];
00780             GL_EXTCALL(glUniformMatrix2fvARB(prog->bumpenvmat_location[i], 1, 0, data));
00781             checkGLcall("glUniformMatrix2fvARB");
00782 
00783             /* texbeml needs the luminance scale and offset too. If texbeml
00784              * is used, needsbumpmat is set too, so we can check that in the
00785              * needsbumpmat check. */
00786             if (prog->luminancescale_location[i] != -1)
00787             {
00788                 const GLfloat *scale = (const GLfloat *)&state->texture_states[i][WINED3D_TSS_BUMPENV_LSCALE];
00789                 const GLfloat *offset = (const GLfloat *)&state->texture_states[i][WINED3D_TSS_BUMPENV_LOFFSET];
00790 
00791                 GL_EXTCALL(glUniform1fvARB(prog->luminancescale_location[i], 1, scale));
00792                 checkGLcall("glUniform1fvARB");
00793                 GL_EXTCALL(glUniform1fvARB(prog->luminanceoffset_location[i], 1, offset));
00794                 checkGLcall("glUniform1fvARB");
00795             }
00796         }
00797 
00798         if (prog->ycorrection_location != -1)
00799         {
00800             float correction_params[4];
00801 
00802             if (context->render_offscreen)
00803             {
00804                 correction_params[0] = 0.0f;
00805                 correction_params[1] = 1.0f;
00806             } else {
00807                 /* position is window relative, not viewport relative */
00808                 correction_params[0] = (float) context->current_rt->resource.height;
00809                 correction_params[1] = -1.0f;
00810             }
00811             GL_EXTCALL(glUniform4fvARB(prog->ycorrection_location, 1, correction_params));
00812         }
00813     }
00814 
00815     if (priv->next_constant_version == UINT_MAX)
00816     {
00817         TRACE("Max constant version reached, resetting to 0.\n");
00818         wine_rb_for_each_entry(&priv->program_lookup, reset_program_constant_version, NULL);
00819         priv->next_constant_version = 1;
00820     }
00821     else
00822     {
00823         prog->constant_version = priv->next_constant_version++;
00824     }
00825 }
00826 
00827 static void update_heap_entry(const struct constant_heap *heap, unsigned int idx,
00828         unsigned int heap_idx, DWORD new_version)
00829 {
00830     struct constant_entry *entries = heap->entries;
00831     unsigned int *positions = heap->positions;
00832     unsigned int parent_idx;
00833 
00834     while (heap_idx > 1)
00835     {
00836         parent_idx = heap_idx >> 1;
00837 
00838         if (new_version <= entries[parent_idx].version) break;
00839 
00840         entries[heap_idx] = entries[parent_idx];
00841         positions[entries[parent_idx].idx] = heap_idx;
00842         heap_idx = parent_idx;
00843     }
00844 
00845     entries[heap_idx].version = new_version;
00846     entries[heap_idx].idx = idx;
00847     positions[idx] = heap_idx;
00848 }
00849 
00850 static void shader_glsl_update_float_vertex_constants(struct wined3d_device *device, UINT start, UINT count)
00851 {
00852     struct shader_glsl_priv *priv = device->shader_priv;
00853     struct constant_heap *heap = &priv->vconst_heap;
00854     UINT i;
00855 
00856     for (i = start; i < count + start; ++i)
00857     {
00858         if (!device->stateBlock->changed.vertexShaderConstantsF[i])
00859             update_heap_entry(heap, i, heap->size++, priv->next_constant_version);
00860         else
00861             update_heap_entry(heap, i, heap->positions[i], priv->next_constant_version);
00862     }
00863 }
00864 
00865 static void shader_glsl_update_float_pixel_constants(struct wined3d_device *device, UINT start, UINT count)
00866 {
00867     struct shader_glsl_priv *priv = device->shader_priv;
00868     struct constant_heap *heap = &priv->pconst_heap;
00869     UINT i;
00870 
00871     for (i = start; i < count + start; ++i)
00872     {
00873         if (!device->stateBlock->changed.pixelShaderConstantsF[i])
00874             update_heap_entry(heap, i, heap->size++, priv->next_constant_version);
00875         else
00876             update_heap_entry(heap, i, heap->positions[i], priv->next_constant_version);
00877     }
00878 }
00879 
00880 static unsigned int vec4_varyings(DWORD shader_major, const struct wined3d_gl_info *gl_info)
00881 {
00882     unsigned int ret = gl_info->limits.glsl_varyings / 4;
00883     /* 4.0 shaders do not write clip coords because d3d10 does not support user clipplanes */
00884     if(shader_major > 3) return ret;
00885 
00886     /* 3.0 shaders may need an extra varying for the clip coord on some cards(mostly dx10 ones) */
00887     if (gl_info->quirks & WINED3D_QUIRK_GLSL_CLIP_VARYING) ret -= 1;
00888     return ret;
00889 }
00890 
/*
 * Emit the declaration section of a GLSL shader into `buffer` and open main().
 *
 * Written in order via shader_addline(): subroutine prototypes, the float /
 * int / bool constant uniform arrays, the vertex-only posFixup uniform and
 * order_ps_input() prototype or the pixel-only bump-map, luminance, sRGB and
 * ycorrection declarations, texture samplers, the NP2 fixup uniform array,
 * address registers, texcoord temporaries, the IN[] / OUT[] arrays,
 * temporaries, vertex attributes, loop registers, tmp0 / tmp1 and the local
 * constant uniforms (only when load_local_constsF is not set). The last thing
 * emitted is "void main() {" (plus the vpos setup for pixel shaders that read
 * vpos); the closing brace is not written here.
 *
 * Besides appending to `buffer`, this fills idx[] and num_consts in
 * ctx_priv->cur_np2fixup_info when a pixel shader has np2_fixup bits set.
 * ps_args is only dereferenced on pixel shader paths, cur_vs_args only on the
 * vertex shader path that uses relative constant addressing.
 */
00892 static void shader_generate_glsl_declarations(const struct wined3d_context *context,
00893         struct wined3d_shader_buffer *buffer, const struct wined3d_shader *shader,
00894         const struct wined3d_shader_reg_maps *reg_maps, const struct shader_glsl_ctx_priv *ctx_priv)
00895 {
00896     const struct wined3d_state *state = &shader->device->stateBlock->state;
00897     const struct ps_compile_args *ps_args = ctx_priv->cur_ps_args;
00898     const struct wined3d_gl_info *gl_info = context->gl_info;
00899     const struct wined3d_fb_state *fb = &shader->device->fb;
00900     unsigned int i, extra_constants_needed = 0;
00901     const struct wined3d_shader_lconst *lconst;
00902     DWORD map;
00903 
00904     /* There are some minor differences between pixel and vertex shaders */
00905     char pshader = shader_is_pshader_version(reg_maps->shader_version.type);
00906     char prefix = pshader ? 'P' : 'V';
00907 
00908     /* Prototype the subroutines */
00909     for (i = 0, map = reg_maps->labels; map; map >>= 1, ++i)
00910     {
00911         if (map & 1) shader_addline(buffer, "void subroutine%u();\n", i);
00912     }
00913 
00914     /* Declare the constants (aka uniforms) */
00915     if (shader->limits.constant_float > 0)
00916     {
00917         unsigned max_constantsF;
00918         /* Unless the shader uses indirect addressing, always declare the maximum array size and ignore that we need some
00919          * uniforms privately. E.g. if GL supports 256 uniforms, and we need 2 for the pos fixup and immediate values, still
00920          * declare VC[256]. If the shader needs more uniforms than we have it won't work in any case. If it uses less, the
00921          * compiler will figure out which uniforms are really used and strip them out. This allows a shader to use c255 on
00922          * a dx9 card, as long as it doesn't also use all the other constants.
00923          *
00924          * If the shader uses indirect addressing the compiler must assume that all declared uniforms are used. In this case,
00925          * declare only the amount that we're assured to have.
00926          *
00927          * Thus we run into problems in these two cases:
00928          * 1) The shader really uses more uniforms than supported
00929          * 2) The shader uses indirect addressing, less constants than supported, but uses a constant index > #supported consts
00930          */
00931         if (pshader)
00932         {
00933             /* No indirect addressing here. */
00934             max_constantsF = gl_info->limits.glsl_ps_float_constants;
00935         }
00936         else
00937         {
00938             if (reg_maps->usesrelconstF)
00939             {
00940                 /* Subtract the other potential uniforms from the max
00941                  * available (bools, ints, and 1 row of projection matrix).
00942                  * Subtract another uniform for immediate values, which have
00943                  * to be loaded via uniform by the driver as well. The shader
00944                  * code only uses 0.5, 2.0, 1.0, 128 and -128 in vertex
00945                  * shader code, so one vec4 should be enough. (Unfortunately
00946                  * the Nvidia driver doesn't store 128 and -128 in one float).
00947                  *
00948                  * Writing gl_ClipVertex requires one uniform for each
00949                  * clipplane as well. */
00950                 max_constantsF = gl_info->limits.glsl_vs_float_constants - 3;
00951                 if(ctx_priv->cur_vs_args->clip_enabled)
00952                 {
00953                     max_constantsF -= gl_info->limits.clipplanes;
00954                 }
00955                 max_constantsF -= count_bits(reg_maps->integer_constants);
00956                 /* Strictly speaking a bool only uses one scalar, but the nvidia(Linux) compiler doesn't pack them properly,
00957                  * so each scalar requires a full vec4. We could work around this by packing the booleans ourselves, but
00958                  * for now take this into account when calculating the number of available constants
00959                  */
00960                 max_constantsF -= count_bits(reg_maps->boolean_constants);
00961                 /* Set by driver quirks in directx.c */
00962                 max_constantsF -= gl_info->reserved_glsl_constants;
00963             }
00964             else
00965             {
00966                 max_constantsF = gl_info->limits.glsl_vs_float_constants;
00967             }
00968         }
        /* Never declare more entries than the shader version itself allows. */
00969         max_constantsF = min(shader->limits.constant_float, max_constantsF);
00970         shader_addline(buffer, "uniform vec4 %cC[%u];\n", prefix, max_constantsF);
00971     }
00972 
00973     /* Always declare the full set of constants, the compiler can remove the
00974      * unused ones because d3d doesn't (yet) support indirect int and bool
00975      * constant addressing. This avoids problems if the app uses e.g. i0 and i9. */
00976     if (shader->limits.constant_int > 0 && reg_maps->integer_constants)
00977         shader_addline(buffer, "uniform ivec4 %cI[%u];\n", prefix, shader->limits.constant_int);
00978 
00979     if (shader->limits.constant_bool > 0 && reg_maps->boolean_constants)
00980         shader_addline(buffer, "uniform bool %cB[%u];\n", prefix, shader->limits.constant_bool);
00981 
00982     if (!pshader)
00983     {
00984         shader_addline(buffer, "uniform vec4 posFixup;\n");
00985         shader_addline(buffer, "void order_ps_input(in vec4[%u]);\n", MAX_REG_OUTPUT);
00986     }
00987     else
00988     {
        /* One mat2 per stage flagged in reg_maps->bumpmat. extra_constants_needed
         * counts the declarations made here so that the ycorrection free-slot
         * check further down can take them into account. */
00989         for (i = 0, map = reg_maps->bumpmat; map; map >>= 1, ++i)
00990         {
00991             if (!(map & 1)) continue;
00992 
00993             shader_addline(buffer, "uniform mat2 bumpenvmat%d;\n", i);
00994 
00995             if (reg_maps->luminanceparams & (1 << i))
00996             {
00997                 shader_addline(buffer, "uniform float luminancescale%d;\n", i);
00998                 shader_addline(buffer, "uniform float luminanceoffset%d;\n", i);
00999                 extra_constants_needed++;
01000             }
01001 
01002             extra_constants_needed++;
01003         }
01004 
01005         if (ps_args->srgb_correction)
01006         {
01007             shader_addline(buffer, "const vec4 srgb_const0 = vec4(%.8e, %.8e, %.8e, %.8e);\n",
01008                     srgb_pow, srgb_mul_high, srgb_sub_high, srgb_mul_low);
01009             shader_addline(buffer, "const vec4 srgb_const1 = vec4(%.8e, 0.0, 0.0, 0.0);\n",
01010                     srgb_cmp);
01011         }
01012         if (reg_maps->vpos || reg_maps->usesdsy)
01013         {
01014             if (shader->limits.constant_float + extra_constants_needed
01015                     + 1 < gl_info->limits.glsl_ps_float_constants)
01016             {
01017                 shader_addline(buffer, "uniform vec4 ycorrection;\n");
01018                 extra_constants_needed++;
01019             }
01020             else
01021             {
01022                 /* This happens because we do not have proper tracking of the constant registers that are
01023                  * actually used, only the max limit of the shader version
01024                  */
01025                 FIXME("Cannot find a free uniform for vpos correction params\n");
01026                 shader_addline(buffer, "const vec4 ycorrection = vec4(%f, %f, 0.0, 0.0);\n",
01027                         context->render_offscreen ? 0.0f : fb->render_targets[0]->resource.height,
01028                         context->render_offscreen ? 1.0f : -1.0f);
01029             }
01030             shader_addline(buffer, "vec4 vpos;\n");
01031         }
01032     }
01033 
01034     /* Declare texture samplers */
01035     for (i = 0; i < shader->limits.sampler; ++i)
01036     {
01037         if (reg_maps->sampler_type[i])
01038         {
01039             const struct wined3d_texture *texture;
01040 
01041             switch (reg_maps->sampler_type[i])
01042             {
01043                 case WINED3DSTT_1D:
01044                     if (pshader && ps_args->shadow & (1 << i))
01045                         shader_addline(buffer, "uniform sampler1DShadow %csampler%u;\n", prefix, i);
01046                     else
01047                         shader_addline(buffer, "uniform sampler1D %csampler%u;\n", prefix, i);
01048                     break;
01049                 case WINED3DSTT_2D:
01050                     texture = state->textures[i];
01051                     if (pshader && ps_args->shadow & (1 << i))
01052                     {
01053                         if (texture && texture->target == GL_TEXTURE_RECTANGLE_ARB)
01054                             shader_addline(buffer, "uniform sampler2DRectShadow %csampler%u;\n", prefix, i);
01055                         else
01056                             shader_addline(buffer, "uniform sampler2DShadow %csampler%u;\n", prefix, i);
01057                     }
01058                     else
01059                     {
01060                         if (texture && texture->target == GL_TEXTURE_RECTANGLE_ARB)
01061                             shader_addline(buffer, "uniform sampler2DRect %csampler%u;\n", prefix, i);
01062                         else
01063                             shader_addline(buffer, "uniform sampler2D %csampler%u;\n", prefix, i);
01064                     }
01065                     break;
01066                 case WINED3DSTT_CUBE:
01067                     if (pshader && ps_args->shadow & (1 << i)) FIXME("Unsupported Cube shadow sampler.\n");
01068                     shader_addline(buffer, "uniform samplerCube %csampler%u;\n", prefix, i);
01069                     break;
01070                 case WINED3DSTT_VOLUME:
01071                     if (pshader && ps_args->shadow & (1 << i)) FIXME("Unsupported 3D shadow sampler.\n");
01072                     shader_addline(buffer, "uniform sampler3D %csampler%u;\n", prefix, i);
01073                     break;
01074                 default:
01075                     shader_addline(buffer, "uniform unsupported_sampler %csampler%u;\n", prefix, i);
01076                     FIXME("Unrecognized sampler type: %#x\n", reg_maps->sampler_type[i]);
01077                     break;
01078             }
01079         }
01080     }
01081 
01082     /* Declare uniforms for NP2 texcoord fixup:
01083      * This is NOT done inside the loop that declares the texture samplers since the NP2 fixup code
01084      * is currently only used for the GeforceFX series and when forcing the ARB_npot extension off.
01085      * Modern cards just skip the code anyway, so put it inside a separate loop. */
01086     if (pshader && ps_args->np2_fixup) {
01087 
01088         struct ps_np2fixup_info* const fixup = ctx_priv->cur_np2fixup_info;
01089         UINT cur = 0;
01090 
01091         /* NP2/RECT textures in OpenGL use texcoords in the range [0,width]x[0,height]
01092          * while D3D has them in the (normalized) [0,1]x[0,1] range.
01093          * samplerNP2Fixup stores texture dimensions and is updated through
01094          * shader_glsl_load_np2fixup_constants when the sampler changes. */
01095 
01096         for (i = 0; i < shader->limits.sampler; ++i)
01097         {
01098             if (reg_maps->sampler_type[i])
01099             {
01100                 if (!(ps_args->np2_fixup & (1 << i))) continue;
01101 
01102                 if (WINED3DSTT_2D != reg_maps->sampler_type[i]) {
01103                     FIXME("Non-2D texture is flagged for NP2 texcoord fixup.\n");
01104                     continue;
01105                 }
01106 
01107                 fixup->idx[i] = cur++;
01108             }
01109         }
01110 
        /* ceil(cur / 2): the uniform array gets one vec4 per two fixup entries. */
01111         fixup->num_consts = (cur + 1) >> 1;
01112         shader_addline(buffer, "uniform vec4 %csamplerNP2Fixup[%u];\n", prefix, fixup->num_consts);
01113     }
01114 
01115     /* Declare address variables */
01116     for (i = 0, map = reg_maps->address; map; map >>= 1, ++i)
01117     {
01118         if (map & 1) shader_addline(buffer, "ivec4 A%u;\n", i);
01119     }
01120 
01121     /* Declare texture coordinate temporaries and initialize them */
01122     for (i = 0, map = reg_maps->texcoord; map; map >>= 1, ++i)
01123     {
01124         if (map & 1) shader_addline(buffer, "vec4 T%u = gl_TexCoord[%u];\n", i, i);
01125     }
01126 
01127     /* Declare input register varyings. Only pixel shader, vertex shaders have that declared in the
01128      * helper function shader that is linked in at link time
01129      */
01130     if (pshader && reg_maps->shader_version.major >= 3)
01131     {
01132         UINT in_count = min(vec4_varyings(reg_maps->shader_version.major, gl_info), shader->limits.packed_input);
01133 
01134         if (use_vs(state))
01135             shader_addline(buffer, "varying vec4 IN[%u];\n", in_count);
01136         else
01137             /* TODO: Write a replacement shader for the fixed function vertex pipeline, so this isn't needed.
01138              * For fixed function vertex processing + 3.0 pixel shader we need a separate function in the
01139              * pixel shader that reads the fixed function color into the packed input registers. */
01140             shader_addline(buffer, "vec4 IN[%u];\n", in_count);
01141     }
01142 
01143     /* Declare output register temporaries */
01144     if (shader->limits.packed_output)
01145         shader_addline(buffer, "vec4 OUT[%u];\n", shader->limits.packed_output);
01146 
01147     /* Declare temporary variables */
01148     for (i = 0, map = reg_maps->temporary; map; map >>= 1, ++i)
01149     {
01150         if (map & 1) shader_addline(buffer, "vec4 R%u;\n", i);
01151     }
01152 
01153     /* Declare attributes */
01154     if (reg_maps->shader_version.type == WINED3D_SHADER_TYPE_VERTEX)
01155     {
01156         for (i = 0, map = reg_maps->input_registers; map; map >>= 1, ++i)
01157         {
01158             if (map & 1) shader_addline(buffer, "attribute vec4 attrib%i;\n", i);
01159         }
01160     }
01161 
01162     /* Declare loop registers aLx */
01163     for (i = 0; i < reg_maps->loop_depth; i++) {
01164         shader_addline(buffer, "int aL%u;\n", i);
01165         shader_addline(buffer, "int tmpInt%u;\n", i);
01166     }
01167 
01168     /* Temporary variables for matrix operations */
01169     shader_addline(buffer, "vec4 tmp0;\n");
01170     shader_addline(buffer, "vec4 tmp1;\n");
01171 
01172     /* Local constants use a different name so they can be loaded once at shader link time
01173      * They can't be hardcoded into the shader text via LC = {x, y, z, w}; because the
01174      * float -> string conversion can cause precision loss.
01175      */
01176     if (!shader->load_local_constsF)
01177     {
01178         LIST_FOR_EACH_ENTRY(lconst, &shader->constantsF, struct wined3d_shader_lconst, entry)
01179         {
01180             shader_addline(buffer, "uniform vec4 %cLC%u;\n", prefix, lconst->idx);
01181         }
01182     }
01183 
01184     /* Start the main program */
01185     shader_addline(buffer, "void main() {\n");
01186     if(pshader && reg_maps->vpos) {
01187         /* DirectX apps expect integer values, while OpenGL drivers add approximately 0.5. This causes
01188          * off-by-one problems as spotted by the vPos d3d9 visual test. Unfortunately the ATI cards do
01189          * not add exactly 0.5, but rather something like 0.49999999 or 0.50000001, which still causes
01190          * precision troubles when we just subtract 0.5.
01191          *
01192          * To deal with that just floor() the position. This will eliminate the fraction on all cards.
01193          *
01194          * TODO: Test how that behaves with multisampling once we can enable multisampling in winex11.
01195          *
01196          * An advantage of floor is that it works even if the driver doesn't add 1/2. It is somewhat
01197          * questionable if 1.5, 2.5, ... are the proper values to return in gl_FragCoord, even though
01198          * coordinates specify the pixel centers instead of the pixel corners. This code will behave
01199          * correctly on drivers that returns integer values.
01200          */
01201         shader_addline(buffer, "vpos = floor(vec4(0, ycorrection[0], 0, 0) + gl_FragCoord * vec4(1, ycorrection[1], 1, 1));\n");
01202     }
01203 }
01204 
01205 /*****************************************************************************
01206  * Functions to generate GLSL strings from DirectX Shader bytecode begin here.
01207  *
01208  * For more information, see http://wiki.winehq.org/DirectX-Shaders
01209  ****************************************************************************/
01210 
01211 /* Prototypes */
01212 static void shader_glsl_add_src_param(const struct wined3d_shader_instruction *ins,
01213         const struct wined3d_shader_src_param *wined3d_src, DWORD mask, struct glsl_src_param *glsl_src);
01214 
/* GLSL multiplier prefixes ("<factor> * ") indexed by shift value 0..15, as
 * annotated on each entry: 1..5 scale up by 2..32, 12..15 scale down by
 * 16..2. Entries 0 and 6..11 are empty strings, i.e. they add no multiplier. */
01216 static const char * const shift_glsl_tab[] = {
01217     "",           /*  0 (none) */
01218     "2.0 * ",     /*  1 (x2)   */
01219     "4.0 * ",     /*  2 (x4)   */
01220     "8.0 * ",     /*  3 (x8)   */
01221     "16.0 * ",    /*  4 (x16)  */
01222     "32.0 * ",    /*  5 (x32)  */
01223     "",           /*  6 (x64)  */
01224     "",           /*  7 (x128) */
01225     "",           /*  8 (d256) */
01226     "",           /*  9 (d128) */
01227     "",           /* 10 (d64)  */
01228     "",           /* 11 (d32)  */
01229     "0.0625 * ",  /* 12 (d16)  */
01230     "0.125 * ",   /* 13 (d8)   */
01231     "0.25 * ",    /* 14 (d4)   */
01232     "0.5 * "      /* 15 (d2)   */
01233 };
01234 
01235 /* Generate a GLSL parameter that does the input modifier computation and return the input register/mask to use */
01236 static void shader_glsl_gen_modifier(enum wined3d_shader_src_modifier src_modifier,
01237         const char *in_reg, const char *in_regswizzle, char *out_str)
01238 {
01239     out_str[0] = 0;
01240 
01241     switch (src_modifier)
01242     {
01243     case WINED3DSPSM_DZ: /* Need to handle this in the instructions itself (texld & texcrd). */
01244     case WINED3DSPSM_DW:
01245     case WINED3DSPSM_NONE:
01246         sprintf(out_str, "%s%s", in_reg, in_regswizzle);
01247         break;
01248     case WINED3DSPSM_NEG:
01249         sprintf(out_str, "-%s%s", in_reg, in_regswizzle);
01250         break;
01251     case WINED3DSPSM_NOT:
01252         sprintf(out_str, "!%s%s", in_reg, in_regswizzle);
01253         break;
01254     case WINED3DSPSM_BIAS:
01255         sprintf(out_str, "(%s%s - vec4(0.5)%s)", in_reg, in_regswizzle, in_regswizzle);
01256         break;
01257     case WINED3DSPSM_BIASNEG:
01258         sprintf(out_str, "-(%s%s - vec4(0.5)%s)", in_reg, in_regswizzle, in_regswizzle);
01259         break;
01260     case WINED3DSPSM_SIGN:
01261         sprintf(out_str, "(2.0 * (%s%s - 0.5))", in_reg, in_regswizzle);
01262         break;
01263     case WINED3DSPSM_SIGNNEG:
01264         sprintf(out_str, "-(2.0 * (%s%s - 0.5))", in_reg, in_regswizzle);
01265         break;
01266     case WINED3DSPSM_COMP:
01267         sprintf(out_str, "(1.0 - %s%s)", in_reg, in_regswizzle);
01268         break;
01269     case WINED3DSPSM_X2:
01270         sprintf(out_str, "(2.0 * %s%s)", in_reg, in_regswizzle);
01271         break;
01272     case WINED3DSPSM_X2NEG:
01273         sprintf(out_str, "-(2.0 * %s%s)", in_reg, in_regswizzle);
01274         break;
01275     case WINED3DSPSM_ABS:
01276         sprintf(out_str, "abs(%s%s)", in_reg, in_regswizzle);
01277         break;
01278     case WINED3DSPSM_ABSNEG:
01279         sprintf(out_str, "-abs(%s%s)", in_reg, in_regswizzle);
01280         break;
01281     default:
01282         FIXME("Unhandled modifier %u\n", src_modifier);
01283         sprintf(out_str, "%s%s", in_reg, in_regswizzle);
01284     }
01285 }
01286 
01289 static void shader_glsl_get_register_name(const struct wined3d_shader_register *reg,
01290         char *register_name, BOOL *is_color, const struct wined3d_shader_instruction *ins)
01291 {
01292     /* oPos, oFog and oPts in D3D */
01293     static const char * const hwrastout_reg_names[] = {"OUT[10]", "OUT[11].x", "OUT[11].y"};
01294 
01295     const struct wined3d_shader *shader = ins->ctx->shader;
01296     const struct wined3d_shader_reg_maps *reg_maps = ins->ctx->reg_maps;
01297     const struct wined3d_gl_info *gl_info = ins->ctx->gl_info;
01298     char pshader = shader_is_pshader_version(reg_maps->shader_version.type);
01299 
01300     *is_color = FALSE;
01301 
01302     switch (reg->type)
01303     {
01304         case WINED3DSPR_TEMP:
01305             sprintf(register_name, "R%u", reg->idx);
01306             break;
01307 
01308         case WINED3DSPR_INPUT:
01309             /* vertex shaders */
01310             if (!pshader)
01311             {
01312                 struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data;
01313                 if (priv->cur_vs_args->swizzle_map & (1 << reg->idx)) *is_color = TRUE;
01314                 sprintf(register_name, "attrib%u", reg->idx);
01315                 break;
01316             }
01317 
01318             /* pixel shaders >= 3.0 */
01319             if (reg_maps->shader_version.major >= 3)
01320             {
01321                 DWORD idx = shader->u.ps.input_reg_map[reg->idx];
01322                 unsigned int in_count = vec4_varyings(reg_maps->shader_version.major, gl_info);
01323 
01324                 if (reg->rel_addr)
01325                 {
01326                     struct glsl_src_param rel_param;
01327 
01328                     shader_glsl_add_src_param(ins, reg->rel_addr, WINED3DSP_WRITEMASK_0, &rel_param);
01329 
01330                     /* Removing a + 0 would be an obvious optimization, but macos doesn't see the NOP
01331                      * operation there */
01332                     if (idx)
01333                     {
01334                         if (shader->u.ps.declared_in_count > in_count)
01335                         {
01336                             sprintf(register_name,
01337                                     "((%s + %u) > %d ? (%s + %u) > %d ? gl_SecondaryColor : gl_Color : IN[%s + %u])",
01338                                     rel_param.param_str, idx, in_count - 1, rel_param.param_str, idx, in_count,
01339                                     rel_param.param_str, idx);
01340                         }
01341                         else
01342                         {
01343                             sprintf(register_name, "IN[%s + %u]", rel_param.param_str, idx);
01344                         }
01345                     }
01346                     else
01347                     {
01348                         if (shader->u.ps.declared_in_count > in_count)
01349                         {
01350                             sprintf(register_name, "((%s) > %d ? (%s) > %d ? gl_SecondaryColor : gl_Color : IN[%s])",
01351                                     rel_param.param_str, in_count - 1, rel_param.param_str, in_count,
01352                                     rel_param.param_str);
01353                         }
01354                         else
01355                         {
01356                             sprintf(register_name, "IN[%s]", rel_param.param_str);
01357                         }
01358                     }
01359                 }
01360                 else
01361                 {
01362                     if (idx == in_count) sprintf(register_name, "gl_Color");
01363                     else if (idx == in_count + 1) sprintf(register_name, "gl_SecondaryColor");
01364                     else sprintf(register_name, "IN[%u]", idx);
01365                 }
01366             }
01367             else
01368             {
01369                 if (!reg->idx) strcpy(register_name, "gl_Color");
01370                 else strcpy(register_name, "gl_SecondaryColor");
01371                 break;
01372             }
01373             break;
01374 
01375         case WINED3DSPR_CONST:
01376             {
01377                 const char prefix = pshader ? 'P' : 'V';
01378 
01379                 /* Relative addressing */
01380                 if (reg->rel_addr)
01381                 {
01382                     struct glsl_src_param rel_param;
01383                     shader_glsl_add_src_param(ins, reg->rel_addr, WINED3DSP_WRITEMASK_0, &rel_param);
01384                     if (reg->idx) sprintf(register_name, "%cC[%s + %u]", prefix, rel_param.param_str, reg->idx);
01385                     else sprintf(register_name, "%cC[%s]", prefix, rel_param.param_str);
01386                 }
01387                 else
01388                 {
01389                     if (shader_constant_is_local(shader, reg->idx))
01390                         sprintf(register_name, "%cLC%u", prefix, reg->idx);
01391                     else
01392                         sprintf(register_name, "%cC[%u]", prefix, reg->idx);
01393                 }
01394             }
01395             break;
01396 
01397         case WINED3DSPR_CONSTINT:
01398             if (pshader) sprintf(register_name, "PI[%u]", reg->idx);
01399             else sprintf(register_name, "VI[%u]", reg->idx);
01400             break;
01401 
01402         case WINED3DSPR_CONSTBOOL:
01403             if (pshader) sprintf(register_name, "PB[%u]", reg->idx);
01404             else sprintf(register_name, "VB[%u]", reg->idx);
01405             break;
01406 
01407         case WINED3DSPR_TEXTURE: /* case WINED3DSPR_ADDR: */
01408             if (pshader) sprintf(register_name, "T%u", reg->idx);
01409             else sprintf(register_name, "A%u", reg->idx);
01410             break;
01411 
01412         case WINED3DSPR_LOOP:
01413             sprintf(register_name, "aL%u", ins->ctx->loop_state->current_reg - 1);
01414             break;
01415 
01416         case WINED3DSPR_SAMPLER:
01417             if (pshader) sprintf(register_name, "Psampler%u", reg->idx);
01418             else sprintf(register_name, "Vsampler%u", reg->idx);
01419             break;
01420 
01421         case WINED3DSPR_COLOROUT:
01422             if (reg->idx >= gl_info->limits.buffers)
01423                 WARN("Write to render target %u, only %d supported.\n", reg->idx, gl_info->limits.buffers);
01424 
01425             sprintf(register_name, "gl_FragData[%u]", reg->idx);
01426             break;
01427 
01428         case WINED3DSPR_RASTOUT:
01429             sprintf(register_name, "%s", hwrastout_reg_names[reg->idx]);
01430             break;
01431 
01432         case WINED3DSPR_DEPTHOUT:
01433             sprintf(register_name, "gl_FragDepth");
01434             break;
01435 
01436         case WINED3DSPR_ATTROUT:
01437             if (!reg->idx) sprintf(register_name, "OUT[8]");
01438             else sprintf(register_name, "OUT[9]");
01439             break;
01440 
01441         case WINED3DSPR_TEXCRDOUT:
01442             /* Vertex shaders >= 3.0: WINED3DSPR_OUTPUT */
01443             sprintf(register_name, "OUT[%u]", reg->idx);
01444             break;
01445 
01446         case WINED3DSPR_MISCTYPE:
01447             if (!reg->idx)
01448             {
01449                 /* vPos */
01450                 sprintf(register_name, "vpos");
01451             }
01452             else if (reg->idx == 1)
01453             {
01454                 /* Note that gl_FrontFacing is a bool, while vFace is
01455                  * a float for which the sign determines front/back */
01456                 sprintf(register_name, "(gl_FrontFacing ? 1.0 : -1.0)");
01457             }
01458             else
01459             {
01460                 FIXME("Unhandled misctype register %d\n", reg->idx);
01461                 sprintf(register_name, "unrecognized_register");
01462             }
01463             break;
01464 
01465         case WINED3DSPR_IMMCONST:
01466             switch (reg->immconst_type)
01467             {
01468                 case WINED3D_IMMCONST_SCALAR:
01469                     sprintf(register_name, "%.8e", *(const float *)reg->immconst_data);
01470                     break;
01471 
01472                 case WINED3D_IMMCONST_VEC4:
01473                     sprintf(register_name, "vec4(%.8e, %.8e, %.8e, %.8e)",
01474                             *(const float *)&reg->immconst_data[0], *(const float *)&reg->immconst_data[1],
01475                             *(const float *)&reg->immconst_data[2], *(const float *)&reg->immconst_data[3]);
01476                     break;
01477 
01478                 default:
01479                     FIXME("Unhandled immconst type %#x\n", reg->immconst_type);
01480                     sprintf(register_name, "<unhandled_immconst_type %#x>", reg->immconst_type);
01481             }
01482             break;
01483 
01484         default:
01485             FIXME("Unhandled register name Type(%d)\n", reg->type);
01486             sprintf(register_name, "unrecognized_register");
01487             break;
01488     }
01489 }
01490 
01491 static void shader_glsl_write_mask_to_str(DWORD write_mask, char *str)
01492 {
01493     *str++ = '.';
01494     if (write_mask & WINED3DSP_WRITEMASK_0) *str++ = 'x';
01495     if (write_mask & WINED3DSP_WRITEMASK_1) *str++ = 'y';
01496     if (write_mask & WINED3DSP_WRITEMASK_2) *str++ = 'z';
01497     if (write_mask & WINED3DSP_WRITEMASK_3) *str++ = 'w';
01498     *str = '\0';
01499 }
01500 
01501 /* Get the GLSL write mask for the destination register */
01502 static DWORD shader_glsl_get_write_mask(const struct wined3d_shader_dst_param *param, char *write_mask)
01503 {
01504     DWORD mask = param->write_mask;
01505 
01506     if (shader_is_scalar(&param->reg))
01507     {
01508         mask = WINED3DSP_WRITEMASK_0;
01509         *write_mask = '\0';
01510     }
01511     else
01512     {
01513         shader_glsl_write_mask_to_str(mask, write_mask);
01514     }
01515 
01516     return mask;
01517 }
01518 
01519 static unsigned int shader_glsl_get_write_mask_size(DWORD write_mask) {
01520     unsigned int size = 0;
01521 
01522     if (write_mask & WINED3DSP_WRITEMASK_0) ++size;
01523     if (write_mask & WINED3DSP_WRITEMASK_1) ++size;
01524     if (write_mask & WINED3DSP_WRITEMASK_2) ++size;
01525     if (write_mask & WINED3DSP_WRITEMASK_3) ++size;
01526 
01527     return size;
01528 }
01529 
01530 static void shader_glsl_swizzle_to_str(const DWORD swizzle, BOOL fixup, DWORD mask, char *str)
01531 {
01532     /* For registers of type WINED3DDECLTYPE_D3DCOLOR, data is stored as "bgra",
01533      * but addressed as "rgba". To fix this we need to swap the register's x
01534      * and z components. */
01535     const char *swizzle_chars = fixup ? "zyxw" : "xyzw";
01536 
01537     *str++ = '.';
01538     /* swizzle bits fields: wwzzyyxx */
01539     if (mask & WINED3DSP_WRITEMASK_0) *str++ = swizzle_chars[swizzle & 0x03];
01540     if (mask & WINED3DSP_WRITEMASK_1) *str++ = swizzle_chars[(swizzle >> 2) & 0x03];
01541     if (mask & WINED3DSP_WRITEMASK_2) *str++ = swizzle_chars[(swizzle >> 4) & 0x03];
01542     if (mask & WINED3DSP_WRITEMASK_3) *str++ = swizzle_chars[(swizzle >> 6) & 0x03];
01543     *str = '\0';
01544 }
01545 
01546 static void shader_glsl_get_swizzle(const struct wined3d_shader_src_param *param,
01547         BOOL fixup, DWORD mask, char *swizzle_str)
01548 {
01549     if (shader_is_scalar(&param->reg))
01550         *swizzle_str = '\0';
01551     else
01552         shader_glsl_swizzle_to_str(param->swizzle, fixup, mask, swizzle_str);
01553 }
01554 
01555 /* From a given parameter token, generate the corresponding GLSL string.
01556  * Also, return the actual register name and swizzle in case the
01557  * caller needs this information as well. */
01558 static void shader_glsl_add_src_param(const struct wined3d_shader_instruction *ins,
01559         const struct wined3d_shader_src_param *wined3d_src, DWORD mask, struct glsl_src_param *glsl_src)
01560 {
01561     BOOL is_color = FALSE;
01562     char swizzle_str[6];
01563 
01564     glsl_src->reg_name[0] = '\0';
01565     glsl_src->param_str[0] = '\0';
01566     swizzle_str[0] = '\0';
01567 
01568     shader_glsl_get_register_name(&wined3d_src->reg, glsl_src->reg_name, &is_color, ins);
01569     shader_glsl_get_swizzle(wined3d_src, is_color, mask, swizzle_str);
01570     shader_glsl_gen_modifier(wined3d_src->modifiers, glsl_src->reg_name, swizzle_str, glsl_src->param_str);
01571 }
01572 
01573 /* From a given parameter token, generate the corresponding GLSL string.
01574  * Also, return the actual register name and swizzle in case the
01575  * caller needs this information as well. */
01576 static DWORD shader_glsl_add_dst_param(const struct wined3d_shader_instruction *ins,
01577         const struct wined3d_shader_dst_param *wined3d_dst, struct glsl_dst_param *glsl_dst)
01578 {
01579     BOOL is_color = FALSE;
01580 
01581     glsl_dst->mask_str[0] = '\0';
01582     glsl_dst->reg_name[0] = '\0';
01583 
01584     shader_glsl_get_register_name(&wined3d_dst->reg, glsl_dst->reg_name, &is_color, ins);
01585     return shader_glsl_get_write_mask(wined3d_dst, glsl_dst->mask_str);
01586 }
01587 
01588 /* Append the destination part of the instruction to the buffer, return the effective write mask */
01589 static DWORD shader_glsl_append_dst_ext(struct wined3d_shader_buffer *buffer,
01590         const struct wined3d_shader_instruction *ins, const struct wined3d_shader_dst_param *dst)
01591 {
01592     struct glsl_dst_param glsl_dst;
01593     DWORD mask;
01594 
01595     mask = shader_glsl_add_dst_param(ins, dst, &glsl_dst);
01596     if (mask) shader_addline(buffer, "%s%s = %s(", glsl_dst.reg_name, glsl_dst.mask_str, shift_glsl_tab[dst->shift]);
01597 
01598     return mask;
01599 }
01600 
01601 /* Append the destination part of the instruction to the buffer, return the effective write mask */
01602 static DWORD shader_glsl_append_dst(struct wined3d_shader_buffer *buffer, const struct wined3d_shader_instruction *ins)
01603 {
01604     return shader_glsl_append_dst_ext(buffer, ins, &ins->dst[0]);
01605 }
01606 
01608 static void shader_glsl_add_instruction_modifiers(const struct wined3d_shader_instruction *ins)
01609 {
01610     struct glsl_dst_param dst_param;
01611     DWORD modifiers;
01612 
01613     if (!ins->dst_count) return;
01614 
01615     modifiers = ins->dst[0].modifiers;
01616     if (!modifiers) return;
01617 
01618     shader_glsl_add_dst_param(ins, &ins->dst[0], &dst_param);
01619 
01620     if (modifiers & WINED3DSPDM_SATURATE)
01621     {
01622         /* _SAT means to clamp the value of the register to between 0 and 1 */
01623         shader_addline(ins->ctx->buffer, "%s%s = clamp(%s%s, 0.0, 1.0);\n", dst_param.reg_name,
01624                 dst_param.mask_str, dst_param.reg_name, dst_param.mask_str);
01625     }
01626 
01627     if (modifiers & WINED3DSPDM_MSAMPCENTROID)
01628     {
01629         FIXME("_centroid modifier not handled\n");
01630     }
01631 
01632     if (modifiers & WINED3DSPDM_PARTIALPRECISION)
01633     {
01634         /* MSDN says this modifier can be safely ignored, so that's what we'll do. */
01635     }
01636 }
01637 
01638 static const char *shader_glsl_get_rel_op(enum wined3d_shader_rel_op op)
01639 {
01640     switch (op)
01641     {
01642         case WINED3D_SHADER_REL_OP_GT: return ">";
01643         case WINED3D_SHADER_REL_OP_EQ: return "==";
01644         case WINED3D_SHADER_REL_OP_GE: return ">=";
01645         case WINED3D_SHADER_REL_OP_LT: return "<";
01646         case WINED3D_SHADER_REL_OP_NE: return "!=";
01647         case WINED3D_SHADER_REL_OP_LE: return "<=";
01648         default:
01649             FIXME("Unrecognized operator %#x.\n", op);
01650             return "(\?\?)";
01651     }
01652 }
01653 
01654 static void shader_glsl_get_sample_function(const struct wined3d_shader_context *ctx,
01655         DWORD sampler_idx, DWORD flags, struct glsl_sample_function *sample_function)
01656 {
01657     enum wined3d_sampler_texture_type sampler_type = ctx->reg_maps->sampler_type[sampler_idx];
01658     const struct wined3d_gl_info *gl_info = ctx->gl_info;
01659     BOOL shadow = shader_is_pshader_version(ctx->reg_maps->shader_version.type)
01660             && (((const struct shader_glsl_ctx_priv *)ctx->backend_data)->cur_ps_args->shadow & (1 << sampler_idx));
01661     BOOL projected = flags & WINED3D_GLSL_SAMPLE_PROJECTED;
01662     BOOL texrect = flags & WINED3D_GLSL_SAMPLE_RECT;
01663     BOOL lod = flags & WINED3D_GLSL_SAMPLE_LOD;
01664     BOOL grad = flags & WINED3D_GLSL_SAMPLE_GRAD;
01665 
01666     /* Note that there's no such thing as a projected cube texture. */
01667     switch(sampler_type) {
01668         case WINED3DSTT_1D:
01669             if (shadow)
01670             {
01671                 if (lod)
01672                 {
01673                     sample_function->name = projected ? "shadow1DProjLod" : "shadow1DLod";
01674                 }
01675                 else if (grad)
01676                 {
01677                     if (gl_info->supported[EXT_GPU_SHADER4])
01678                         sample_function->name = projected ? "shadow1DProjGrad" : "shadow1DGrad";
01679                     else if (gl_info->supported[ARB_SHADER_TEXTURE_LOD])
01680                         sample_function->name = projected ? "shadow1DProjGradARB" : "shadow1DGradARB";
01681                     else
01682                     {
01683                         FIXME("Unsupported 1D shadow grad function.\n");
01684                         sample_function->name = "unsupported1DGrad";
01685                     }
01686                 }
01687                 else
01688                 {
01689                     sample_function->name = projected ? "shadow1DProj" : "shadow1D";
01690                 }
01691                 sample_function->coord_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1;
01692             }
01693             else
01694             {
01695                 if (lod)
01696                 {
01697                     sample_function->name = projected ? "texture1DProjLod" : "texture1DLod";
01698                 }
01699                 else if (grad)
01700                 {
01701                     if (gl_info->supported[EXT_GPU_SHADER4])
01702                         sample_function->name = projected ? "texture1DProjGrad" : "texture1DGrad";
01703                     else if (gl_info->supported[ARB_SHADER_TEXTURE_LOD])
01704                         sample_function->name = projected ? "texture1DProjGradARB" : "texture1DGradARB";
01705                     else
01706                     {
01707                         FIXME("Unsupported 1D grad function.\n");
01708                         sample_function->name = "unsupported1DGrad";
01709                     }
01710                 }
01711                 else
01712                 {
01713                     sample_function->name = projected ? "texture1DProj" : "texture1D";
01714                 }
01715                 sample_function->coord_mask = WINED3DSP_WRITEMASK_0;
01716             }
01717             break;
01718 
01719         case WINED3DSTT_2D:
01720             if (shadow)
01721             {
01722                 if (texrect)
01723                 {
01724                     if (lod)
01725                     {
01726                         sample_function->name = projected ? "shadow2DRectProjLod" : "shadow2DRectLod";
01727                     }
01728                     else if (grad)
01729                     {
01730                         if (gl_info->supported[EXT_GPU_SHADER4])
01731                             sample_function->name = projected ? "shadow2DRectProjGrad" : "shadow2DRectGrad";
01732                         else if (gl_info->supported[ARB_SHADER_TEXTURE_LOD])
01733                             sample_function->name = projected ? "shadow2DRectProjGradARB" : "shadow2DRectGradARB";
01734                         else
01735                         {
01736                             FIXME("Unsupported RECT shadow grad function.\n");
01737                             sample_function->name = "unsupported2DRectGrad";
01738                         }
01739                     }
01740                     else
01741                     {
01742                         sample_function->name = projected ? "shadow2DRectProj" : "shadow2DRect";
01743                     }
01744                 }
01745                 else
01746                 {
01747                     if (lod)
01748                     {
01749                         sample_function->name = projected ? "shadow2DProjLod" : "shadow2DLod";
01750                     }
01751                     else if (grad)
01752                     {
01753                         if (gl_info->supported[EXT_GPU_SHADER4])
01754                             sample_function->name = projected ? "shadow2DProjGrad" : "shadow2DGrad";
01755                         else if (gl_info->supported[ARB_SHADER_TEXTURE_LOD])
01756                             sample_function->name = projected ? "shadow2DProjGradARB" : "shadow2DGradARB";
01757                         else
01758                         {
01759                             FIXME("Unsupported 2D shadow grad function.\n");
01760                             sample_function->name = "unsupported2DGrad";
01761                         }
01762                     }
01763                     else
01764                     {
01765                         sample_function->name = projected ? "shadow2DProj" : "shadow2D";
01766                     }
01767                 }
01768                 sample_function->coord_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
01769             }
01770             else
01771             {
01772                 if (texrect)
01773                 {
01774                     if (lod)
01775                     {
01776                         sample_function->name = projected ? "texture2DRectProjLod" : "texture2DRectLod";
01777                     }
01778                     else if (grad)
01779                     {
01780                         if (gl_info->supported[EXT_GPU_SHADER4])
01781                             sample_function->name = projected ? "texture2DRectProjGrad" : "texture2DRectGrad";
01782                         else if (gl_info->supported[ARB_SHADER_TEXTURE_LOD])
01783                             sample_function->name = projected ? "texture2DRectProjGradARB" : "texture2DRectGradARB";
01784                         else
01785                         {
01786                             FIXME("Unsupported RECT grad function.\n");
01787                             sample_function->name = "unsupported2DRectGrad";
01788                         }
01789                     }
01790                     else
01791                     {
01792                         sample_function->name = projected ? "texture2DRectProj" : "texture2DRect";
01793                     }
01794                 }
01795                 else
01796                 {
01797                     if (lod)
01798                     {
01799                         sample_function->name = projected ? "texture2DProjLod" : "texture2DLod";
01800                     }
01801                     else if (grad)
01802                     {
01803                         if (gl_info->supported[EXT_GPU_SHADER4])
01804                             sample_function->name = projected ? "texture2DProjGrad" : "texture2DGrad";
01805                         else if (gl_info->supported[ARB_SHADER_TEXTURE_LOD])
01806                             sample_function->name = projected ? "texture2DProjGradARB" : "texture2DGradARB";
01807                         else
01808                         {
01809                             FIXME("Unsupported 2D grad function.\n");
01810                             sample_function->name = "unsupported2DGrad";
01811                         }
01812                     }
01813                     else
01814                     {
01815                         sample_function->name = projected ? "texture2DProj" : "texture2D";
01816                     }
01817                 }
01818                 sample_function->coord_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1;
01819             }
01820             break;
01821 
01822         case WINED3DSTT_CUBE:
01823             if (shadow)
01824             {
01825                 FIXME("Unsupported Cube shadow function.\n");
01826                 sample_function->name = "unsupportedCubeShadow";
01827                 sample_function->coord_mask = 0;
01828             }
01829             else
01830             {
01831                 if (lod)
01832                 {
01833                     sample_function->name = "textureCubeLod";
01834                 }
01835                 else if (grad)
01836                 {
01837                     if (gl_info->supported[EXT_GPU_SHADER4])
01838                         sample_function->name = "textureCubeGrad";
01839                     else if (gl_info->supported[ARB_SHADER_TEXTURE_LOD])
01840                         sample_function->name = "textureCubeGradARB";
01841                     else
01842                     {
01843                         FIXME("Unsupported Cube grad function.\n");
01844                         sample_function->name = "unsupportedCubeGrad";
01845                     }
01846                 }
01847                 else
01848                 {
01849                     sample_function->name = "textureCube";
01850                 }
01851                 sample_function->coord_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
01852             }
01853             break;
01854 
01855         case WINED3DSTT_VOLUME:
01856             if (shadow)
01857             {
01858                 FIXME("Unsupported 3D shadow function.\n");
01859                 sample_function->name = "unsupported3DShadow";
01860                 sample_function->coord_mask = 0;
01861             }
01862             else
01863             {
01864                 if (lod)
01865                 {
01866                     sample_function->name = projected ? "texture3DProjLod" : "texture3DLod";
01867                 }
01868                 else  if (grad)
01869                 {
01870                     if (gl_info->supported[EXT_GPU_SHADER4])
01871                         sample_function->name = projected ? "texture3DProjGrad" : "texture3DGrad";
01872                     else if (gl_info->supported[ARB_SHADER_TEXTURE_LOD])
01873                         sample_function->name = projected ? "texture3DProjGradARB" : "texture3DGradARB";
01874                     else
01875                     {
01876                         FIXME("Unsupported 3D grad function.\n");
01877                         sample_function->name = "unsupported3DGrad";
01878                     }
01879                 }
01880                 else
01881                 {
01882                     sample_function->name = projected ? "texture3DProj" : "texture3D";
01883                 }
01884                 sample_function->coord_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
01885             }
01886             break;
01887 
01888         default:
01889             sample_function->name = "";
01890             sample_function->coord_mask = 0;
01891             FIXME("Unrecognized sampler type: %#x;\n", sampler_type);
01892             break;
01893     }
01894 }
01895 
01896 static void shader_glsl_append_fixup_arg(char *arguments, const char *reg_name,
01897         BOOL sign_fixup, enum fixup_channel_source channel_source)
01898 {
01899     switch(channel_source)
01900     {
01901         case CHANNEL_SOURCE_ZERO:
01902             strcat(arguments, "0.0");
01903             break;
01904 
01905         case CHANNEL_SOURCE_ONE:
01906             strcat(arguments, "1.0");
01907             break;
01908 
01909         case CHANNEL_SOURCE_X:
01910             strcat(arguments, reg_name);
01911             strcat(arguments, ".x");
01912             break;
01913 
01914         case CHANNEL_SOURCE_Y:
01915             strcat(arguments, reg_name);
01916             strcat(arguments, ".y");
01917             break;
01918 
01919         case CHANNEL_SOURCE_Z:
01920             strcat(arguments, reg_name);
01921             strcat(arguments, ".z");
01922             break;
01923 
01924         case CHANNEL_SOURCE_W:
01925             strcat(arguments, reg_name);
01926             strcat(arguments, ".w");
01927             break;
01928 
01929         default:
01930             FIXME("Unhandled channel source %#x\n", channel_source);
01931             strcat(arguments, "undefined");
01932             break;
01933     }
01934 
01935     if (sign_fixup) strcat(arguments, " * 2.0 - 1.0");
01936 }
01937 
01938 static void shader_glsl_color_correction(const struct wined3d_shader_instruction *ins, struct color_fixup_desc fixup)
01939 {
01940     struct wined3d_shader_dst_param dst;
01941     unsigned int mask_size, remaining;
01942     struct glsl_dst_param dst_param;
01943     char arguments[256];
01944     DWORD mask;
01945 
01946     mask = 0;
01947     if (fixup.x_sign_fixup || fixup.x_source != CHANNEL_SOURCE_X) mask |= WINED3DSP_WRITEMASK_0;
01948     if (fixup.y_sign_fixup || fixup.y_source != CHANNEL_SOURCE_Y) mask |= WINED3DSP_WRITEMASK_1;
01949     if (fixup.z_sign_fixup || fixup.z_source != CHANNEL_SOURCE_Z) mask |= WINED3DSP_WRITEMASK_2;
01950     if (fixup.w_sign_fixup || fixup.w_source != CHANNEL_SOURCE_W) mask |= WINED3DSP_WRITEMASK_3;
01951     mask &= ins->dst[0].write_mask;
01952 
01953     if (!mask) return; /* Nothing to do */
01954 
01955     if (is_complex_fixup(fixup))
01956     {
01957         enum complex_fixup complex_fixup = get_complex_fixup(fixup);
01958         FIXME("Complex fixup (%#x) not supported\n",complex_fixup);
01959         return;
01960     }
01961 
01962     mask_size = shader_glsl_get_write_mask_size(mask);
01963 
01964     dst = ins->dst[0];
01965     dst.write_mask = mask;
01966     shader_glsl_add_dst_param(ins, &dst, &dst_param);
01967 
01968     arguments[0] = '\0';
01969     remaining = mask_size;
01970     if (mask & WINED3DSP_WRITEMASK_0)
01971     {
01972         shader_glsl_append_fixup_arg(arguments, dst_param.reg_name, fixup.x_sign_fixup, fixup.x_source);
01973         if (--remaining) strcat(arguments, ", ");
01974     }
01975     if (mask & WINED3DSP_WRITEMASK_1)
01976     {
01977         shader_glsl_append_fixup_arg(arguments, dst_param.reg_name, fixup.y_sign_fixup, fixup.y_source);
01978         if (--remaining) strcat(arguments, ", ");
01979     }
01980     if (mask & WINED3DSP_WRITEMASK_2)
01981     {
01982         shader_glsl_append_fixup_arg(arguments, dst_param.reg_name, fixup.z_sign_fixup, fixup.z_source);
01983         if (--remaining) strcat(arguments, ", ");
01984     }
01985     if (mask & WINED3DSP_WRITEMASK_3)
01986     {
01987         shader_glsl_append_fixup_arg(arguments, dst_param.reg_name, fixup.w_sign_fixup, fixup.w_source);
01988         if (--remaining) strcat(arguments, ", ");
01989     }
01990 
01991     if (mask_size > 1)
01992     {
01993         shader_addline(ins->ctx->buffer, "%s%s = vec%u(%s);\n",
01994                 dst_param.reg_name, dst_param.mask_str, mask_size, arguments);
01995     }
01996     else
01997     {
01998         shader_addline(ins->ctx->buffer, "%s%s = %s;\n", dst_param.reg_name, dst_param.mask_str, arguments);
01999     }
02000 }
02001 
02002 static void PRINTF_ATTR(8, 9) shader_glsl_gen_sample_code(const struct wined3d_shader_instruction *ins,
02003         DWORD sampler, const struct glsl_sample_function *sample_function, DWORD swizzle,
02004         const char *dx, const char *dy, const char *bias, const char *coord_reg_fmt, ...)
02005 {
02006     const char *sampler_base;
02007     char dst_swizzle[6];
02008     struct color_fixup_desc fixup;
02009     BOOL np2_fixup = FALSE;
02010     va_list args;
02011 
02012     shader_glsl_swizzle_to_str(swizzle, FALSE, ins->dst[0].write_mask, dst_swizzle);
02013 
02014     if (shader_is_pshader_version(ins->ctx->reg_maps->shader_version.type))
02015     {
02016         const struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data;
02017         fixup = priv->cur_ps_args->color_fixup[sampler];
02018         sampler_base = "Psampler";
02019 
02020         if(priv->cur_ps_args->np2_fixup & (1 << sampler)) {
02021             if(bias) {
02022                 FIXME("Biased sampling from NP2 textures is unsupported\n");
02023             } else {
02024                 np2_fixup = TRUE;
02025             }
02026         }
02027     } else {
02028         sampler_base = "Vsampler";
02029         fixup = COLOR_FIXUP_IDENTITY; /* FIXME: Vshader color fixup */
02030     }
02031 
02032     shader_glsl_append_dst(ins->ctx->buffer, ins);
02033 
02034     shader_addline(ins->ctx->buffer, "%s(%s%u, ", sample_function->name, sampler_base, sampler);
02035 
02036     va_start(args, coord_reg_fmt);
02037     shader_vaddline(ins->ctx->buffer, coord_reg_fmt, args);
02038     va_end(args);
02039 
02040     if(bias) {
02041         shader_addline(ins->ctx->buffer, ", %s)%s);\n", bias, dst_swizzle);
02042     } else {
02043         if (np2_fixup) {
02044             const struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data;
02045             const unsigned char idx = priv->cur_np2fixup_info->idx[sampler];
02046 
02047             shader_addline(ins->ctx->buffer, " * PsamplerNP2Fixup[%u].%s)%s);\n", idx >> 1,
02048                            (idx % 2) ? "zw" : "xy", dst_swizzle);
02049         } else if(dx && dy) {
02050             shader_addline(ins->ctx->buffer, ", %s, %s)%s);\n", dx, dy, dst_swizzle);
02051         } else {
02052             shader_addline(ins->ctx->buffer, ")%s);\n", dst_swizzle);
02053         }
02054     }
02055 
02056     if(!is_identity_fixup(fixup)) {
02057         shader_glsl_color_correction(ins, fixup);
02058     }
02059 }
02060 
02061 /*****************************************************************************
02062  * Begin processing individual instruction opcodes
02063  ****************************************************************************/
02064 
02065 /* Generate GLSL arithmetic functions (dst = src1 + src2) */
02066 static void shader_glsl_arith(const struct wined3d_shader_instruction *ins)
02067 {
02068     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
02069     struct glsl_src_param src0_param;
02070     struct glsl_src_param src1_param;
02071     DWORD write_mask;
02072     char op;
02073 
02074     /* Determine the GLSL operator to use based on the opcode */
02075     switch (ins->handler_idx)
02076     {
02077         case WINED3DSIH_MUL: op = '*'; break;
02078         case WINED3DSIH_ADD: op = '+'; break;
02079         case WINED3DSIH_SUB: op = '-'; break;
02080         default:
02081             op = ' ';
02082             FIXME("Opcode %#x not yet handled in GLSL\n", ins->handler_idx);
02083             break;
02084     }
02085 
02086     write_mask = shader_glsl_append_dst(buffer, ins);
02087     shader_glsl_add_src_param(ins, &ins->src[0], write_mask, &src0_param);
02088     shader_glsl_add_src_param(ins, &ins->src[1], write_mask, &src1_param);
02089     shader_addline(buffer, "%s %c %s);\n", src0_param.param_str, op, src1_param.param_str);
02090 }
02091 
02092 /* Process the WINED3DSIO_MOV opcode using GLSL (dst = src) */
02093 static void shader_glsl_mov(const struct wined3d_shader_instruction *ins)
02094 {
02095     const struct wined3d_gl_info *gl_info = ins->ctx->gl_info;
02096     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
02097     struct glsl_src_param src0_param;
02098     DWORD write_mask;
02099 
02100     write_mask = shader_glsl_append_dst(buffer, ins);
02101     shader_glsl_add_src_param(ins, &ins->src[0], write_mask, &src0_param);
02102 
02103     /* In vs_1_1 WINED3DSIO_MOV can write to the address register. In later
02104      * shader versions WINED3DSIO_MOVA is used for this. */
02105     if (ins->ctx->reg_maps->shader_version.major == 1
02106             && !shader_is_pshader_version(ins->ctx->reg_maps->shader_version.type)
02107             && ins->dst[0].reg.type == WINED3DSPR_ADDR)
02108     {
02109         /* This is a simple floor() */
02110         unsigned int mask_size = shader_glsl_get_write_mask_size(write_mask);
02111         if (mask_size > 1) {
02112             shader_addline(buffer, "ivec%d(floor(%s)));\n", mask_size, src0_param.param_str);
02113         } else {
02114             shader_addline(buffer, "int(floor(%s)));\n", src0_param.param_str);
02115         }
02116     }
02117     else if(ins->handler_idx == WINED3DSIH_MOVA)
02118     {
02119         /* We need to *round* to the nearest int here. */
02120         unsigned int mask_size = shader_glsl_get_write_mask_size(write_mask);
02121 
02122         if (gl_info->supported[EXT_GPU_SHADER4])
02123         {
02124             if (mask_size > 1)
02125                 shader_addline(buffer, "ivec%d(round(%s)));\n", mask_size, src0_param.param_str);
02126             else
02127                 shader_addline(buffer, "int(round(%s)));\n", src0_param.param_str);
02128         }
02129         else
02130         {
02131             if (mask_size > 1)
02132                 shader_addline(buffer, "ivec%d(floor(abs(%s) + vec%d(0.5)) * sign(%s)));\n",
02133                         mask_size, src0_param.param_str, mask_size, src0_param.param_str);
02134             else
02135                 shader_addline(buffer, "int(floor(abs(%s) + 0.5) * sign(%s)));\n",
02136                         src0_param.param_str, src0_param.param_str);
02137         }
02138     }
02139     else
02140     {
02141         shader_addline(buffer, "%s);\n", src0_param.param_str);
02142     }
02143 }
02144 
02145 /* Process the dot product operators DP3 and DP4 in GLSL (dst = dot(src0, src1)) */
02146 static void shader_glsl_dot(const struct wined3d_shader_instruction *ins)
02147 {
02148     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
02149     struct glsl_src_param src0_param;
02150     struct glsl_src_param src1_param;
02151     DWORD dst_write_mask, src_write_mask;
02152     unsigned int dst_size = 0;
02153 
02154     dst_write_mask = shader_glsl_append_dst(buffer, ins);
02155     dst_size = shader_glsl_get_write_mask_size(dst_write_mask);
02156 
02157     /* dp3 works on vec3, dp4 on vec4 */
02158     if (ins->handler_idx == WINED3DSIH_DP4)
02159     {
02160         src_write_mask = WINED3DSP_WRITEMASK_ALL;
02161     } else {
02162         src_write_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
02163     }
02164 
02165     shader_glsl_add_src_param(ins, &ins->src[0], src_write_mask, &src0_param);
02166     shader_glsl_add_src_param(ins, &ins->src[1], src_write_mask, &src1_param);
02167 
02168     if (dst_size > 1) {
02169         shader_addline(buffer, "vec%d(dot(%s, %s)));\n", dst_size, src0_param.param_str, src1_param.param_str);
02170     } else {
02171         shader_addline(buffer, "dot(%s, %s));\n", src0_param.param_str, src1_param.param_str);
02172     }
02173 }
02174 
02175 /* Note that this instruction has some restrictions. The destination write mask
02176  * can't contain the w component, and the source swizzles have to be .xyzw */
02177 static void shader_glsl_cross(const struct wined3d_shader_instruction *ins)
02178 {
02179     DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
02180     struct glsl_src_param src0_param;
02181     struct glsl_src_param src1_param;
02182     char dst_mask[6];
02183 
02184     shader_glsl_get_write_mask(&ins->dst[0], dst_mask);
02185     shader_glsl_append_dst(ins->ctx->buffer, ins);
02186     shader_glsl_add_src_param(ins, &ins->src[0], src_mask, &src0_param);
02187     shader_glsl_add_src_param(ins, &ins->src[1], src_mask, &src1_param);
02188     shader_addline(ins->ctx->buffer, "cross(%s, %s)%s);\n", src0_param.param_str, src1_param.param_str, dst_mask);
02189 }
02190 
02191 /* Process the WINED3DSIO_POW instruction in GLSL (dst = |src0|^src1)
02192  * Src0 and src1 are scalars. Note that D3D uses the absolute of src0, while
02193  * GLSL uses the value as-is. */
02194 static void shader_glsl_pow(const struct wined3d_shader_instruction *ins)
02195 {
02196     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
02197     struct glsl_src_param src0_param;
02198     struct glsl_src_param src1_param;
02199     DWORD dst_write_mask;
02200     unsigned int dst_size;
02201 
02202     dst_write_mask = shader_glsl_append_dst(buffer, ins);
02203     dst_size = shader_glsl_get_write_mask_size(dst_write_mask);
02204 
02205     shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0, &src0_param);
02206     shader_glsl_add_src_param(ins, &ins->src[1], WINED3DSP_WRITEMASK_0, &src1_param);
02207 
02208     if (dst_size > 1)
02209     {
02210         shader_addline(buffer, "vec%u(%s == 0.0 ? 1.0 : pow(abs(%s), %s)));\n",
02211                 dst_size, src1_param.param_str, src0_param.param_str, src1_param.param_str);
02212     }
02213     else
02214     {
02215         shader_addline(buffer, "%s == 0.0 ? 1.0 : pow(abs(%s), %s));\n",
02216                 src1_param.param_str, src0_param.param_str, src1_param.param_str);
02217     }
02218 }
02219 
02220 /* Process the WINED3DSIO_LOG instruction in GLSL (dst = log2(|src0|))
02221  * Src0 is a scalar. Note that D3D uses the absolute of src0, while
02222  * GLSL uses the value as-is. */
02223 static void shader_glsl_log(const struct wined3d_shader_instruction *ins)
02224 {
02225     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
02226     struct glsl_src_param src0_param;
02227     DWORD dst_write_mask;
02228     unsigned int dst_size;
02229 
02230     dst_write_mask = shader_glsl_append_dst(buffer, ins);
02231     dst_size = shader_glsl_get_write_mask_size(dst_write_mask);
02232 
02233     shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0, &src0_param);
02234 
02235     if (dst_size > 1)
02236     {
02237         shader_addline(buffer, "vec%u(log2(abs(%s))));\n",
02238                 dst_size, src0_param.param_str);
02239     }
02240     else
02241     {
02242         shader_addline(buffer, "log2(abs(%s)));\n",
02243                 src0_param.param_str);
02244     }
02245 }
02246 
02247 /* Map the opcode 1-to-1 to the GL code (arg->dst = instruction(src0, src1, ...) */
02248 static void shader_glsl_map2gl(const struct wined3d_shader_instruction *ins)
02249 {
02250     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
02251     struct glsl_src_param src_param;
02252     const char *instruction;
02253     DWORD write_mask;
02254     unsigned i;
02255 
02256     /* Determine the GLSL function to use based on the opcode */
02257     /* TODO: Possibly make this a table for faster lookups */
02258     switch (ins->handler_idx)
02259     {
02260         case WINED3DSIH_MIN: instruction = "min"; break;
02261         case WINED3DSIH_MAX: instruction = "max"; break;
02262         case WINED3DSIH_ABS: instruction = "abs"; break;
02263         case WINED3DSIH_FRC: instruction = "fract"; break;
02264         case WINED3DSIH_EXP: instruction = "exp2"; break;
02265         case WINED3DSIH_DSX: instruction = "dFdx"; break;
02266         case WINED3DSIH_DSY: instruction = "ycorrection.y * dFdy"; break;
02267         default: instruction = "";
02268             FIXME("Opcode %#x not yet handled in GLSL\n", ins->handler_idx);
02269             break;
02270     }
02271 
02272     write_mask = shader_glsl_append_dst(buffer, ins);
02273 
02274     shader_addline(buffer, "%s(", instruction);
02275 
02276     if (ins->src_count)
02277     {
02278         shader_glsl_add_src_param(ins, &ins->src[0], write_mask, &src_param);
02279         shader_addline(buffer, "%s", src_param.param_str);
02280         for (i = 1; i < ins->src_count; ++i)
02281         {
02282             shader_glsl_add_src_param(ins, &ins->src[i], write_mask, &src_param);
02283             shader_addline(buffer, ", %s", src_param.param_str);
02284         }
02285     }
02286 
02287     shader_addline(buffer, "));\n");
02288 }
02289 
02290 static void shader_glsl_nrm(const struct wined3d_shader_instruction *ins)
02291 {
02292     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
02293     struct glsl_src_param src_param;
02294     unsigned int mask_size;
02295     DWORD write_mask;
02296     char dst_mask[6];
02297 
02298     write_mask = shader_glsl_get_write_mask(ins->dst, dst_mask);
02299     mask_size = shader_glsl_get_write_mask_size(write_mask);
02300     shader_glsl_add_src_param(ins, &ins->src[0], write_mask, &src_param);
02301 
02302     shader_addline(buffer, "tmp0.x = dot(%s, %s);\n",
02303             src_param.param_str, src_param.param_str);
02304     shader_glsl_append_dst(buffer, ins);
02305 
02306     if (mask_size > 1)
02307     {
02308         shader_addline(buffer, "tmp0.x == 0.0 ? vec%u(0.0) : (%s * inversesqrt(tmp0.x)));\n",
02309                 mask_size, src_param.param_str);
02310     }
02311     else
02312     {
02313         shader_addline(buffer, "tmp0.x == 0.0 ? 0.0 : (%s * inversesqrt(tmp0.x)));\n",
02314                 src_param.param_str);
02315     }
02316 }
02317 
02327 static void shader_glsl_expp(const struct wined3d_shader_instruction *ins)
02328 {
02329     struct glsl_src_param src_param;
02330 
02331     shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0, &src_param);
02332 
02333     if (ins->ctx->reg_maps->shader_version.major < 2)
02334     {
02335         char dst_mask[6];
02336 
02337         shader_addline(ins->ctx->buffer, "tmp0.x = exp2(floor(%s));\n", src_param.param_str);
02338         shader_addline(ins->ctx->buffer, "tmp0.y = %s - floor(%s);\n", src_param.param_str, src_param.param_str);
02339         shader_addline(ins->ctx->buffer, "tmp0.z = exp2(%s);\n", src_param.param_str);
02340         shader_addline(ins->ctx->buffer, "tmp0.w = 1.0;\n");
02341 
02342         shader_glsl_append_dst(ins->ctx->buffer, ins);
02343         shader_glsl_get_write_mask(&ins->dst[0], dst_mask);
02344         shader_addline(ins->ctx->buffer, "tmp0%s);\n", dst_mask);
02345     } else {
02346         DWORD write_mask;
02347         unsigned int mask_size;
02348 
02349         write_mask = shader_glsl_append_dst(ins->ctx->buffer, ins);
02350         mask_size = shader_glsl_get_write_mask_size(write_mask);
02351 
02352         if (mask_size > 1) {
02353             shader_addline(ins->ctx->buffer, "vec%d(exp2(%s)));\n", mask_size, src_param.param_str);
02354         } else {
02355             shader_addline(ins->ctx->buffer, "exp2(%s));\n", src_param.param_str);
02356         }
02357     }
02358 }
02359 
02361 static void shader_glsl_rcp(const struct wined3d_shader_instruction *ins)
02362 {
02363     struct glsl_src_param src_param;
02364     DWORD write_mask;
02365     unsigned int mask_size;
02366 
02367     write_mask = shader_glsl_append_dst(ins->ctx->buffer, ins);
02368     mask_size = shader_glsl_get_write_mask_size(write_mask);
02369     shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_3, &src_param);
02370 
02371     if (mask_size > 1)
02372     {
02373         shader_addline(ins->ctx->buffer, "vec%u(1.0 / %s));\n",
02374                 mask_size, src_param.param_str);
02375     }
02376     else
02377     {
02378         shader_addline(ins->ctx->buffer, "1.0 / %s);\n",
02379                 src_param.param_str);
02380     }
02381 }
02382 
02383 static void shader_glsl_rsq(const struct wined3d_shader_instruction *ins)
02384 {
02385     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
02386     struct glsl_src_param src_param;
02387     DWORD write_mask;
02388     unsigned int mask_size;
02389 
02390     write_mask = shader_glsl_append_dst(buffer, ins);
02391     mask_size = shader_glsl_get_write_mask_size(write_mask);
02392 
02393     shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_3, &src_param);
02394 
02395     if (mask_size > 1)
02396     {
02397         shader_addline(buffer, "vec%u(inversesqrt(abs(%s))));\n",
02398                 mask_size, src_param.param_str);
02399     }
02400     else
02401     {
02402         shader_addline(buffer, "inversesqrt(abs(%s)));\n",
02403                 src_param.param_str);
02404     }
02405 }
02406 
02408 static void shader_glsl_compare(const struct wined3d_shader_instruction *ins)
02409 {
02410     struct glsl_src_param src0_param;
02411     struct glsl_src_param src1_param;
02412     DWORD write_mask;
02413     unsigned int mask_size;
02414 
02415     write_mask = shader_glsl_append_dst(ins->ctx->buffer, ins);
02416     mask_size = shader_glsl_get_write_mask_size(write_mask);
02417     shader_glsl_add_src_param(ins, &ins->src[0], write_mask, &src0_param);
02418     shader_glsl_add_src_param(ins, &ins->src[1], write_mask, &src1_param);
02419 
02420     if (mask_size > 1) {
02421         const char *compare;
02422 
02423         switch(ins->handler_idx)
02424         {
02425             case WINED3DSIH_SLT: compare = "lessThan"; break;
02426             case WINED3DSIH_SGE: compare = "greaterThanEqual"; break;
02427             default: compare = "";
02428                 FIXME("Can't handle opcode %#x\n", ins->handler_idx);
02429         }
02430 
02431         shader_addline(ins->ctx->buffer, "vec%d(%s(%s, %s)));\n", mask_size, compare,
02432                 src0_param.param_str, src1_param.param_str);
02433     } else {
02434         switch(ins->handler_idx)
02435         {
02436             case WINED3DSIH_SLT:
02437                 /* Step(src0, src1) is not suitable here because if src0 == src1 SLT is supposed,
02438                  * to return 0.0 but step returns 1.0 because step is not < x
02439                  * An alternative is a bvec compare padded with an unused second component.
02440                  * step(src1 * -1.0, src0 * -1.0) is not an option because it suffers from the same
02441                  * issue. Playing with not() is not possible either because not() does not accept
02442                  * a scalar.
02443                  */
02444                 shader_addline(ins->ctx->buffer, "(%s < %s) ? 1.0 : 0.0);\n",
02445                         src0_param.param_str, src1_param.param_str);
02446                 break;
02447             case WINED3DSIH_SGE:
02448                 /* Here we can use the step() function and safe a conditional */
02449                 shader_addline(ins->ctx->buffer, "step(%s, %s));\n", src1_param.param_str, src0_param.param_str);
02450                 break;
02451             default:
02452                 FIXME("Can't handle opcode %#x\n", ins->handler_idx);
02453         }
02454 
02455     }
02456 }
02457 
02459 static void shader_glsl_cmp(const struct wined3d_shader_instruction *ins)
02460 {
02461     struct glsl_src_param src0_param;
02462     struct glsl_src_param src1_param;
02463     struct glsl_src_param src2_param;
02464     DWORD write_mask, cmp_channel = 0;
02465     unsigned int i, j;
02466     char mask_char[6];
02467     BOOL temp_destination = FALSE;
02468 
02469     if (shader_is_scalar(&ins->src[0].reg))
02470     {
02471         write_mask = shader_glsl_append_dst(ins->ctx->buffer, ins);
02472 
02473         shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_ALL, &src0_param);
02474         shader_glsl_add_src_param(ins, &ins->src[1], write_mask, &src1_param);
02475         shader_glsl_add_src_param(ins, &ins->src[2], write_mask, &src2_param);
02476 
02477         shader_addline(ins->ctx->buffer, "%s >= 0.0 ? %s : %s);\n",
02478                        src0_param.param_str, src1_param.param_str, src2_param.param_str);
02479     } else {
02480         DWORD dst_mask = ins->dst[0].write_mask;
02481         struct wined3d_shader_dst_param dst = ins->dst[0];
02482 
02483         /* Cycle through all source0 channels */
02484         for (i=0; i<4; i++) {
02485             write_mask = 0;
02486             /* Find the destination channels which use the current source0 channel */
02487             for (j=0; j<4; j++) {
02488                 if (((ins->src[0].swizzle >> (2 * j)) & 0x3) == i)
02489                 {
02490                     write_mask |= WINED3DSP_WRITEMASK_0 << j;
02491                     cmp_channel = WINED3DSP_WRITEMASK_0 << j;
02492                 }
02493             }
02494             dst.write_mask = dst_mask & write_mask;
02495 
02496             /* Splitting the cmp instruction up in multiple lines imposes a problem:
02497             * The first lines may overwrite source parameters of the following lines.
02498             * Deal with that by using a temporary destination register if needed
02499             */
02500             if ((ins->src[0].reg.idx == ins->dst[0].reg.idx
02501                     && ins->src[0].reg.type == ins->dst[0].reg.type)
02502                     || (ins->src[1].reg.idx == ins->dst[0].reg.idx
02503                     && ins->src[1].reg.type == ins->dst[0].reg.type)
02504                     || (ins->src[2].reg.idx == ins->dst[0].reg.idx
02505                     && ins->src[2].reg.type == ins->dst[0].reg.type))
02506             {
02507                 write_mask = shader_glsl_get_write_mask(&dst, mask_char);
02508                 if (!write_mask) continue;
02509                 shader_addline(ins->ctx->buffer, "tmp0%s = (", mask_char);
02510                 temp_destination = TRUE;
02511             } else {
02512                 write_mask = shader_glsl_append_dst_ext(ins->ctx->buffer, ins, &dst);
02513                 if (!write_mask) continue;
02514             }
02515 
02516             shader_glsl_add_src_param(ins, &ins->src[0], cmp_channel, &src0_param);
02517             shader_glsl_add_src_param(ins, &ins->src[1], write_mask, &src1_param);
02518             shader_glsl_add_src_param(ins, &ins->src[2], write_mask, &src2_param);
02519 
02520             shader_addline(ins->ctx->buffer, "%s >= 0.0 ? %s : %s);\n",
02521                         src0_param.param_str, src1_param.param_str, src2_param.param_str);
02522         }
02523 
02524         if(temp_destination) {
02525             shader_glsl_get_write_mask(&ins->dst[0], mask_char);
02526             shader_glsl_append_dst(ins->ctx->buffer, ins);
02527             shader_addline(ins->ctx->buffer, "tmp0%s);\n", mask_char);
02528         }
02529     }
02530 
02531 }
02532 
02534 /* For ps 1.1-1.3, only a single component of src0 is used. For ps 1.4
02535  * the compare is done per component of src0. */
02536 static void shader_glsl_cnd(const struct wined3d_shader_instruction *ins)
02537 {
02538     struct wined3d_shader_dst_param dst;
02539     struct glsl_src_param src0_param;
02540     struct glsl_src_param src1_param;
02541     struct glsl_src_param src2_param;
02542     DWORD write_mask, cmp_channel = 0;
02543     unsigned int i, j;
02544     DWORD dst_mask;
02545     DWORD shader_version = WINED3D_SHADER_VERSION(ins->ctx->reg_maps->shader_version.major,
02546             ins->ctx->reg_maps->shader_version.minor);
02547 
02548     if (shader_version < WINED3D_SHADER_VERSION(1, 4))
02549     {
02550         write_mask = shader_glsl_append_dst(ins->ctx->buffer, ins);
02551         shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0, &src0_param);
02552         shader_glsl_add_src_param(ins, &ins->src[1], write_mask, &src1_param);
02553         shader_glsl_add_src_param(ins, &ins->src[2], write_mask, &src2_param);
02554 
02555         /* Fun: The D3DSI_COISSUE flag changes the semantic of the cnd instruction for < 1.4 shaders */
02556         if (ins->coissue)
02557         {
02558             shader_addline(ins->ctx->buffer, "%s /* COISSUE! */);\n", src1_param.param_str);
02559         } else {
02560             shader_addline(ins->ctx->buffer, "%s > 0.5 ? %s : %s);\n",
02561                     src0_param.param_str, src1_param.param_str, src2_param.param_str);
02562         }
02563         return;
02564     }
02565     /* Cycle through all source0 channels */
02566     dst_mask = ins->dst[0].write_mask;
02567     dst = ins->dst[0];
02568     for (i=0; i<4; i++) {
02569         write_mask = 0;
02570         /* Find the destination channels which use the current source0 channel */
02571         for (j=0; j<4; j++) {
02572             if (((ins->src[0].swizzle >> (2 * j)) & 0x3) == i)
02573             {
02574                 write_mask |= WINED3DSP_WRITEMASK_0 << j;
02575                 cmp_channel = WINED3DSP_WRITEMASK_0 << j;
02576             }
02577         }
02578 
02579         dst.write_mask = dst_mask & write_mask;
02580         write_mask = shader_glsl_append_dst_ext(ins->ctx->buffer, ins, &dst);
02581         if (!write_mask) continue;
02582 
02583         shader_glsl_add_src_param(ins, &ins->src[0], cmp_channel, &src0_param);
02584         shader_glsl_add_src_param(ins, &ins->src[1], write_mask, &src1_param);
02585         shader_glsl_add_src_param(ins, &ins->src[2], write_mask, &src2_param);
02586 
02587         shader_addline(ins->ctx->buffer, "%s > 0.5 ? %s : %s);\n",
02588                 src0_param.param_str, src1_param.param_str, src2_param.param_str);
02589     }
02590 }
02591 
02593 static void shader_glsl_mad(const struct wined3d_shader_instruction *ins)
02594 {
02595     struct glsl_src_param src0_param;
02596     struct glsl_src_param src1_param;
02597     struct glsl_src_param src2_param;
02598     DWORD write_mask;
02599 
02600     write_mask = shader_glsl_append_dst(ins->ctx->buffer, ins);
02601     shader_glsl_add_src_param(ins, &ins->src[0], write_mask, &src0_param);
02602     shader_glsl_add_src_param(ins, &ins->src[1], write_mask, &src1_param);
02603     shader_glsl_add_src_param(ins, &ins->src[2], write_mask, &src2_param);
02604     shader_addline(ins->ctx->buffer, "(%s * %s) + %s);\n",
02605             src0_param.param_str, src1_param.param_str, src2_param.param_str);
02606 }
02607 
02608 /* Handles transforming all WINED3DSIO_M?x? opcodes for
02609    Vertex shaders to GLSL codes */
02610 static void shader_glsl_mnxn(const struct wined3d_shader_instruction *ins)
02611 {
02612     int i;
02613     int nComponents = 0;
02614     struct wined3d_shader_dst_param tmp_dst = {{0}};
02615     struct wined3d_shader_src_param tmp_src[2] = {{{0}}};
02616     struct wined3d_shader_instruction tmp_ins;
02617 
02618     memset(&tmp_ins, 0, sizeof(tmp_ins));
02619 
02620     /* Set constants for the temporary argument */
02621     tmp_ins.ctx = ins->ctx;
02622     tmp_ins.dst_count = 1;
02623     tmp_ins.dst = &tmp_dst;
02624     tmp_ins.src_count = 2;
02625     tmp_ins.src = tmp_src;
02626 
02627     switch(ins->handler_idx)
02628     {
02629         case WINED3DSIH_M4x4:
02630             nComponents = 4;
02631             tmp_ins.handler_idx = WINED3DSIH_DP4;
02632             break;
02633         case WINED3DSIH_M4x3:
02634             nComponents = 3;
02635             tmp_ins.handler_idx = WINED3DSIH_DP4;
02636             break;
02637         case WINED3DSIH_M3x4:
02638             nComponents = 4;
02639             tmp_ins.handler_idx = WINED3DSIH_DP3;
02640             break;
02641         case WINED3DSIH_M3x3:
02642             nComponents = 3;
02643             tmp_ins.handler_idx = WINED3DSIH_DP3;
02644             break;
02645         case WINED3DSIH_M3x2:
02646             nComponents = 2;
02647             tmp_ins.handler_idx = WINED3DSIH_DP3;
02648             break;
02649         default:
02650             break;
02651     }
02652 
02653     tmp_dst = ins->dst[0];
02654     tmp_src[0] = ins->src[0];
02655     tmp_src[1] = ins->src[1];
02656     for (i = 0; i < nComponents; ++i)
02657     {
02658         tmp_dst.write_mask = WINED3DSP_WRITEMASK_0 << i;
02659         shader_glsl_dot(&tmp_ins);
02660         ++tmp_src[1].reg.idx;
02661     }
02662 }
02663 
02670 static void shader_glsl_lrp(const struct wined3d_shader_instruction *ins)
02671 {
02672     struct glsl_src_param src0_param;
02673     struct glsl_src_param src1_param;
02674     struct glsl_src_param src2_param;
02675     DWORD write_mask;
02676 
02677     write_mask = shader_glsl_append_dst(ins->ctx->buffer, ins);
02678 
02679     shader_glsl_add_src_param(ins, &ins->src[0], write_mask, &src0_param);
02680     shader_glsl_add_src_param(ins, &ins->src[1], write_mask, &src1_param);
02681     shader_glsl_add_src_param(ins, &ins->src[2], write_mask, &src2_param);
02682 
02683     shader_addline(ins->ctx->buffer, "mix(%s, %s, %s));\n",
02684             src2_param.param_str, src1_param.param_str, src0_param.param_str);
02685 }
02686 
02693 static void shader_glsl_lit(const struct wined3d_shader_instruction *ins)
02694 {
02695     struct glsl_src_param src0_param;
02696     struct glsl_src_param src1_param;
02697     struct glsl_src_param src3_param;
02698     char dst_mask[6];
02699 
02700     shader_glsl_append_dst(ins->ctx->buffer, ins);
02701     shader_glsl_get_write_mask(&ins->dst[0], dst_mask);
02702 
02703     shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0, &src0_param);
02704     shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_1, &src1_param);
02705     shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_3, &src3_param);
02706 
02707     /* The sdk specifies the instruction like this
02708      * dst.x = 1.0;
02709      * if(src.x > 0.0) dst.y = src.x
02710      * else dst.y = 0.0.
02711      * if(src.x > 0.0 && src.y > 0.0) dst.z = pow(src.y, power);
02712      * else dst.z = 0.0;
02713      * dst.w = 1.0;
02714      * (where power = src.w clamped between -128 and 128)
02715      *
02716      * Obviously that has quite a few conditionals in it which we don't like. So the first step is this:
02717      * dst.x = 1.0                                  ... No further explanation needed
02718      * dst.y = max(src.y, 0.0);                     ... If x < 0.0, use 0.0, otherwise x. Same as the conditional
02719      * dst.z = x > 0.0 ? pow(max(y, 0.0), p) : 0;   ... 0 ^ power is 0, and otherwise we use y anyway
02720      * dst.w = 1.0.                                 ... Nothing fancy.
02721      *
02722      * So we still have one conditional in there. So do this:
02723      * dst.z = pow(max(0.0, src.y) * step(0.0, src.x), power);
02724      *
02725      * step(0.0, x) will return 1 if src.x > 0.0, and 0 otherwise. So if y is 0 we get pow(0.0 * 1.0, power),
02726      * which sets dst.z to 0. If y > 0, but x = 0.0, we get pow(y * 0.0, power), which results in 0 too.
02727      * if both x and y are > 0, we get pow(y * 1.0, power), as it is supposed to.
02728      *
02729      * Unfortunately pow(0.0 ^ 0.0) returns NaN on most GPUs, but lit with src.y = 0 and src.w = 0 returns
02730      * a non-NaN value in dst.z. What we return doesn't matter, as long as it is not NaN. Return 0, which is
02731      * what all Windows HW drivers and GL_ARB_vertex_program's LIT do.
02732      */
02733     shader_addline(ins->ctx->buffer,
02734             "vec4(1.0, max(%s, 0.0), %s == 0.0 ? 0.0 : "
02735             "pow(max(0.0, %s) * step(0.0, %s), clamp(%s, -128.0, 128.0)), 1.0)%s);\n",
02736             src0_param.param_str, src3_param.param_str, src1_param.param_str,
02737             src0_param.param_str, src3_param.param_str, dst_mask);
02738 }
02739 
02746 static void shader_glsl_dst(const struct wined3d_shader_instruction *ins)
02747 {
02748     struct glsl_src_param src0y_param;
02749     struct glsl_src_param src0z_param;
02750     struct glsl_src_param src1y_param;
02751     struct glsl_src_param src1w_param;
02752     char dst_mask[6];
02753 
02754     shader_glsl_append_dst(ins->ctx->buffer, ins);
02755     shader_glsl_get_write_mask(&ins->dst[0], dst_mask);
02756 
02757     shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_1, &src0y_param);
02758     shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_2, &src0z_param);
02759     shader_glsl_add_src_param(ins, &ins->src[1], WINED3DSP_WRITEMASK_1, &src1y_param);
02760     shader_glsl_add_src_param(ins, &ins->src[1], WINED3DSP_WRITEMASK_3, &src1w_param);
02761 
02762     shader_addline(ins->ctx->buffer, "vec4(1.0, %s * %s, %s, %s))%s;\n",
02763             src0y_param.param_str, src1y_param.param_str, src0z_param.param_str, src1w_param.param_str, dst_mask);
02764 }
02765 
02775 static void shader_glsl_sincos(const struct wined3d_shader_instruction *ins)
02776 {
02777     struct glsl_src_param src0_param;
02778     DWORD write_mask;
02779 
02780     write_mask = shader_glsl_append_dst(ins->ctx->buffer, ins);
02781     shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0, &src0_param);
02782 
02783     switch (write_mask) {
02784         case WINED3DSP_WRITEMASK_0:
02785             shader_addline(ins->ctx->buffer, "cos(%s));\n", src0_param.param_str);
02786             break;
02787 
02788         case WINED3DSP_WRITEMASK_1:
02789             shader_addline(ins->ctx->buffer, "sin(%s));\n", src0_param.param_str);
02790             break;
02791 
02792         case (WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1):
02793             shader_addline(ins->ctx->buffer, "vec2(cos(%s), sin(%s)));\n", src0_param.param_str, src0_param.param_str);
02794             break;
02795 
02796         default:
02797             ERR("Write mask should be .x, .y or .xy\n");
02798             break;
02799     }
02800 }
02801 
02802 /* sgn in vs_2_0 has 2 extra parameters(registers for temporary storage) which we don't use
02803  * here. But those extra parameters require a dedicated function for sgn, since map2gl would
02804  * generate invalid code
02805  */
02806 static void shader_glsl_sgn(const struct wined3d_shader_instruction *ins)
02807 {
02808     struct glsl_src_param src0_param;
02809     DWORD write_mask;
02810 
02811     write_mask = shader_glsl_append_dst(ins->ctx->buffer, ins);
02812     shader_glsl_add_src_param(ins, &ins->src[0], write_mask, &src0_param);
02813 
02814     shader_addline(ins->ctx->buffer, "sign(%s));\n", src0_param.param_str);
02815 }
02816 
02822 /* FIXME: I don't think nested loops will work correctly this way. */
02823 static void shader_glsl_loop(const struct wined3d_shader_instruction *ins)
02824 {
02825     struct wined3d_shader_loop_state *loop_state = ins->ctx->loop_state;
02826     const struct wined3d_shader *shader = ins->ctx->shader;
02827     const struct wined3d_shader_lconst *constant;
02828     struct glsl_src_param src1_param;
02829     const DWORD *control_values = NULL;
02830 
02831     shader_glsl_add_src_param(ins, &ins->src[1], WINED3DSP_WRITEMASK_ALL, &src1_param);
02832 
02833     /* Try to hardcode the loop control parameters if possible. Direct3D 9 class hardware doesn't support real
02834      * varying indexing, but Microsoft designed this feature for Shader model 2.x+. If the loop control is
02835      * known at compile time, the GLSL compiler can unroll the loop, and replace indirect addressing with direct
02836      * addressing.
02837      */
02838     if (ins->src[1].reg.type == WINED3DSPR_CONSTINT)
02839     {
02840         LIST_FOR_EACH_ENTRY(constant, &shader->constantsI, struct wined3d_shader_lconst, entry)
02841         {
02842             if (constant->idx == ins->src[1].reg.idx)
02843             {
02844                 control_values = constant->value;
02845                 break;
02846             }
02847         }
02848     }
02849 
02850     if (control_values)
02851     {
02852         struct wined3d_shader_loop_control loop_control;
02853         loop_control.count = control_values[0];
02854         loop_control.start = control_values[1];
02855         loop_control.step = (int)control_values[2];
02856 
02857         if (loop_control.step > 0)
02858         {
02859             shader_addline(ins->ctx->buffer, "for (aL%u = %u; aL%u < (%u * %d + %u); aL%u += %d) {\n",
02860                     loop_state->current_depth, loop_control.start,
02861                     loop_state->current_depth, loop_control.count, loop_control.step, loop_control.start,
02862                     loop_state->current_depth, loop_control.step);
02863         }
02864         else if (loop_control.step < 0)
02865         {
02866             shader_addline(ins->ctx->buffer, "for (aL%u = %u; aL%u > (%u * %d + %u); aL%u += %d) {\n",
02867                     loop_state->current_depth, loop_control.start,
02868                     loop_state->current_depth, loop_control.count, loop_control.step, loop_control.start,
02869                     loop_state->current_depth, loop_control.step);
02870         }
02871         else
02872         {
02873             shader_addline(ins->ctx->buffer, "for (aL%u = %u, tmpInt%u = 0; tmpInt%u < %u; tmpInt%u++) {\n",
02874                     loop_state->current_depth, loop_control.start, loop_state->current_depth,
02875                     loop_state->current_depth, loop_control.count,
02876                     loop_state->current_depth);
02877         }
02878     } else {
02879         shader_addline(ins->ctx->buffer,
02880                 "for (tmpInt%u = 0, aL%u = %s.y; tmpInt%u < %s.x; tmpInt%u++, aL%u += %s.z) {\n",
02881                 loop_state->current_depth, loop_state->current_reg,
02882                 src1_param.reg_name, loop_state->current_depth, src1_param.reg_name,
02883                 loop_state->current_depth, loop_state->current_reg, src1_param.reg_name);
02884     }
02885 
02886     ++loop_state->current_depth;
02887     ++loop_state->current_reg;
02888 }
02889 
02890 static void shader_glsl_end(const struct wined3d_shader_instruction *ins)
02891 {
02892     struct wined3d_shader_loop_state *loop_state = ins->ctx->loop_state;
02893 
02894     shader_addline(ins->ctx->buffer, "}\n");
02895 
02896     if (ins->handler_idx == WINED3DSIH_ENDLOOP)
02897     {
02898         --loop_state->current_depth;
02899         --loop_state->current_reg;
02900     }
02901 
02902     if (ins->handler_idx == WINED3DSIH_ENDREP)
02903     {
02904         --loop_state->current_depth;
02905     }
02906 }
02907 
02908 static void shader_glsl_rep(const struct wined3d_shader_instruction *ins)
02909 {
02910     const struct wined3d_shader *shader = ins->ctx->shader;
02911     struct wined3d_shader_loop_state *loop_state = ins->ctx->loop_state;
02912     const struct wined3d_shader_lconst *constant;
02913     struct glsl_src_param src0_param;
02914     const DWORD *control_values = NULL;
02915 
02916     /* Try to hardcode local values to help the GLSL compiler to unroll and optimize the loop */
02917     if (ins->src[0].reg.type == WINED3DSPR_CONSTINT)
02918     {
02919         LIST_FOR_EACH_ENTRY(constant, &shader->constantsI, struct wined3d_shader_lconst, entry)
02920         {
02921             if (constant->idx == ins->src[0].reg.idx)
02922             {
02923                 control_values = constant->value;
02924                 break;
02925             }
02926         }
02927     }
02928 
02929     if (control_values)
02930     {
02931         shader_addline(ins->ctx->buffer, "for (tmpInt%d = 0; tmpInt%d < %d; tmpInt%d++) {\n",
02932                 loop_state->current_depth, loop_state->current_depth,
02933                 control_values[0], loop_state->current_depth);
02934     }
02935     else
02936     {
02937         shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0, &src0_param);
02938         shader_addline(ins->ctx->buffer, "for (tmpInt%d = 0; tmpInt%d < %s; tmpInt%d++) {\n",
02939                 loop_state->current_depth, loop_state->current_depth,
02940                 src0_param.param_str, loop_state->current_depth);
02941     }
02942 
02943     ++loop_state->current_depth;
02944 }
02945 
02946 static void shader_glsl_if(const struct wined3d_shader_instruction *ins)
02947 {
02948     struct glsl_src_param src0_param;
02949 
02950     shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0, &src0_param);
02951     shader_addline(ins->ctx->buffer, "if (%s) {\n", src0_param.param_str);
02952 }
02953 
02954 static void shader_glsl_ifc(const struct wined3d_shader_instruction *ins)
02955 {
02956     struct glsl_src_param src0_param;
02957     struct glsl_src_param src1_param;
02958 
02959     shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0, &src0_param);
02960     shader_glsl_add_src_param(ins, &ins->src[1], WINED3DSP_WRITEMASK_0, &src1_param);
02961 
02962     shader_addline(ins->ctx->buffer, "if (%s %s %s) {\n",
02963             src0_param.param_str, shader_glsl_get_rel_op(ins->flags), src1_param.param_str);
02964 }
02965 
02966 static void shader_glsl_else(const struct wined3d_shader_instruction *ins)
02967 {
02968     shader_addline(ins->ctx->buffer, "} else {\n");
02969 }
02970 
02971 static void shader_glsl_break(const struct wined3d_shader_instruction *ins)
02972 {
02973     shader_addline(ins->ctx->buffer, "break;\n");
02974 }
02975 
02976 /* FIXME: According to MSDN the compare is done per component. */
02977 static void shader_glsl_breakc(const struct wined3d_shader_instruction *ins)
02978 {
02979     struct glsl_src_param src0_param;
02980     struct glsl_src_param src1_param;
02981 
02982     shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0, &src0_param);
02983     shader_glsl_add_src_param(ins, &ins->src[1], WINED3DSP_WRITEMASK_0, &src1_param);
02984 
02985     shader_addline(ins->ctx->buffer, "if (%s %s %s) break;\n",
02986             src0_param.param_str, shader_glsl_get_rel_op(ins->flags), src1_param.param_str);
02987 }
02988 
02989 static void shader_glsl_label(const struct wined3d_shader_instruction *ins)
02990 {
02991     shader_addline(ins->ctx->buffer, "}\n");
02992     shader_addline(ins->ctx->buffer, "void subroutine%u () {\n",  ins->src[0].reg.idx);
02993 }
02994 
02995 static void shader_glsl_call(const struct wined3d_shader_instruction *ins)
02996 {
02997     shader_addline(ins->ctx->buffer, "subroutine%u();\n", ins->src[0].reg.idx);
02998 }
02999 
03000 static void shader_glsl_callnz(const struct wined3d_shader_instruction *ins)
03001 {
03002     struct glsl_src_param src1_param;
03003 
03004     shader_glsl_add_src_param(ins, &ins->src[1], WINED3DSP_WRITEMASK_0, &src1_param);
03005     shader_addline(ins->ctx->buffer, "if (%s) subroutine%u();\n", src1_param.param_str, ins->src[0].reg.idx);
03006 }
03007 
/* Handler for ret. Intentionally emits nothing. */
static void shader_glsl_ret(const struct wined3d_shader_instruction *ins)
{
    /* Nothing to write here: the closing } of a function body is emitted when
     * the next function is started and at the end of the shader. Having a
     * handler at all merely keeps the unhandled instruction warning quiet. */
}
03014 
03015 /*********************************************
03016  * Pixel Shader Specific Code begins here
03017  ********************************************/
03018 static void shader_glsl_tex(const struct wined3d_shader_instruction *ins)
03019 {
03020     const struct wined3d_shader *shader = ins->ctx->shader;
03021     struct wined3d_device *device = shader->device;
03022     DWORD shader_version = WINED3D_SHADER_VERSION(ins->ctx->reg_maps->shader_version.major,
03023             ins->ctx->reg_maps->shader_version.minor);
03024     struct glsl_sample_function sample_function;
03025     const struct wined3d_texture *texture;
03026     DWORD sample_flags = 0;
03027     DWORD sampler_idx;
03028     DWORD mask = 0, swizzle;
03029 
03030     /* 1.0-1.4: Use destination register as sampler source.
03031      * 2.0+: Use provided sampler source. */
03032     if (shader_version < WINED3D_SHADER_VERSION(2,0)) sampler_idx = ins->dst[0].reg.idx;
03033     else sampler_idx = ins->src[1].reg.idx;
03034     texture = device->stateBlock->state.textures[sampler_idx];
03035 
03036     if (shader_version < WINED3D_SHADER_VERSION(1,4))
03037     {
03038         const struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data;
03039         DWORD flags = (priv->cur_ps_args->tex_transform >> sampler_idx * WINED3D_PSARGS_TEXTRANSFORM_SHIFT)
03040                 & WINED3D_PSARGS_TEXTRANSFORM_MASK;
03041         enum wined3d_sampler_texture_type sampler_type = ins->ctx->reg_maps->sampler_type[sampler_idx];
03042 
03043         /* Projected cube textures don't make a lot of sense, the resulting coordinates stay the same. */
03044         if (flags & WINED3D_PSARGS_PROJECTED && sampler_type != WINED3DSTT_CUBE)
03045         {
03046             sample_flags |= WINED3D_GLSL_SAMPLE_PROJECTED;
03047             switch (flags & ~WINED3D_PSARGS_PROJECTED)
03048             {
03049                 case WINED3D_TTFF_COUNT1:
03050                     FIXME("WINED3D_TTFF_PROJECTED with WINED3D_TTFF_COUNT1?\n");
03051                     break;
03052                 case WINED3D_TTFF_COUNT2:
03053                     mask = WINED3DSP_WRITEMASK_1;
03054                     break;
03055                 case WINED3D_TTFF_COUNT3:
03056                     mask = WINED3DSP_WRITEMASK_2;
03057                     break;
03058                 case WINED3D_TTFF_COUNT4:
03059                 case WINED3D_TTFF_DISABLE:
03060                     mask = WINED3DSP_WRITEMASK_3;
03061                     break;
03062             }
03063         }
03064     }
03065     else if (shader_version < WINED3D_SHADER_VERSION(2,0))
03066     {
03067         enum wined3d_shader_src_modifier src_mod = ins->src[0].modifiers;
03068 
03069         if (src_mod == WINED3DSPSM_DZ) {
03070             sample_flags |= WINED3D_GLSL_SAMPLE_PROJECTED;
03071             mask = WINED3DSP_WRITEMASK_2;
03072         } else if (src_mod == WINED3DSPSM_DW) {
03073             sample_flags |= WINED3D_GLSL_SAMPLE_PROJECTED;
03074             mask = WINED3DSP_WRITEMASK_3;
03075         }
03076     } else {
03077         if (ins->flags & WINED3DSI_TEXLD_PROJECT)
03078         {
03079             /* ps 2.0 texldp instruction always divides by the fourth component. */
03080             sample_flags |= WINED3D_GLSL_SAMPLE_PROJECTED;
03081             mask = WINED3DSP_WRITEMASK_3;
03082         }
03083     }
03084 
03085     if (texture && texture->target == GL_TEXTURE_RECTANGLE_ARB)
03086         sample_flags |= WINED3D_GLSL_SAMPLE_RECT;
03087 
03088     shader_glsl_get_sample_function(ins->ctx, sampler_idx, sample_flags, &sample_function);
03089     mask |= sample_function.coord_mask;
03090 
03091     if (shader_version < WINED3D_SHADER_VERSION(2,0)) swizzle = WINED3DSP_NOSWIZZLE;
03092     else swizzle = ins->src[1].swizzle;
03093 
03094     /* 1.0-1.3: Use destination register as coordinate source.
03095        1.4+: Use provided coordinate source register. */
03096     if (shader_version < WINED3D_SHADER_VERSION(1,4))
03097     {
03098         char coord_mask[6];
03099         shader_glsl_write_mask_to_str(mask, coord_mask);
03100         shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, swizzle, NULL, NULL, NULL,
03101                 "T%u%s", sampler_idx, coord_mask);
03102     }
03103     else
03104     {
03105         struct glsl_src_param coord_param;
03106         shader_glsl_add_src_param(ins, &ins->src[0], mask, &coord_param);
03107         if (ins->flags & WINED3DSI_TEXLD_BIAS)
03108         {
03109             struct glsl_src_param bias;
03110             shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_3, &bias);
03111             shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, swizzle, NULL, NULL, bias.param_str,
03112                     "%s", coord_param.param_str);
03113         } else {
03114             shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, swizzle, NULL, NULL, NULL,
03115                     "%s", coord_param.param_str);
03116         }
03117     }
03118 }
03119 
03120 static void shader_glsl_texldd(const struct wined3d_shader_instruction *ins)
03121 {
03122     const struct wined3d_shader *shader = ins->ctx->shader;
03123     struct wined3d_device *device = shader->device;
03124     const struct wined3d_gl_info *gl_info = ins->ctx->gl_info;
03125     struct glsl_src_param coord_param, dx_param, dy_param;
03126     DWORD sample_flags = WINED3D_GLSL_SAMPLE_GRAD;
03127     struct glsl_sample_function sample_function;
03128     DWORD sampler_idx;
03129     DWORD swizzle = ins->src[1].swizzle;
03130     const struct wined3d_texture *texture;
03131 
03132     if (!gl_info->supported[ARB_SHADER_TEXTURE_LOD] && !gl_info->supported[EXT_GPU_SHADER4])
03133     {
03134         FIXME("texldd used, but not supported by hardware. Falling back to regular tex\n");
03135         shader_glsl_tex(ins);
03136         return;
03137     }
03138 
03139     sampler_idx = ins->src[1].reg.idx;
03140     texture = device->stateBlock->state.textures[sampler_idx];
03141     if (texture && texture->target == GL_TEXTURE_RECTANGLE_ARB)
03142         sample_flags |= WINED3D_GLSL_SAMPLE_RECT;
03143 
03144     shader_glsl_get_sample_function(ins->ctx, sampler_idx, sample_flags, &sample_function);
03145     shader_glsl_add_src_param(ins, &ins->src[0], sample_function.coord_mask, &coord_param);
03146     shader_glsl_add_src_param(ins, &ins->src[2], sample_function.coord_mask, &dx_param);
03147     shader_glsl_add_src_param(ins, &ins->src[3], sample_function.coord_mask, &dy_param);
03148 
03149     shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, swizzle, dx_param.param_str, dy_param.param_str, NULL,
03150                                 "%s", coord_param.param_str);
03151 }
03152 
03153 static void shader_glsl_texldl(const struct wined3d_shader_instruction *ins)
03154 {
03155     const struct wined3d_shader *shader = ins->ctx->shader;
03156     struct wined3d_device *device = shader->device;
03157     const struct wined3d_gl_info *gl_info = ins->ctx->gl_info;
03158     struct glsl_src_param coord_param, lod_param;
03159     DWORD sample_flags = WINED3D_GLSL_SAMPLE_LOD;
03160     struct glsl_sample_function sample_function;
03161     DWORD sampler_idx;
03162     DWORD swizzle = ins->src[1].swizzle;
03163     const struct wined3d_texture *texture;
03164 
03165     sampler_idx = ins->src[1].reg.idx;
03166     texture = device->stateBlock->state.textures[sampler_idx];
03167     if (texture && texture->target == GL_TEXTURE_RECTANGLE_ARB)
03168         sample_flags |= WINED3D_GLSL_SAMPLE_RECT;
03169 
03170     shader_glsl_get_sample_function(ins->ctx, sampler_idx, sample_flags, &sample_function);
03171     shader_glsl_add_src_param(ins, &ins->src[0], sample_function.coord_mask, &coord_param);
03172 
03173     shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_3, &lod_param);
03174 
03175     if (!gl_info->supported[ARB_SHADER_TEXTURE_LOD] && !gl_info->supported[EXT_GPU_SHADER4]
03176             && shader_is_pshader_version(ins->ctx->reg_maps->shader_version.type))
03177     {
03178         /* Plain GLSL only supports Lod sampling functions in vertex shaders.
03179          * However, the NVIDIA drivers allow them in fragment shaders as well,
03180          * even without the appropriate extension. */
03181         WARN("Using %s in fragment shader.\n", sample_function.name);
03182     }
03183     shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, swizzle, NULL, NULL, lod_param.param_str,
03184             "%s", coord_param.param_str);
03185 }
03186 
03187 static void shader_glsl_texcoord(const struct wined3d_shader_instruction *ins)
03188 {
03189     /* FIXME: Make this work for more than just 2D textures */
03190     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
03191     DWORD write_mask = shader_glsl_append_dst(ins->ctx->buffer, ins);
03192 
03193     if (!(ins->ctx->reg_maps->shader_version.major == 1 && ins->ctx->reg_maps->shader_version.minor == 4))
03194     {
03195         char dst_mask[6];
03196 
03197         shader_glsl_get_write_mask(&ins->dst[0], dst_mask);
03198         shader_addline(buffer, "clamp(gl_TexCoord[%u], 0.0, 1.0)%s);\n",
03199                 ins->dst[0].reg.idx, dst_mask);
03200     }
03201     else
03202     {
03203         enum wined3d_shader_src_modifier src_mod = ins->src[0].modifiers;
03204         DWORD reg = ins->src[0].reg.idx;
03205         char dst_swizzle[6];
03206 
03207         shader_glsl_get_swizzle(&ins->src[0], FALSE, write_mask, dst_swizzle);
03208 
03209         if (src_mod == WINED3DSPSM_DZ)
03210         {
03211             unsigned int mask_size = shader_glsl_get_write_mask_size(write_mask);
03212             struct glsl_src_param div_param;
03213 
03214             shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_2, &div_param);
03215 
03216             if (mask_size > 1) {
03217                 shader_addline(buffer, "gl_TexCoord[%u]%s / vec%d(%s));\n", reg, dst_swizzle, mask_size, div_param.param_str);
03218             } else {
03219                 shader_addline(buffer, "gl_TexCoord[%u]%s / %s);\n", reg, dst_swizzle, div_param.param_str);
03220             }
03221         }
03222         else if (src_mod == WINED3DSPSM_DW)
03223         {
03224             unsigned int mask_size = shader_glsl_get_write_mask_size(write_mask);
03225             struct glsl_src_param div_param;
03226 
03227             shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_3, &div_param);
03228 
03229             if (mask_size > 1) {
03230                 shader_addline(buffer, "gl_TexCoord[%u]%s / vec%d(%s));\n", reg, dst_swizzle, mask_size, div_param.param_str);
03231             } else {
03232                 shader_addline(buffer, "gl_TexCoord[%u]%s / %s);\n", reg, dst_swizzle, div_param.param_str);
03233             }
03234         } else {
03235             shader_addline(buffer, "gl_TexCoord[%u]%s);\n", reg, dst_swizzle);
03236         }
03237     }
03238 }
03239 
03243 static void shader_glsl_texdp3tex(const struct wined3d_shader_instruction *ins)
03244 {
03245     DWORD sampler_idx = ins->dst[0].reg.idx;
03246     DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
03247     struct glsl_sample_function sample_function;
03248     struct glsl_src_param src0_param;
03249     UINT mask_size;
03250 
03251     shader_glsl_add_src_param(ins, &ins->src[0], src_mask, &src0_param);
03252 
03253     /* Do I have to take care about the projected bit? I don't think so, since the dp3 returns only one
03254      * scalar, and projected sampling would require 4.
03255      *
03256      * It is a dependent read - not valid with conditional NP2 textures
03257      */
03258     shader_glsl_get_sample_function(ins->ctx, sampler_idx, 0, &sample_function);
03259     mask_size = shader_glsl_get_write_mask_size(sample_function.coord_mask);
03260 
03261     switch(mask_size)
03262     {
03263         case 1:
03264             shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL,
03265                     "dot(gl_TexCoord[%u].xyz, %s)", sampler_idx, src0_param.param_str);
03266             break;
03267 
03268         case 2:
03269             shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL,
03270                     "vec2(dot(gl_TexCoord[%u].xyz, %s), 0.0)", sampler_idx, src0_param.param_str);
03271             break;
03272 
03273         case 3:
03274             shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL,
03275                     "vec3(dot(gl_TexCoord[%u].xyz, %s), 0.0, 0.0)", sampler_idx, src0_param.param_str);
03276             break;
03277 
03278         default:
03279             FIXME("Unexpected mask size %u\n", mask_size);
03280             break;
03281     }
03282 }
03283 
03286 static void shader_glsl_texdp3(const struct wined3d_shader_instruction *ins)
03287 {
03288     DWORD dstreg = ins->dst[0].reg.idx;
03289     DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
03290     struct glsl_src_param src0_param;
03291     DWORD dst_mask;
03292     unsigned int mask_size;
03293 
03294     dst_mask = shader_glsl_append_dst(ins->ctx->buffer, ins);
03295     mask_size = shader_glsl_get_write_mask_size(dst_mask);
03296     shader_glsl_add_src_param(ins, &ins->src[0], src_mask, &src0_param);
03297 
03298     if (mask_size > 1) {
03299         shader_addline(ins->ctx->buffer, "vec%d(dot(T%u.xyz, %s)));\n", mask_size, dstreg, src0_param.param_str);
03300     } else {
03301         shader_addline(ins->ctx->buffer, "dot(T%u.xyz, %s));\n", dstreg, src0_param.param_str);
03302     }
03303 }
03304 
03307 static void shader_glsl_texdepth(const struct wined3d_shader_instruction *ins)
03308 {
03309     struct glsl_dst_param dst_param;
03310 
03311     shader_glsl_add_dst_param(ins, &ins->dst[0], &dst_param);
03312 
03313     /* Tests show that texdepth never returns anything below 0.0, and that r5.y is clamped to 1.0.
03314      * Negative input is accepted, -0.25 / -0.5 returns 0.5. GL should clamp gl_FragDepth to [0;1], but
03315      * this doesn't always work, so clamp the results manually. Whether or not the x value is clamped at 1
03316      * too is irrelevant, since if x = 0, any y value < 1.0 (and > 1.0 is not allowed) results in a result
03317      * >= 1.0 or < 0.0
03318      */
03319     shader_addline(ins->ctx->buffer, "gl_FragDepth = clamp((%s.x / min(%s.y, 1.0)), 0.0, 1.0);\n",
03320             dst_param.reg_name, dst_param.reg_name);
03321 }
03322 
03328 static void shader_glsl_texm3x2depth(const struct wined3d_shader_instruction *ins)
03329 {
03330     DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
03331     DWORD dstreg = ins->dst[0].reg.idx;
03332     struct glsl_src_param src0_param;
03333 
03334     shader_glsl_add_src_param(ins, &ins->src[0], src_mask, &src0_param);
03335 
03336     shader_addline(ins->ctx->buffer, "tmp0.y = dot(T%u.xyz, %s);\n", dstreg, src0_param.param_str);
03337     shader_addline(ins->ctx->buffer, "gl_FragDepth = (tmp0.y == 0.0) ? 1.0 : clamp(tmp0.x / tmp0.y, 0.0, 1.0);\n");
03338 }
03339 
03342 static void shader_glsl_texm3x2pad(const struct wined3d_shader_instruction *ins)
03343 {
03344     DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
03345     DWORD reg = ins->dst[0].reg.idx;
03346     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
03347     struct glsl_src_param src0_param;
03348 
03349     shader_glsl_add_src_param(ins, &ins->src[0], src_mask, &src0_param);
03350     shader_addline(buffer, "tmp0.x = dot(T%u.xyz, %s);\n", reg, src0_param.param_str);
03351 }
03352 
03355 static void shader_glsl_texm3x3pad(const struct wined3d_shader_instruction *ins)
03356 {
03357     DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
03358     DWORD reg = ins->dst[0].reg.idx;
03359     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
03360     struct wined3d_shader_tex_mx *tex_mx = ins->ctx->tex_mx;
03361     struct glsl_src_param src0_param;
03362 
03363     shader_glsl_add_src_param(ins, &ins->src[0], src_mask, &src0_param);
03364     shader_addline(buffer, "tmp0.%c = dot(T%u.xyz, %s);\n", 'x' + tex_mx->current_row, reg, src0_param.param_str);
03365     tex_mx->texcoord_w[tex_mx->current_row++] = reg;
03366 }
03367 
03368 static void shader_glsl_texm3x2tex(const struct wined3d_shader_instruction *ins)
03369 {
03370     DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
03371     DWORD reg = ins->dst[0].reg.idx;
03372     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
03373     struct glsl_sample_function sample_function;
03374     struct glsl_src_param src0_param;
03375 
03376     shader_glsl_add_src_param(ins, &ins->src[0], src_mask, &src0_param);
03377     shader_addline(buffer, "tmp0.y = dot(T%u.xyz, %s);\n", reg, src0_param.param_str);
03378 
03379     shader_glsl_get_sample_function(ins->ctx, reg, 0, &sample_function);
03380 
03381     /* Sample the texture using the calculated coordinates */
03382     shader_glsl_gen_sample_code(ins, reg, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL, "tmp0.xy");
03383 }
03384 
03387 static void shader_glsl_texm3x3tex(const struct wined3d_shader_instruction *ins)
03388 {
03389     DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
03390     struct wined3d_shader_tex_mx *tex_mx = ins->ctx->tex_mx;
03391     struct glsl_sample_function sample_function;
03392     struct glsl_src_param src0_param;
03393     DWORD reg = ins->dst[0].reg.idx;
03394 
03395     shader_glsl_add_src_param(ins, &ins->src[0], src_mask, &src0_param);
03396     shader_addline(ins->ctx->buffer, "tmp0.z = dot(T%u.xyz, %s);\n", reg, src0_param.param_str);
03397 
03398     /* Dependent read, not valid with conditional NP2 */
03399     shader_glsl_get_sample_function(ins->ctx, reg, 0, &sample_function);
03400 
03401     /* Sample the texture using the calculated coordinates */
03402     shader_glsl_gen_sample_code(ins, reg, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL, "tmp0.xyz");
03403 
03404     tex_mx->current_row = 0;
03405 }
03406 
03409 static void shader_glsl_texm3x3(const struct wined3d_shader_instruction *ins)
03410 {
03411     DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
03412     struct wined3d_shader_tex_mx *tex_mx = ins->ctx->tex_mx;
03413     struct glsl_src_param src0_param;
03414     char dst_mask[6];
03415     DWORD reg = ins->dst[0].reg.idx;
03416 
03417     shader_glsl_add_src_param(ins, &ins->src[0], src_mask, &src0_param);
03418 
03419     shader_glsl_append_dst(ins->ctx->buffer, ins);
03420     shader_glsl_get_write_mask(&ins->dst[0], dst_mask);
03421     shader_addline(ins->ctx->buffer, "vec4(tmp0.xy, dot(T%u.xyz, %s), 1.0)%s);\n", reg, src0_param.param_str, dst_mask);
03422 
03423     tex_mx->current_row = 0;
03424 }
03425 
03426 /* Process the WINED3DSIO_TEXM3X3SPEC instruction in GLSL
03427  * Perform the final texture lookup based on the previous 2 3x3 matrix multiplies */
03428 static void shader_glsl_texm3x3spec(const struct wined3d_shader_instruction *ins)
03429 {
03430     struct glsl_src_param src0_param;
03431     struct glsl_src_param src1_param;
03432     DWORD reg = ins->dst[0].reg.idx;
03433     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
03434     struct wined3d_shader_tex_mx *tex_mx = ins->ctx->tex_mx;
03435     DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
03436     struct glsl_sample_function sample_function;
03437     char coord_mask[6];
03438 
03439     shader_glsl_add_src_param(ins, &ins->src[0], src_mask, &src0_param);
03440     shader_glsl_add_src_param(ins, &ins->src[1], src_mask, &src1_param);
03441 
03442     /* Perform the last matrix multiply operation */
03443     shader_addline(buffer, "tmp0.z = dot(T%u.xyz, %s);\n", reg, src0_param.param_str);
03444     /* Reflection calculation */
03445     shader_addline(buffer, "tmp0.xyz = -reflect((%s), normalize(tmp0.xyz));\n", src1_param.param_str);
03446 
03447     /* Dependent read, not valid with conditional NP2 */
03448     shader_glsl_get_sample_function(ins->ctx, reg, 0, &sample_function);
03449     shader_glsl_write_mask_to_str(sample_function.coord_mask, coord_mask);
03450 
03451     /* Sample the texture */
03452     shader_glsl_gen_sample_code(ins, reg, &sample_function, WINED3DSP_NOSWIZZLE,
03453             NULL, NULL, NULL, "tmp0%s", coord_mask);
03454 
03455     tex_mx->current_row = 0;
03456 }
03457 
03458 /* Process the WINED3DSIO_TEXM3X3VSPEC instruction in GLSL
03459  * Perform the final texture lookup based on the previous 2 3x3 matrix multiplies */
03460 static void shader_glsl_texm3x3vspec(const struct wined3d_shader_instruction *ins)
03461 {
03462     DWORD reg = ins->dst[0].reg.idx;
03463     struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
03464     struct wined3d_shader_tex_mx *tex_mx = ins->ctx->tex_mx;
03465     DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
03466     struct glsl_sample_function sample_function;
03467     struct glsl_src_param src0_param;
03468     char coord_mask[6];
03469 
03470     shader_glsl_add_src_param(ins, &ins->src[0], src_mask, &src0_param);
03471 
03472     /* Perform the last matrix multiply operation */
03473     shader_addline(buffer, "tmp0.z = dot(vec3(T%u), vec3(%s));\n", reg, src0_param.param_str);
03474 
03475     /* Construct the eye-ray vector from w coordinates */
03476     shader_addline(buffer, "tmp1.xyz = normalize(vec3(gl_TexCoord[%u].w, gl_TexCoord[%u].w, gl_TexCoord[%u].w));\n",
03477             tex_mx->texcoord_w[0], tex_mx->texcoord_w[1], reg);
03478     shader_addline(buffer, "tmp0.xyz = -reflect(tmp1.xyz, normalize(tmp0.xyz));\n");
03479 
03480     /* Dependent read, not valid with conditional NP2 */
03481     shader_glsl_get_sample_function(ins->ctx, reg, 0, &sample_function);
03482     shader_glsl_write_mask_to_str(sample_function.coord_mask, coord_mask);
03483 
03484     /* Sample the texture using the calculated coordinates */
03485     shader_glsl_gen_sample_code(ins, reg, &sample_function, WINED3DSP_NOSWIZZLE,
03486             NULL, NULL, NULL, "tmp0%s", coord_mask);
03487 
03488     tex_mx->current_row = 0;
03489 }
03490 
03495 static void shader_glsl_texbem(const struct wined3d_shader_instruction *ins)
03496 {
03497     const struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data;
03498     struct glsl_sample_function sample_function;
03499     struct glsl_src_param coord_param;
03500     DWORD sampler_idx;
03501     DWORD mask;
03502     DWORD flags;
03503     char coord_mask[6];
03504 
03505     sampler_idx = ins->dst[0].reg.idx;
03506     flags = (priv->cur_ps_args->tex_transform >> sampler_idx * WINED3D_PSARGS_TEXTRANSFORM_SHIFT)
03507             & WINED3D_PSARGS_TEXTRANSFORM_MASK;
03508 
03509     /* Dependent read, not valid with conditional NP2 */
03510     shader_glsl_get_sample_function(ins->ctx, sampler_idx, 0, &sample_function);
03511     mask = sample_function.coord_mask;
03512 
03513     shader_glsl_write_mask_to_str(mask, coord_mask);
03514 
03515     /* With projected textures, texbem only divides the static texture coord,
03516      * not the displacement, so we can't let GL handle this. */
03517     if (flags & WINED3D_PSARGS_PROJECTED)
03518     {
03519         DWORD div_mask=0;
03520         char coord_div_mask[3];
03521         switch (flags & ~WINED3D_PSARGS_PROJECTED)
03522         {
03523             case WINED3D_TTFF_COUNT1:
03524                 FIXME("WINED3D_TTFF_PROJECTED with WINED3D_TTFF_COUNT1?\n");
03525                 break;
03526             case WINED3D_TTFF_COUNT2:
03527                 div_mask = WINED3DSP_WRITEMASK_1;
03528                 break;
03529             case WINED3D_TTFF_COUNT3:
03530                 div_mask = WINED3DSP_WRITEMASK_2;
03531                 break;
03532             case WINED3D_TTFF_COUNT4:
03533             case WINED3D_TTFF_DISABLE:
03534                 div_mask = WINED3DSP_WRITEMASK_3;
03535                 break;
03536         }
03537         shader_glsl_write_mask_to_str(div_mask, coord_div_mask);
03538         shader_addline(ins->ctx->buffer, "T%u%s /= T%u%s;\n", sampler_idx, coord_mask, sampler_idx, coord_div_mask);
03539     }
03540 
03541     shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1, &coord_param);
03542 
03543     shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL,
03544             "T%u%s + vec4(bumpenvmat%d * %s, 0.0, 0.0)%s", sampler_idx, coord_mask, sampler_idx,
03545             coord_param.param_str, coord_mask);
03546 
03547     if (ins->handler_idx == WINED3DSIH_TEXBEML)
03548     {
03549         struct glsl_src_param luminance_param;
03550         struct glsl_dst_param dst_param;
03551 
03552         shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_2, &luminance_param);
03553         shader_glsl_add_dst_param(ins, &ins->dst[0], &dst_param);
03554 
03555         shader_addline(ins->ctx->buffer, "%s%s *= (%s * luminancescale%d + luminanceoffset%d);\n",
03556                 dst_param.reg_name, dst_param.mask_str,
03557                 luminance_param.param_str, sampler_idx, sampler_idx);
03558     }
03559 }
03560 
03561 static void shader_glsl_bem(const struct wined3d_shader_instruction *ins)
03562 {
03563     struct glsl_src_param src0_param, src1_param;
03564     DWORD sampler_idx = ins->dst[0].reg.idx;
03565 
03566     shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1, &src0_param);
03567     shader_glsl_add_src_param(ins, &ins->src[1], WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1, &src1_param);
03568 
03569     shader_glsl_append_dst(ins->ctx->buffer, ins);
03570     shader_addline(ins->ctx->buffer, "%s + bumpenvmat%d * %s);\n",
03571             src0_param.param_str, sampler_idx, src1_param.param_str);
03572 }
03573 
03576 static void shader_glsl_texreg2ar(const struct wined3d_shader_instruction *ins)
03577 {
03578     struct glsl_sample_function sample_function;
03579     struct glsl_src_param src0_param;
03580     DWORD sampler_idx = ins->dst[0].reg.idx;
03581 
03582     shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_ALL, &src0_param);
03583 
03584     shader_glsl_get_sample_function(ins->ctx, sampler_idx, 0, &sample_function);
03585     shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL,
03586             "%s.wx", src0_param.reg_name);
03587 }
03588 
03591 static void shader_glsl_texreg2gb(const struct wined3d_shader_instruction *ins)
03592 {
03593     struct glsl_sample_function sample_function;
03594     struct glsl_src_param src0_param;
03595     DWORD sampler_idx = ins->dst[0].reg.idx;
03596 
03597     shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_ALL, &src0_param);
03598 
03599     shader_glsl_get_sample_function(ins->ctx, sampler_idx, 0, &sample_function);
03600     shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL,
03601             "%s.yz", src0_param.reg_name);
03602 }
03603 
03606 static void shader_glsl_texreg2rgb(const struct wined3d_shader_instruction *ins)
03607 {
03608     struct glsl_sample_function sample_function;
03609     struct glsl_src_param src0_param;
03610     DWORD sampler_idx = ins->dst[0].reg.idx;
03611 
03612     /* Dependent read, not valid with conditional NP2 */
03613     shader_glsl_get_sample_function(ins->ctx, sampler_idx, 0, &sample_function);
03614     shader_glsl_add_src_param(ins, &ins->src[0], sample_function.coord_mask, &src0_param);
03615 
03616     shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL,
03617             "%s", src0_param.param_str);
03618 }
03619 
03622 static void shader_glsl_texkill(const struct wined3d_shader_instruction *ins)
03623 {
03624     struct glsl_dst_param dst_param;
03625 
03626     /* The argument is a destination parameter, and no writemasks are allowed */
03627     shader_glsl_add_dst_param(ins, &ins->dst[0], &dst_param);
03628     if (ins->ctx->reg_maps->shader_version.major >= 2)
03629     {
03630         /* 2.0 shaders compare all 4 components in texkill */
03631         shader_addline(ins->ctx->buffer, "if (any(lessThan(%s.xyzw, vec4(0.0)))) discard;\n", dst_param.reg_name);
03632     } else {
03633         /* 1.X shaders only compare the first 3 components, probably due to the nature of the texkill
03634          * instruction as a tex* instruction, and phase, which kills all a / w components. Even if all
03635          * 4 components are defined, only the first 3 are used
03636          */
03637         shader_addline(ins->ctx->buffer, "if (any(lessThan(%s.xyz, vec3(0.0)))) discard;\n", dst_param.reg_name);
03638     }
03639 }
03640 
03643 static void shader_glsl_dp2add(const struct wined3d_shader_instruction *ins)
03644 {
03645     struct glsl_src_param src0_param;
03646     struct glsl_src_param src1_param;
03647     struct glsl_src_param src2_param;
03648     DWORD write_mask;
03649     unsigned int mask_size;
03650 
03651     write_mask = shader_glsl_append_dst(ins->ctx->buffer, ins);
03652     mask_size = shader_glsl_get_write_mask_size(write_mask);
03653 
03654     shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1, &src0_param);
03655     shader_glsl_add_src_param(ins, &ins->src[1], WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1, &src1_param);
03656     shader_glsl_add_src_param(ins, &ins->src[2], WINED3DSP_WRITEMASK_0, &src2_param);
03657 
03658     if (mask_size > 1) {
03659         shader_addline(ins->ctx->buffer, "vec%d(dot(%s, %s) + %s));\n",
03660                 mask_size, src0_param.param_str, src1_param.param_str, src2_param.param_str);
03661     } else {
03662         shader_addline(ins->ctx->buffer, "dot(%s, %s) + %s);\n",
03663                 src0_param.param_str, src1_param.param_str, src2_param.param_str);
03664     }
03665 }
03666 
03667 static void shader_glsl_input_pack(const struct wined3d_shader *shader, struct wined3d_shader_buffer *buffer,
03668         const struct wined3d_shader_signature_element *input_signature,
03669         const struct wined3d_shader_reg_maps *reg_maps,
03670         enum vertexprocessing_mode vertexprocessing)
03671 {
03672     WORD map = reg_maps->input_registers;
03673     unsigned int i;
03674 
03675     for (i = 0; map; map >>= 1, ++i)
03676     {
03677         const char *semantic_name;
03678         UINT semantic_idx;
03679         char reg_mask[6];
03680 
03681         /* Unused */
03682         if (!(map & 1)) continue;
03683 
03684         semantic_name = input_signature[i].semantic_name;
03685         semantic_idx = input_signature[i].semantic_idx;
03686         shader_glsl_write_mask_to_str(input_signature[i].mask, reg_mask);
03687 
03688         if (shader_match_semantic(semantic_name, WINED3DDECLUSAGE_TEXCOORD))
03689         {
03690             if (semantic_idx < 8 && vertexprocessing == pretransformed)
03691                 shader_addline(buffer, "IN[%u]%s = gl_TexCoord[%u]%s;\n",
03692                         shader->u.ps.input_reg_map[i], reg_mask, semantic_idx, reg_mask);
03693             else
03694                 shader_addline(buffer, "IN[%u]%s = vec4(0.0, 0.0, 0.0, 0.0)%s;\n",
03695                         shader->u.ps.input_reg_map[i], reg_mask, reg_mask);
03696         }
03697         else if (shader_match_semantic(semantic_name, WINED3DDECLUSAGE_COLOR))
03698         {
03699             if (!semantic_idx)
03700                 shader_addline(buffer, "IN[%u]%s = vec4(gl_Color)%s;\n",
03701                         shader->u.ps.input_reg_map[i], reg_mask, reg_mask);
03702             else if (semantic_idx == 1)
03703                 shader_addline(buffer, "IN[%u]%s = vec4(gl_SecondaryColor)%s;\n",
03704                         shader->u.ps.input_reg_map[i], reg_mask, reg_mask);
03705             else
03706                 shader_addline(buffer, "IN[%u]%s = vec4(0.0, 0.0, 0.0, 0.0)%s;\n",
03707                         shader->u.ps.input_reg_map[i], reg_mask, reg_mask);
03708         }
03709         else
03710         {
03711             shader_addline(buffer, "IN[%u]%s = vec4(0.0, 0.0, 0.0, 0.0)%s;\n",
03712                     shader->u.ps.input_reg_map[i], reg_mask, reg_mask);
03713         }
03714     }
03715 }
03716 
03717 /*********************************************
03718  * Vertex Shader Specific Code begins here
03719  ********************************************/
03720 
03721 static void add_glsl_program_entry(struct shader_glsl_priv *priv, struct glsl_shader_prog_link *entry)
03722 {
03723     struct glsl_program_key key;
03724 
03725     key.vshader = entry->vshader;
03726     key.pshader = entry->pshader;
03727     key.vs_args = entry->vs_args;
03728     key.ps_args = entry->ps_args;
03729 
03730     if (wine_rb_put(&priv->program_lookup, &key, &entry->program_lookup_entry) == -1)
03731     {
03732         ERR("Failed to insert program entry.\n");
03733     }
03734 }
03735 
03736 static struct glsl_shader_prog_link *get_glsl_program_entry(const struct shader_glsl_priv *priv,
03737         const struct wined3d_shader *vshader, const struct wined3d_shader *pshader,
03738         const struct vs_compile_args *vs_args, const struct ps_compile_args *ps_args)
03739 {
03740     struct wine_rb_entry *entry;
03741     struct glsl_program_key key;
03742 
03743     key.vshader = vshader;
03744     key.pshader = pshader;
03745     key.vs_args = *vs_args;
03746     key.ps_args = *ps_args;
03747 
03748     entry = wine_rb_get(&priv->program_lookup, &key);
03749     return entry ? WINE_RB_ENTRY_VALUE(entry, struct glsl_shader_prog_link, program_lookup_entry) : NULL;
03750 }
03751 
03752 /* GL locking is done by the caller */
03753 static void delete_glsl_program_entry(struct shader_glsl_priv *priv, const struct wined3d_gl_info *gl_info,
03754         struct glsl_shader_prog_link *entry)
03755 {
03756     struct glsl_program_key key;
03757 
03758     key.vshader = entry->vshader;
03759     key.pshader = entry->pshader;
03760     key.vs_args = entry->vs_args;
03761     key.ps_args = entry->ps_args;
03762     wine_rb_remove(&priv->program_lookup, &key);
03763 
03764     GL_EXTCALL(glDeleteObjectARB(entry->programId));
03765     if (entry->vshader) list_remove(&entry->vshader_entry);
03766     if (entry->pshader) list_remove(&entry->pshader_entry);
03767     HeapFree(GetProcessHeap(), 0, entry->vuniformF_locations);
03768     HeapFree(GetProcessHeap(), 0, entry->puniformF_locations);
03769     HeapFree(GetProcessHeap(), 0, entry);
03770 }
03771 
03772 static void handle_ps3_input(struct wined3d_shader_buffer *buffer,
03773         const struct wined3d_gl_info *gl_info, const DWORD *map,
03774         const struct wined3d_shader_signature_element *input_signature,
03775         const struct wined3d_shader_reg_maps *reg_maps_in,
03776         const struct wined3d_shader_signature_element *output_signature,
03777         const struct wined3d_shader_reg_maps *reg_maps_out)
03778 {
03779     unsigned int i, j;
03780     const char *semantic_name_in;
03781     UINT semantic_idx_in;
03782     DWORD *set;
03783     DWORD in_idx;
03784     unsigned int in_count = vec4_varyings(3, gl_info);
03785     char reg_mask[6];
03786     char destination[50];
03787     WORD input_map, output_map;
03788 
03789     set = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*set) * (in_count + 2));
03790 
03791     input_map = reg_maps_in->input_registers;
03792     for (i = 0; input_map; input_map >>= 1, ++i)
03793     {
03794         if (!(input_map & 1)) continue;
03795 
03796         in_idx = map[i];
03797         /* Declared, but not read register */
03798         if (in_idx == ~0U) continue;
03799         if (in_idx >= (in_count + 2))
03800         {
03801             FIXME("More input varyings declared than supported, expect issues.\n");
03802             continue;
03803         }
03804 
03805         if (in_idx == in_count) {
03806             sprintf(destination, "gl_FrontColor");
03807         } else if (in_idx == in_count + 1) {
03808             sprintf(destination, "gl_FrontSecondaryColor");
03809         } else {
03810             sprintf(destination, "IN[%u]", in_idx);
03811         }
03812 
03813         semantic_name_in = input_signature[i].semantic_name;
03814         semantic_idx_in = input_signature[i].semantic_idx;
03815         set[in_idx] = ~0U;
03816 
03817         output_map = reg_maps_out->output_registers;
03818         for (j = 0; output_map; output_map >>= 1, ++j)
03819         {
03820             DWORD mask;
03821 
03822             if (!(output_map & 1)
03823                     || semantic_idx_in != output_signature[j].semantic_idx
03824                     || strcmp(semantic_name_in, output_signature[j].semantic_name)
03825                     || !(mask = input_signature[i].mask & output_signature[j].mask))
03826                 continue;
03827 
03828             set[in_idx] = mask;
03829             shader_glsl_write_mask_to_str(mask, reg_mask);
03830 
03831             shader_addline(buffer, "%s%s = OUT[%u]%s;\n",
03832                     destination, reg_mask, j, reg_mask);
03833         }
03834     }
03835 
03836     for (i = 0; i < in_count + 2; ++i)
03837     {
03838         unsigned int size;
03839 
03840         if (!set[i] || set[i] == WINED3DSP_WRITEMASK_ALL)
03841             continue;
03842 
03843         if (set[i] == ~0U) set[i] = 0;
03844 
03845         size = 0;
03846         if (!(set[i] & WINED3DSP_WRITEMASK_0)) reg_mask[size++] = 'x';
03847         if (!(set[i] & WINED3DSP_WRITEMASK_1)) reg_mask[size++] = 'y';
03848         if (!(set[i] & WINED3DSP_WRITEMASK_2)) reg_mask[size++] = 'z';
03849         if (!(set[i] & WINED3DSP_WRITEMASK_3)) reg_mask[size++] = 'w';
03850         reg_mask[size] = '\0';
03851 
03852         if (i == in_count) sprintf(destination, "gl_FrontColor");
03853         else if (i == in_count + 1) sprintf(destination, "gl_FrontSecondaryColor");
03854         else sprintf(destination, "IN[%u]", i);
03855 
03856         if (size == 1) shader_addline(buffer, "%s.%s = 0.0;\n", destination, reg_mask);
03857         else shader_addline(buffer, "%s.%s = vec%u(0.0);\n", destination, reg_mask, size);
03858     }
03859 
03860     HeapFree(GetProcessHeap(), 0, set);
03861 }
03862 
03863 /* GL locking is done by the caller */
03864 static GLhandleARB generate_param_reorder_function(struct wined3d_shader_buffer *buffer,
03865         const struct wined3d_shader *vs, const struct wined3d_shader *ps,
03866         const struct wined3d_gl_info *gl_info)
03867 {
03868     GLhandleARB ret = 0;
03869     DWORD ps_major = ps ? ps->reg_maps.shader_version.major : 0;
03870     unsigned int i;
03871     const char *semantic_name;
03872     UINT semantic_idx;
03873     char reg_mask[6];
03874     const struct wined3d_shader_signature_element *output_signature = vs->output_signature;
03875     WORD map = vs->reg_maps.output_registers;
03876 
03877     shader_buffer_clear(buffer);
03878 
03879     shader_addline(buffer, "#version 120\n");
03880 
03881     if (ps_major < 3)
03882     {
03883         shader_addline(buffer, "void order_ps_input(in vec4 OUT[%u]) {\n", MAX_REG_OUTPUT);
03884 
03885         for (i = 0; map; map >>= 1, ++i)
03886         {
03887             DWORD write_mask;
03888 
03889             if (!(map & 1)) continue;
03890 
03891             semantic_name = output_signature[i].semantic_name;
03892             semantic_idx = output_signature[i].semantic_idx;
03893             write_mask = output_signature[i].mask;
03894             shader_glsl_write_mask_to_str(write_mask, reg_mask);
03895 
03896             if (shader_match_semantic(semantic_name, WINED3DDECLUSAGE_COLOR))
03897             {
03898                 if (!semantic_idx)
03899                     shader_addline(buffer, "gl_FrontColor%s = OUT[%u]%s;\n",
03900                             reg_mask, i, reg_mask);
03901                 else if (semantic_idx == 1)
03902                     shader_addline(buffer, "gl_FrontSecondaryColor%s = OUT[%u]%s;\n",
03903                             reg_mask, i, reg_mask);
03904             }
03905             else if (shader_match_semantic(semantic_name, WINED3DDECLUSAGE_POSITION))
03906             {
03907                 shader_addline(buffer, "gl_Position%s = OUT[%u]%s;\n",
03908                         reg_mask, i, reg_mask);
03909             }
03910             else if (shader_match_semantic(semantic_name, WINED3DDECLUSAGE_TEXCOORD))
03911             {
03912                 if (semantic_idx < 8)
03913                 {
03914                     if (!(gl_info->quirks & WINED3D_QUIRK_SET_TEXCOORD_W) || ps_major > 0)
03915                         write_mask |= WINED3DSP_WRITEMASK_3;
03916 
03917                     shader_addline(buffer, "gl_TexCoord[%u]%s = OUT[%u]%s;\n",
03918                             semantic_idx, reg_mask, i, reg_mask);
03919                     if (!(write_mask & WINED3DSP_WRITEMASK_3))
03920                         shader_addline(buffer, "gl_TexCoord[%u].w = 1.0;\n", semantic_idx);
03921                 }
03922             }
03923             else if (shader_match_semantic(semantic_name, WINED3DDECLUSAGE_PSIZE))
03924             {
03925                 shader_addline(buffer, "gl_PointSize = OUT[%u].%c;\n", i, reg_mask[1]);
03926             }
03927             else if (shader_match_semantic(semantic_name, WINED3DDECLUSAGE_FOG))
03928             {
03929                 shader_addline(buffer, "gl_FogFragCoord = OUT[%u].%c;\n", i, reg_mask[1]);
03930             }
03931         }
03932         shader_addline(buffer, "}\n");
03933 
03934     }
03935     else
03936     {
03937         UINT in_count = min(vec4_varyings(ps_major, gl_info), ps->limits.packed_input);
03938         /* This one is tricky: a 3.0 pixel shader reads from a 3.0 vertex shader */
03939         shader_addline(buffer, "varying vec4 IN[%u];\n", in_count);
03940         shader_addline(buffer, "void order_ps_input(in vec4 OUT[%u]) {\n", MAX_REG_OUTPUT);
03941 
03942         /* First, sort out position and point size. Those are not passed to the pixel shader */
03943         for (i = 0; map; map >>= 1, ++i)
03944         {
03945             if (!(map & 1)) continue;
03946 
03947             semantic_name = output_signature[i].semantic_name;
03948             shader_glsl_write_mask_to_str(output_signature[i].mask, reg_mask);
03949 
03950             if (shader_match_semantic(semantic_name, WINED3DDECLUSAGE_POSITION))
03951             {
03952                 shader_addline(buffer, "gl_Position%s = OUT[%u]%s;\n",
03953                         reg_mask, i, reg_mask);
03954             }
03955             else if (shader_match_semantic(semantic_name, WINED3DDECLUSAGE_PSIZE))
03956             {
03957                 shader_addline(buffer, "gl_PointSize = OUT[%u].%c;\n", i, reg_mask[1]);
03958             }
03959         }
03960 
03961         /* Then, fix the pixel shader input */
03962         handle_ps3_input(buffer, gl_info, ps->u.ps.input_reg_map, ps->input_signature,
03963                 &ps->reg_maps, output_signature, &vs->reg_maps);
03964 
03965         shader_addline(buffer, "}\n");
03966     }
03967 
03968     ret = GL_EXTCALL(glCreateShaderObjectARB(GL_VERTEX_SHADER_ARB));
03969     checkGLcall("glCreateShaderObjectARB(GL_VERTEX_SHADER_ARB)");
03970     shader_glsl_compile(gl_info, ret, buffer->buffer);
03971 
03972     return ret;
03973 }
03974 
03975 /* GL locking is done by the caller */
03976 static void hardcode_local_constants(const struct wined3d_shader *shader,
03977         const struct wined3d_gl_info *gl_info, GLhandleARB programId, char prefix)
03978 {
03979     const struct wined3d_shader_lconst *lconst;
03980     GLint tmp_loc;
03981     const float *value;
03982     char glsl_name[8];
03983 
03984     LIST_FOR_EACH_ENTRY(lconst, &shader->constantsF, struct wined3d_shader_lconst, entry)
03985     {
03986         value = (const float *)lconst->value;
03987         snprintf(glsl_name, sizeof(glsl_name), "%cLC%u", prefix, lconst->idx);
03988         tmp_loc = GL_EXTCALL(glGetUniformLocationARB(programId, glsl_name));
03989         GL_EXTCALL(glUniform4fvARB(tmp_loc, 1, value));
03990     }
03991     checkGLcall("Hardcoding local constants");
03992 }
03993 
03994 /* GL locking is done by the caller */
03995 static GLuint shader_glsl_generate_pshader(const struct wined3d_context *context,
03996         struct wined3d_shader_buffer *buffer, const struct wined3d_shader *shader,
03997         const struct ps_compile_args *args, struct ps_np2fixup_info *np2fixup_info)
03998 {
03999     const struct wined3d_shader_reg_maps *reg_maps = &shader->reg_maps;
04000     const struct wined3d_gl_info *gl_info = context->gl_info;
04001     const DWORD *function = shader->function;
04002     struct shader_glsl_ctx_priv priv_ctx;
04003 
04004     /* Create the hw GLSL shader object and assign it as the shader->prgId */
04005     GLhandleARB shader_obj = GL_EXTCALL(glCreateShaderObjectARB(GL_FRAGMENT_SHADER_ARB));
04006 
04007     memset(&priv_ctx, 0, sizeof(priv_ctx));
04008     priv_ctx.cur_ps_args = args;
04009     priv_ctx.cur_np2fixup_info = np2fixup_info;
04010 
04011     shader_addline(buffer, "#version 120\n");
04012 
04013     if (gl_info->supported[ARB_SHADER_TEXTURE_LOD])
04014     {
04015         shader_addline(buffer, "#extension GL_ARB_shader_texture_lod : enable\n");
04016     }
04017     if (gl_info->supported[ARB_TEXTURE_RECTANGLE])
04018     {
04019         /* The spec says that it doesn't have to be explicitly enabled, but the nvidia
04020          * drivers write a warning if we don't do so
04021          */
04022         shader_addline(buffer, "#extension GL_ARB_texture_rectangle : enable\n");
04023     }
04024     if (gl_info->supported[EXT_GPU_SHADER4])
04025     {
04026         shader_addline(buffer, "#extension GL_EXT_gpu_shader4 : enable\n");
04027     }
04028 
04029     /* Base Declarations */
04030     shader_generate_glsl_declarations(context, buffer, shader, reg_maps, &priv_ctx);
04031 
04032     /* Pack 3.0 inputs */
04033     if (reg_maps->shader_version.major >= 3 && args->vp_mode != vertexshader)
04034         shader_glsl_input_pack(shader, buffer, shader->input_signature, reg_maps, args->vp_mode);
04035 
04036     /* Base Shader Body */
04037     shader_generate_main(shader, buffer, reg_maps, function, &priv_ctx);
04038 
04039     /* Pixel shaders < 2.0 place the resulting color in R0 implicitly */
04040     if (reg_maps->shader_version.major < 2)
04041     {
04042         /* Some older cards like GeforceFX ones don't support multiple buffers, so also not gl_FragData */
04043         shader_addline(buffer, "gl_FragData[0] = R0;\n");
04044     }
04045 
04046     if (args->srgb_correction)
04047     {
04048         shader_addline(buffer, "tmp0.xyz = pow(gl_FragData[0].xyz, vec3(srgb_const0.x));\n");
04049         shader_addline(buffer, "tmp0.xyz = tmp0.xyz * vec3(srgb_const0.y) - vec3(srgb_const0.z);\n");
04050         shader_addline(buffer, "tmp1.xyz = gl_FragData[0].xyz * vec3(srgb_const0.w);\n");
04051         shader_addline(buffer, "bvec3 srgb_compare = lessThan(gl_FragData[0].xyz, vec3(srgb_const1.x));\n");
04052         shader_addline(buffer, "gl_FragData[0].xyz = mix(tmp0.xyz, tmp1.xyz, vec3(srgb_compare));\n");
04053         shader_addline(buffer, "gl_FragData[0] = clamp(gl_FragData[0], 0.0, 1.0);\n");
04054     }
04055     /* Pixel shader < 3.0 do not replace the fog stage.
04056      * This implements linear fog computation and blending.
04057      * TODO: non linear fog
04058      * NOTE: gl_Fog.start and gl_Fog.end don't hold fog start s and end e but
04059      * -1/(e-s) and e/(e-s) respectively.
04060      */
04061     if (reg_maps->shader_version.major < 3)
04062     {
04063         switch(args->fog) {
04064             case FOG_OFF: break;
04065             case FOG_LINEAR:
04066                 shader_addline(buffer, "float fogstart = -1.0 / (gl_Fog.end - gl_Fog.start);\n");
04067                 shader_addline(buffer, "float fogend = gl_Fog.end * -fogstart;\n");
04068                 shader_addline(buffer, "float Fog = clamp(gl_FogFragCoord * fogstart + fogend, 0.0, 1.0);\n");
04069                 shader_addline(buffer, "gl_FragData[0].xyz = mix(gl_Fog.color.xyz, gl_FragData[0].xyz, Fog);\n");
04070                 break;
04071             case FOG_EXP:
04072                 /* Fog = e^(-gl_Fog.density * gl_FogFragCoord) */
04073                 shader_addline(buffer, "float Fog = exp(-gl_Fog.density * gl_FogFragCoord);\n");
04074                 shader_addline(buffer, "Fog = clamp(Fog, 0.0, 1.0);\n");
04075                 shader_addline(buffer, "gl_FragData[0].xyz = mix(gl_Fog.color.xyz, gl_FragData[0].xyz, Fog);\n");
04076                 break;
04077             case FOG_EXP2:
04078                 /* Fog = e^(-(gl_Fog.density * gl_FogFragCoord)^2) */
04079                 shader_addline(buffer, "float Fog = exp(-gl_Fog.density * gl_Fog.density * gl_FogFragCoord * gl_FogFragCoord);\n");
04080                 shader_addline(buffer, "Fog = clamp(Fog, 0.0, 1.0);\n");
04081                 shader_addline(buffer, "gl_FragData[0].xyz = mix(gl_Fog.color.xyz, gl_FragData[0].xyz, Fog);\n");
04082                 break;
04083         }
04084     }
04085 
04086     shader_addline(buffer, "}\n");
04087 
04088     TRACE("Compiling shader object %u\n", shader_obj);
04089     shader_glsl_compile(gl_info, shader_obj, buffer->buffer);
04090 
04091     /* Store the shader object */
04092     return shader_obj;
04093 }
04094 
04095 /* GL locking is done by the caller */
04096 static GLuint shader_glsl_generate_vshader(const struct wined3d_context *context,
04097         struct wined3d_shader_buffer *buffer, const struct wined3d_shader *shader,
04098         const struct vs_compile_args *args)
04099 {
04100     const struct wined3d_shader_reg_maps *reg_maps = &shader->reg_maps;
04101     const struct wined3d_gl_info *gl_info = context->gl_info;
04102     const DWORD *function = shader->function;
04103     struct shader_glsl_ctx_priv priv_ctx;
04104 
04105     /* Create the hw GLSL shader program and assign it as the shader->prgId */
04106     GLhandleARB shader_obj = GL_EXTCALL(glCreateShaderObjectARB(GL_VERTEX_SHADER_ARB));
04107 
04108     shader_addline(buffer, "#version 120\n");
04109 
04110     if (gl_info->supported[EXT_GPU_SHADER4])
04111         shader_addline(buffer, "#extension GL_EXT_gpu_shader4 : enable\n");
04112 
04113     memset(&priv_ctx, 0, sizeof(priv_ctx));
04114     priv_ctx.cur_vs_args = args;
04115 
04116     /* Base Declarations */
04117     shader_generate_glsl_declarations(context, buffer, shader, reg_maps, &priv_ctx);
04118 
04119     /* Base Shader Body */
04120     shader_generate_main(shader, buffer, reg_maps, function, &priv_ctx);
04121 
04122     /* Unpack outputs */
04123     shader_addline(buffer, "order_ps_input(OUT);\n");
04124 
04125     /* The D3DRS_FOGTABLEMODE render state defines if the shader-generated fog coord is used
04126      * or if the fragment depth is used. If the fragment depth is used(FOGTABLEMODE != NONE),
04127      * the fog frag coord is thrown away. If the fog frag coord is used, but not written by
04128      * the shader, it is set to 0.0(fully fogged, since start = 1.0, end = 0.0)
04129      */
04130     if (args->fog_src == VS_FOG_Z)
04131         shader_addline(buffer, "gl_FogFragCoord = gl_Position.z;\n");
04132     else if (!reg_maps->fog)
04133         shader_addline(buffer, "gl_FogFragCoord = 0.0;\n");
04134 
04135     /* We always store the clipplanes without y inversion */
04136     if (args->clip_enabled)
04137         shader_addline(buffer, "gl_ClipVertex = gl_Position;\n");
04138 
04139     /* Write the final position.
04140      *
04141      * OpenGL coordinates specify the center of the pixel while d3d coords specify
04142      * the corner. The offsets are stored in z and w in posFixup. posFixup.y contains
04143      * 1.0 or -1.0 to turn the rendering upside down for offscreen rendering. PosFixup.x
04144      * contains 1.0 to allow a mad.
04145      */
04146     shader_addline(buffer, "gl_Position.y = gl_Position.y * posFixup.y;\n");
04147     shader_addline(buffer, "gl_Position.xy += posFixup.zw * gl_Position.ww;\n");
04148 
04149     /* Z coord [0;1]->[-1;1] mapping, see comment in transform_projection in state.c
04150      *
04151      * Basically we want (in homogeneous coordinates) z = z * 2 - 1. However, shaders are run
04152      * before the homogeneous divide, so we have to take the w into account: z = ((z / w) * 2 - 1) * w,
04153      * which is the same as z = z * 2 - w.
04154      */
04155     shader_addline(buffer, "gl_Position.z = gl_Position.z * 2.0 - gl_Position.w;\n");
04156 
04157     shader_addline(buffer, "}\n");
04158 
04159     TRACE("Compiling shader object %u\n", shader_obj);
04160     shader_glsl_compile(gl_info, shader_obj, buffer->buffer);
04161 
04162     return shader_obj;
04163 }
04164 
04165 static GLhandleARB find_glsl_pshader(const struct wined3d_context *context,
04166         struct wined3d_shader_buffer *buffer, struct wined3d_shader *shader,
04167         const struct ps_compile_args *args, const struct ps_np2fixup_info **np2fixup_info)
04168 {
04169     struct wined3d_state *state = &shader->device->stateBlock->state;
04170     UINT i;
04171     DWORD new_size;
04172     struct glsl_ps_compiled_shader *new_array;
04173     struct glsl_pshader_private    *shader_data;
04174     struct ps_np2fixup_info        *np2fixup = NULL;
04175     GLhandleARB ret;
04176 
04177     if (!shader->backend_data)
04178     {
04179         shader->backend_data = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*shader_data));
04180         if (!shader->backend_data)
04181         {
04182             ERR("Failed to allocate backend data.\n");
04183             return 0;
04184         }
04185     }
04186     shader_data = shader->backend_data;
04187 
04188     /* Usually we have very few GL shaders for each d3d shader(just 1 or maybe 2),
04189      * so a linear search is more performant than a hashmap or a binary search
04190      * (cache coherency etc)
04191      */
04192     for (i = 0; i < shader_data->num_gl_shaders; ++i)
04193     {
04194         if (!memcmp(&shader_data->gl_shaders[i].args, args, sizeof(*args)))
04195         {
04196             if (args->np2_fixup) *np2fixup_info = &shader_data->gl_shaders[i].np2fixup;
04197             return shader_data->gl_shaders[i].prgId;
04198         }
04199     }
04200 
04201     TRACE("No matching GL shader found for shader %p, compiling a new shader.\n", shader);
04202     if(shader_data->shader_array_size == shader_data->num_gl_shaders) {
04203         if (shader_data->num_gl_shaders)
04204         {
04205             new_size = shader_data->shader_array_size + max(1, shader_data->shader_array_size / 2);
04206             new_array = HeapReAlloc(GetProcessHeap(), 0, shader_data->gl_shaders,
04207                                     new_size * sizeof(*shader_data->gl_shaders));
04208         } else {
04209             new_array = HeapAlloc(GetProcessHeap(), 0, sizeof(*shader_data->gl_shaders));
04210             new_size = 1;
04211         }
04212 
04213         if(!new_array) {
04214             ERR("Out of memory\n");
04215             return 0;
04216         }
04217         shader_data->gl_shaders = new_array;
04218         shader_data->shader_array_size = new_size;
04219     }
04220 
04221     shader_data->gl_shaders[shader_data->num_gl_shaders].args = *args;
04222 
04223     memset(&shader_data->gl_shaders[shader_data->num_gl_shaders].np2fixup, 0, sizeof(struct ps_np2fixup_info));
04224     if (args->np2_fixup) np2fixup = &shader_data->gl_shaders[shader_data->num_gl_shaders].np2fixup;
04225 
04226     pixelshader_update_samplers(&shader->reg_maps, state->textures);
04227 
04228     shader_buffer_clear(buffer);
04229     ret = shader_glsl_generate_pshader(context, buffer, shader, args, np2fixup);
04230     shader_data->gl_shaders[shader_data->num_gl_shaders++].prgId = ret;
04231     *np2fixup_info = np2fixup;
04232 
04233     return ret;
04234 }
04235 
04236 static inline BOOL vs_args_equal(const struct vs_compile_args *stored, const struct vs_compile_args *new,
04237                                  const DWORD use_map) {
04238     if((stored->swizzle_map & use_map) != new->swizzle_map) return FALSE;
04239     if((stored->clip_enabled) != new->clip_enabled) return FALSE;
04240     return stored->fog_src == new->fog_src;
04241 }
04242 
04243 static GLhandleARB find_glsl_vshader(const struct wined3d_context *context,
04244         struct wined3d_shader_buffer *buffer, struct wined3d_shader *shader,
04245         const struct vs_compile_args *args)
04246 {
04247     UINT i;
04248     DWORD new_size;
04249     struct glsl_vs_compiled_shader *new_array;
04250     DWORD use_map = shader->device->strided_streams.use_map;
04251     struct glsl_vshader_private *shader_data;
04252     GLhandleARB ret;
04253 
04254     if (!shader->backend_data)
04255     {
04256         shader->backend_data = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*shader_data));
04257         if (!shader->backend_data)
04258         {
04259             ERR("Failed to allocate backend data.\n");
04260             return 0;
04261         }
04262     }
04263     shader_data = shader->backend_data;
04264 
04265     /* Usually we have very few GL shaders for each d3d shader(just 1 or maybe 2),
04266      * so a linear search is more performant than a hashmap or a binary search
04267      * (cache coherency etc)
04268      */
04269     for(i = 0; i < shader_data->num_gl_shaders; i++) {
04270         if(vs_args_equal(&shader_data->gl_shaders[i].args, args, use_map)) {
04271             return shader_data->gl_shaders[i].prgId;
04272         }
04273     }
04274 
04275     TRACE("No matching GL shader found for shader %p, compiling a new shader.\n", shader);
04276 
04277     if(shader_data->shader_array_size == shader_data->num_gl_shaders) {
04278         if (shader_data->num_gl_shaders)
04279         {
04280             new_size = shader_data->shader_array_size + max(1, shader_data->shader_array_size / 2);
04281             new_array = HeapReAlloc(GetProcessHeap(), 0, shader_data->gl_shaders,
04282                                     new_size * sizeof(*shader_data->gl_shaders));
04283         } else {
04284             new_array = HeapAlloc(GetProcessHeap(), 0, sizeof(*shader_data->gl_shaders));
04285             new_size = 1;
04286         }
04287 
04288         if(!new_array) {
04289             ERR("Out of memory\n");
04290             return 0;
04291         }
04292         shader_data->gl_shaders = new_array;
04293         shader_data->shader_array_size = new_size;
04294     }
04295 
04296     shader_data->gl_shaders[shader_data->num_gl_shaders].args = *args;
04297 
04298     shader_buffer_clear(buffer);
04299     ret = shader_glsl_generate_vshader(context, buffer, shader, args);
04300     shader_data->gl_shaders[shader_data->num_gl_shaders++].prgId = ret;
04301 
04302     return ret;
04303 }
04304 
04314 /* GL locking is done by the caller */
04315 static void set_glsl_shader_program(const struct wined3d_context *context,
04316         struct wined3d_device *device, BOOL use_ps, BOOL use_vs)
04317 {
04318     const struct wined3d_state *state = &device->stateBlock->state;
04319     struct wined3d_shader *vshader = use_vs ? state->vertex_shader : NULL;
04320     struct wined3d_shader *pshader = use_ps ? state->pixel_shader : NULL;
04321     const struct wined3d_gl_info *gl_info = context->gl_info;
04322     struct shader_glsl_priv *priv = device->shader_priv;
04323     struct glsl_shader_prog_link *entry    = NULL;
04324     GLhandleARB programId                  = 0;
04325     GLhandleARB reorder_shader_id          = 0;
04326     unsigned int i;
04327     char glsl_name[8];
04328     struct ps_compile_args ps_compile_args;
04329     struct vs_compile_args vs_compile_args;
04330 
04331     if (vshader) find_vs_compile_args(state, vshader, &vs_compile_args);
04332     if (pshader) find_ps_compile_args(state, pshader, &ps_compile_args);
04333 
04334     entry = get_glsl_program_entry(priv, vshader, pshader, &vs_compile_args, &ps_compile_args);
04335     if (entry)
04336     {
04337         priv->glsl_program = entry;
04338         return;
04339     }
04340 
04341     /* If we get to this point, then no matching program exists, so we create one */
04342     programId = GL_EXTCALL(glCreateProgramObjectARB());
04343     TRACE("Created new GLSL shader program %u\n", programId);
04344 
04345     /* Create the entry */
04346     entry = HeapAlloc(GetProcessHeap(), 0, sizeof(struct glsl_shader_prog_link));
04347     entry->programId = programId;
04348     entry->vshader = vshader;
04349     entry->pshader = pshader;
04350     entry->vs_args = vs_compile_args;
04351     entry->ps_args = ps_compile_args;
04352     entry->constant_version = 0;
04353     entry->np2Fixup_info = NULL;
04354     /* Add the hash table entry */
04355     add_glsl_program_entry(priv, entry);
04356 
04357     /* Set the current program */
04358     priv->glsl_program = entry;
04359 
04360     /* Attach GLSL vshader */
04361     if (vshader)
04362     {
04363         GLhandleARB vshader_id = find_glsl_vshader(context, &priv->shader_buffer, vshader, &vs_compile_args);
04364         WORD map = vshader->reg_maps.input_registers;
04365         char tmp_name[10];
04366 
04367         reorder_shader_id = generate_param_reorder_function(&priv->shader_buffer, vshader, pshader, gl_info);
04368         TRACE("Attaching GLSL shader object %u to program %u\n", reorder_shader_id, programId);
04369         GL_EXTCALL(glAttachObjectARB(programId, reorder_shader_id));
04370         checkGLcall("glAttachObjectARB");
04371         /* Flag the reorder function for deletion, then it will be freed automatically when the program
04372          * is destroyed
04373          */
04374         GL_EXTCALL(glDeleteObjectARB(reorder_shader_id));
04375 
04376         TRACE("Attaching GLSL shader object %u to program %u\n", vshader_id, programId);
04377         GL_EXTCALL(glAttachObjectARB(programId, vshader_id));
04378         checkGLcall("glAttachObjectARB");
04379 
04380         /* Bind vertex attributes to a corresponding index number to match
04381          * the same index numbers as ARB_vertex_programs (makes loading
04382          * vertex attributes simpler).  With this method, we can use the
04383          * exact same code to load the attributes later for both ARB and
04384          * GLSL shaders.
04385          *
04386          * We have to do this here because we need to know the Program ID
04387          * in order to make the bindings work, and it has to be done prior
04388          * to linking the GLSL program. */
04389         for (i = 0; map; map >>= 1, ++i)
04390         {
04391             if (!(map & 1)) continue;
04392 
04393             snprintf(tmp_name, sizeof(tmp_name), "attrib%u", i);
04394             GL_EXTCALL(glBindAttribLocationARB(programId, i, tmp_name));
04395         }
04396         checkGLcall("glBindAttribLocationARB");
04397 
04398         list_add_head(&vshader->linked_programs, &entry->vshader_entry);
04399     }
04400 
04401     /* Attach GLSL pshader */
04402     if (pshader)
04403     {
04404         GLhandleARB pshader_id = find_glsl_pshader(context, &priv->shader_buffer,
04405                 pshader, &ps_compile_args, &entry->np2Fixup_info);
04406         TRACE("Attaching GLSL shader object %u to program %u\n", pshader_id, programId);
04407         GL_EXTCALL(glAttachObjectARB(programId, pshader_id));
04408         checkGLcall("glAttachObjectARB");
04409 
04410         list_add_head(&pshader->linked_programs, &entry->pshader_entry);
04411     }
04412 
04413     /* Link the program */
04414     TRACE("Linking GLSL shader program %u\n", programId);
04415     GL_EXTCALL(glLinkProgramARB(programId));
04416     shader_glsl_validate_link(gl_info, programId);
04417 
04418     entry->vuniformF_locations = HeapAlloc(GetProcessHeap(), 0,
04419             sizeof(GLhandleARB) * gl_info->limits.glsl_vs_float_constants);
04420     for (i = 0; i < gl_info->limits.glsl_vs_float_constants; ++i)
04421     {
04422         snprintf(glsl_name, sizeof(glsl_name), "VC[%i]", i);
04423         entry->vuniformF_locations[i] = GL_EXTCALL(glGetUniformLocationARB(programId, glsl_name));
04424     }
04425     for (i = 0; i < MAX_CONST_I; ++i)
04426     {
04427         snprintf(glsl_name, sizeof(glsl_name), "VI[%i]", i);
04428         entry->vuniformI_locations[i] = GL_EXTCALL(glGetUniformLocationARB(programId, glsl_name));
04429     }
04430     entry->puniformF_locations = HeapAlloc(GetProcessHeap(), 0,
04431             sizeof(GLhandleARB) * gl_info->limits.glsl_ps_float_constants);
04432     for (i = 0; i < gl_info->limits.glsl_ps_float_constants; ++i)
04433     {
04434         snprintf(glsl_name, sizeof(glsl_name), "PC[%i]", i);
04435         entry->puniformF_locations[i] = GL_EXTCALL(glGetUniformLocationARB(programId, glsl_name));
04436     }
04437     for (i = 0; i < MAX_CONST_I; ++i)
04438     {
04439         snprintf(glsl_name, sizeof(glsl_name), "PI[%i]", i);
04440         entry->puniformI_locations[i] = GL_EXTCALL(glGetUniformLocationARB(programId, glsl_name));
04441     }
04442 
04443     if(pshader) {
04444         char name[32];
04445 
04446         for(i = 0; i < MAX_TEXTURES; i++) {
04447             sprintf(name, "bumpenvmat%u", i);
04448             entry->bumpenvmat_location[i] = GL_EXTCALL(glGetUniformLocationARB(programId, name));
04449             sprintf(name, "luminancescale%u", i);
04450             entry->luminancescale_location[i] = GL_EXTCALL(glGetUniformLocationARB(programId, name));
04451             sprintf(name, "luminanceoffset%u", i);
04452             entry->luminanceoffset_location[i] = GL_EXTCALL(glGetUniformLocationARB(programId, name));
04453         }
04454 
04455         if (ps_compile_args.np2_fixup) {
04456             if (entry->np2Fixup_info) {
04457                 entry->np2Fixup_location = GL_EXTCALL(glGetUniformLocationARB(programId, "PsamplerNP2Fixup"));
04458             } else {
04459                 FIXME("NP2 texcoord fixup needed for this pixelshader, but no fixup uniform found.\n");
04460             }
04461         }
04462     }
04463 
04464     entry->posFixup_location = GL_EXTCALL(glGetUniformLocationARB(programId, "posFixup"));
04465     entry->ycorrection_location = GL_EXTCALL(glGetUniformLocationARB(programId, "ycorrection"));
04466     checkGLcall("Find glsl program uniform locations");
04467 
04468     if (pshader && pshader->reg_maps.shader_version.major >= 3
04469             && pshader->u.ps.declared_in_count > vec4_varyings(3, gl_info))
04470     {
04471         TRACE("Shader %d needs vertex color clamping disabled\n", programId);
04472         entry->vertex_color_clamp = GL_FALSE;
04473     } else {
04474         entry->vertex_color_clamp = GL_FIXED_ONLY_ARB;
04475     }
04476 
04477     /* Set the shader to allow uniform loading on it */
04478     GL_EXTCALL(glUseProgramObjectARB(programId));
04479     checkGLcall("glUseProgramObjectARB(programId)");
04480 
04481     /* Load the vertex and pixel samplers now. The function that finds the mappings makes sure
04482      * that it stays the same for each vertexshader-pixelshader pair(=linked glsl program). If
04483      * a pshader with fixed function pipeline is used there are no vertex samplers, and if a
04484      * vertex shader with fixed function pixel processing is used we make sure that the card
04485      * supports enough samplers to allow the max number of vertex samplers with all possible
04486      * fixed function fragment processing setups. So once the program is linked these samplers
04487      * won't change.
04488      */
04489     if (vshader) shader_glsl_load_vsamplers(gl_info, device->texUnitMap, programId);
04490     if (pshader) shader_glsl_load_psamplers(gl_info, device->texUnitMap, programId);
04491 
04492     /* If the local constants do not have to be loaded with the environment constants,
04493      * load them now to have them hardcoded in the GLSL program. This saves some CPU cycles
04494      * later
04495      */
04496     if (pshader && !pshader->load_local_constsF)
04497         hardcode_local_constants(pshader, gl_info, programId, 'P');
04498     if (vshader && !vshader->load_local_constsF)
04499         hardcode_local_constants(vshader, gl_info, programId, 'V');
04500 }
04501 
04502 /* GL locking is done by the caller */
04503 static GLhandleARB create_glsl_blt_shader(const struct wined3d_gl_info *gl_info, enum tex_types tex_type, BOOL masked)
04504 {
04505     GLhandleARB program_id;
04506     GLhandleARB vshader_id, pshader_id;
04507     const char *blt_pshader;
04508 
04509     static const char *blt_vshader =
04510         "#version 120\n"
04511         "void main(void)\n"
04512         "{\n"
04513         "    gl_Position = gl_Vertex;\n"
04514         "    gl_FrontColor = vec4(1.0);\n"
04515         "    gl_TexCoord[0] = gl_MultiTexCoord0;\n"
04516         "}\n";
04517 
04518     static const char * const blt_pshaders_full[tex_type_count] =
04519     {
04520         /* tex_1d */
04521         NULL,
04522         /* tex_2d */
04523         "#version 120\n"
04524         "uniform sampler2D sampler;\n"
04525         "void main(void)\n"
04526         "{\n"
04527         "    gl_FragDepth = texture2D(sampler, gl_TexCoord[0].xy).x;\n"
04528         "}\n",
04529         /* tex_3d */
04530         NULL,
04531         /* tex_cube */
04532         "#version 120\n"
04533         "uniform samplerCube sampler;\n"
04534         "void main(void)\n"
04535         "{\n"
04536         "    gl_FragDepth = textureCube(sampler, gl_TexCoord[0].xyz).x;\n"
04537         "}\n",
04538         /* tex_rect */
04539         "#version 120\n"
04540         "#extension GL_ARB_texture_rectangle : enable\n"
04541         "uniform sampler2DRect sampler;\n"
04542         "void main(void)\n"
04543         "{\n"
04544         "    gl_FragDepth = texture2DRect(sampler, gl_TexCoord[0].xy).x;\n"
04545         "}\n",
04546     };
04547 
04548     static const char * const blt_pshaders_masked[tex_type_count] =
04549     {
04550         /* tex_1d */
04551         NULL,
04552         /* tex_2d */
04553         "#version 120\n"
04554         "uniform sampler2D sampler;\n"
04555         "uniform vec4 mask;\n"
04556         "void main(void)\n"
04557         "{\n"
04558         "    if (all(lessThan(gl_FragCoord.xy, mask.zw))) discard;\n"
04559         "    gl_FragDepth = texture2D(sampler, gl_TexCoord[0].xy).x;\n"
04560         "}\n",
04561         /* tex_3d */
04562         NULL,
04563         /* tex_cube */
04564         "#version 120\n"
04565         "uniform samplerCube sampler;\n"
04566         "uniform vec4 mask;\n"
04567         "void main(void)\n"
04568         "{\n"
04569         "    if (all(lessThan(gl_FragCoord.xy, mask.zw))) discard;\n"
04570         "    gl_FragDepth = textureCube(sampler, gl_TexCoord[0].xyz).x;\n"
04571         "}\n",
04572         /* tex_rect */
04573         "#version 120\n"
04574         "#extension GL_ARB_texture_rectangle : enable\n"
04575         "uniform sampler2DRect sampler;\n"
04576         "uniform vec4 mask;\n"
04577         "void main(void)\n"
04578         "{\n"
04579         "    if (all(lessThan(gl_FragCoord.xy, mask.zw))) discard;\n"
04580         "    gl_FragDepth = texture2DRect(sampler, gl_TexCoord[0].xy).x;\n"
04581         "}\n",
04582     };
04583 
04584     blt_pshader = masked ? blt_pshaders_masked[tex_type] : blt_pshaders_full[tex_type];
04585     if (!blt_pshader)
04586     {
04587         FIXME("tex_type %#x not supported\n", tex_type);
04588         return 0;
04589     }
04590 
04591     vshader_id = GL_EXTCALL(glCreateShaderObjectARB(GL_VERTEX_SHADER_ARB));
04592     shader_glsl_compile(gl_info, vshader_id, blt_vshader);
04593 
04594     pshader_id = GL_EXTCALL(glCreateShaderObjectARB(GL_FRAGMENT_SHADER_ARB));
04595     shader_glsl_compile(gl_info, pshader_id, blt_pshader);
04596 
04597     program_id = GL_EXTCALL(glCreateProgramObjectARB());
04598     GL_EXTCALL(glAttachObjectARB(program_id, vshader_id));
04599     GL_EXTCALL(glAttachObjectARB(program_id, pshader_id));
04600     GL_EXTCALL(glLinkProgramARB(program_id));
04601 
04602     shader_glsl_validate_link(gl_info, program_id);
04603 
04604     /* Once linked we can mark the shaders for deletion. They will be deleted once the program
04605      * is destroyed
04606      */
04607     GL_EXTCALL(glDeleteObjectARB(vshader_id));
04608     GL_EXTCALL(glDeleteObjectARB(pshader_id));
04609     return program_id;
04610 }
04611 
04612 /* GL locking is done by the caller */
04613 static void shader_glsl_select(const struct wined3d_context *context, BOOL usePS, BOOL useVS)
04614 {
04615     const struct wined3d_gl_info *gl_info = context->gl_info;
04616     struct wined3d_device *device = context->swapchain->device;
04617     struct shader_glsl_priv *priv = device->shader_priv;
04618     GLhandleARB program_id = 0;
04619     GLenum old_vertex_color_clamp, current_vertex_color_clamp;
04620 
04621     old_vertex_color_clamp = priv->glsl_program ? priv->glsl_program->vertex_color_clamp : GL_FIXED_ONLY_ARB;
04622 
04623     if (useVS || usePS) set_glsl_shader_program(context, device, usePS, useVS);
04624     else priv->glsl_program = NULL;
04625 
04626     current_vertex_color_clamp = priv->glsl_program ? priv->glsl_program->vertex_color_clamp : GL_FIXED_ONLY_ARB;
04627 
04628     if (old_vertex_color_clamp != current_vertex_color_clamp)
04629     {
04630         if (gl_info->supported[ARB_COLOR_BUFFER_FLOAT])
04631         {
04632             GL_EXTCALL(glClampColorARB(GL_CLAMP_VERTEX_COLOR_ARB, current_vertex_color_clamp));
04633             checkGLcall("glClampColorARB");
04634         }
04635         else
04636         {
04637             FIXME("vertex color clamp needs to be changed, but extension not supported.\n");
04638         }
04639     }
04640 
04641     program_id = priv->glsl_program ? priv->glsl_program->programId : 0;
04642     if (program_id) TRACE("Using GLSL program %u\n", program_id);
04643     GL_EXTCALL(glUseProgramObjectARB(program_id));
04644     checkGLcall("glUseProgramObjectARB");
04645 
04646     /* In case that NP2 texcoord fixup data is found for the selected program, trigger a reload of the
04647      * constants. This has to be done because it can't be guaranteed that sampler() (from state.c) is
04648      * called between selecting the shader and using it, which results in wrong fixup for some frames. */
04649     if (priv->glsl_program && priv->glsl_program->np2Fixup_info)
04650     {
04651         shader_glsl_load_np2fixup_constants(priv, gl_info, &device->stateBlock->state);
04652     }
04653 }
04654 
04655 /* GL locking is done by the caller */
04656 static void shader_glsl_select_depth_blt(void *shader_priv, const struct wined3d_gl_info *gl_info,
04657         enum tex_types tex_type, const SIZE *ds_mask_size)
04658 {
04659     BOOL masked = ds_mask_size->cx && ds_mask_size->cy;
04660     struct shader_glsl_priv *priv = shader_priv;
04661     GLhandleARB *blt_program;
04662     GLint loc;
04663 
04664     blt_program = masked ? &priv->depth_blt_program_masked[tex_type] : &priv->depth_blt_program_full[tex_type];
04665     if (!*blt_program)
04666     {
04667         *blt_program = create_glsl_blt_shader(gl_info, tex_type, masked);
04668         loc = GL_EXTCALL(glGetUniformLocationARB(*blt_program, "sampler"));
04669         GL_EXTCALL(glUseProgramObjectARB(*blt_program));
04670         GL_EXTCALL(glUniform1iARB(loc, 0));
04671     }
04672     else
04673     {
04674         GL_EXTCALL(glUseProgramObjectARB(*blt_program));
04675     }
04676 
04677     if (masked)
04678     {
04679         loc = GL_EXTCALL(glGetUniformLocationARB(*blt_program, "mask"));
04680         GL_EXTCALL(glUniform4fARB(loc, 0.0f, 0.0f, (float)ds_mask_size->cx, (float)ds_mask_size->cy));
04681     }
04682 }
04683 
04684 /* GL locking is done by the caller */
04685 static void shader_glsl_deselect_depth_blt(void *shader_priv, const struct wined3d_gl_info *gl_info)
04686 {
04687     struct shader_glsl_priv *priv = shader_priv;
04688     GLhandleARB program_id;
04689 
04690     program_id = priv->glsl_program ? priv->glsl_program->programId : 0;
04691     if (program_id) TRACE("Using GLSL program %u\n", program_id);
04692 
04693     GL_EXTCALL(glUseProgramObjectARB(program_id));
04694     checkGLcall("glUseProgramObjectARB");
04695 }
04696 
04697 static void shader_glsl_destroy(struct wined3d_shader *shader)
04698 {
04699     struct wined3d_device *device = shader->device;
04700     struct shader_glsl_priv *priv = device->shader_priv;
04701     const struct wined3d_gl_info *gl_info;
04702     const struct list *linked_programs;
04703     struct wined3d_context *context;
04704 
04705     char pshader = shader_is_pshader_version(shader->reg_maps.shader_version.type);
04706 
04707     if (pshader)
04708     {
04709         struct glsl_pshader_private *shader_data = shader->backend_data;
04710 
04711         if (!shader_data || !shader_data->num_gl_shaders)
04712         {
04713             HeapFree(GetProcessHeap(), 0, shader_data);
04714             shader->backend_data = NULL;
04715             return;
04716         }
04717 
04718         context = context_acquire(device, NULL);
04719         gl_info = context->gl_info;
04720 
04721         if (priv->glsl_program && priv->glsl_program->pshader == shader)
04722         {
04723             ENTER_GL();
04724             shader_glsl_select(context, FALSE, FALSE);
04725             LEAVE_GL();
04726         }
04727     }
04728     else
04729     {
04730         struct glsl_vshader_private *shader_data = shader->backend_data;
04731 
04732         if (!shader_data || !shader_data->num_gl_shaders)
04733         {
04734             HeapFree(GetProcessHeap(), 0, shader_data);
04735             shader->backend_data = NULL;
04736             return;
04737         }
04738 
04739         context = context_acquire(device, NULL);
04740         gl_info = context->gl_info;
04741 
04742         if (priv->glsl_program && priv->glsl_program->vshader == shader)
04743         {
04744             ENTER_GL();
04745             shader_glsl_select(context, FALSE, FALSE);
04746             LEAVE_GL();
04747         }
04748     }
04749 
04750     linked_programs = &shader->linked_programs;
04751 
04752     TRACE("Deleting linked programs\n");
04753     if (linked_programs->next) {
04754         struct glsl_shader_prog_link *entry, *entry2;
04755 
04756         ENTER_GL();
04757         if(pshader) {
04758             LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, linked_programs, struct glsl_shader_prog_link, pshader_entry) {
04759                 delete_glsl_program_entry(priv, gl_info, entry);
04760             }
04761         } else {
04762             LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, linked_programs, struct glsl_shader_prog_link, vshader_entry) {
04763                 delete_glsl_program_entry(priv, gl_info, entry);
04764             }
04765         }
04766         LEAVE_GL();
04767     }
04768 
04769     if (pshader)
04770     {
04771         struct glsl_pshader_private *shader_data = shader->backend_data;
04772         UINT i;
04773 
04774         ENTER_GL();
04775         for(i = 0; i < shader_data->num_gl_shaders; i++) {
04776             TRACE("deleting pshader %u\n", shader_data->gl_shaders[i].prgId);
04777             GL_EXTCALL(glDeleteObjectARB(shader_data->gl_shaders[i].prgId));
04778             checkGLcall("glDeleteObjectARB");
04779         }
04780         LEAVE_GL();
04781         HeapFree(GetProcessHeap(), 0, shader_data->gl_shaders);
04782     }
04783     else
04784     {
04785         struct glsl_vshader_private *shader_data = shader->backend_data;
04786         UINT i;
04787 
04788         ENTER_GL();
04789         for(i = 0; i < shader_data->num_gl_shaders; i++) {
04790             TRACE("deleting vshader %u\n", shader_data->gl_shaders[i].prgId);
04791             GL_EXTCALL(glDeleteObjectARB(shader_data->gl_shaders[i].prgId));
04792             checkGLcall("glDeleteObjectARB");
04793         }
04794         LEAVE_GL();
04795         HeapFree(GetProcessHeap(), 0, shader_data->gl_shaders);
04796     }
04797 
04798     HeapFree(GetProcessHeap(), 0, shader->backend_data);
04799     shader->backend_data = NULL;
04800 
04801     context_release(context);
04802 }
04803 
04804 static int glsl_program_key_compare(const void *key, const struct wine_rb_entry *entry)
04805 {
04806     const struct glsl_program_key *k = key;
04807     const struct glsl_shader_prog_link *prog = WINE_RB_ENTRY_VALUE(entry,
04808             const struct glsl_shader_prog_link, program_lookup_entry);
04809     int cmp;
04810 
04811     if (k->vshader > prog->vshader) return 1;
04812     else if (k->vshader < prog->vshader) return -1;
04813 
04814     if (k->pshader > prog->pshader) return 1;
04815     else if (k->pshader < prog->pshader) return -1;
04816 
04817     if (k->vshader && (cmp = memcmp(&k->vs_args, &prog->vs_args, sizeof(prog->vs_args)))) return cmp;
04818     if (k->pshader && (cmp = memcmp(&k->ps_args, &prog->ps_args, sizeof(prog->ps_args)))) return cmp;
04819 
04820     return 0;
04821 }
04822 
04823 static BOOL constant_heap_init(struct constant_heap *heap, unsigned int constant_count)
04824 {
04825     SIZE_T size = (constant_count + 1) * sizeof(*heap->entries) + constant_count * sizeof(*heap->positions);
04826     void *mem = HeapAlloc(GetProcessHeap(), 0, size);
04827 
04828     if (!mem)
04829     {
04830         ERR("Failed to allocate memory\n");
04831         return FALSE;
04832     }
04833 
04834     heap->entries = mem;
04835     heap->entries[1].version = 0;
04836     heap->positions = (unsigned int *)(heap->entries + constant_count + 1);
04837     heap->size = 1;
04838 
04839     return TRUE;
04840 }
04841 
04842 static void constant_heap_free(struct constant_heap *heap)
04843 {
04844     HeapFree(GetProcessHeap(), 0, heap->entries);
04845 }
04846 
04847 static const struct wine_rb_functions wined3d_glsl_program_rb_functions =
04848 {
04849     wined3d_rb_alloc,
04850     wined3d_rb_realloc,
04851     wined3d_rb_free,
04852     glsl_program_key_compare,
04853 };
04854 
04855 static HRESULT shader_glsl_alloc(struct wined3d_device *device)
04856 {
04857     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
04858     struct shader_glsl_priv *priv = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(struct shader_glsl_priv));
04859     SIZE_T stack_size = wined3d_log2i(max(gl_info->limits.glsl_vs_float_constants,
04860             gl_info->limits.glsl_ps_float_constants)) + 1;
04861 
04862     if (!shader_buffer_init(&priv->shader_buffer))
04863     {
04864         ERR("Failed to initialize shader buffer.\n");
04865         goto fail;
04866     }
04867 
04868     priv->stack = HeapAlloc(GetProcessHeap(), 0, stack_size * sizeof(*priv->stack));
04869     if (!priv->stack)
04870     {
04871         ERR("Failed to allocate memory.\n");
04872         goto fail;
04873     }
04874 
04875     if (!constant_heap_init(&priv->vconst_heap, gl_info->limits.glsl_vs_float_constants))
04876     {
04877         ERR("Failed to initialize vertex shader constant heap\n");
04878         goto fail;
04879     }
04880 
04881     if (!constant_heap_init(&priv->pconst_heap, gl_info->limits.glsl_ps_float_constants))
04882     {
04883         ERR("Failed to initialize pixel shader constant heap\n");
04884         goto fail;
04885     }
04886 
04887     if (wine_rb_init(&priv->program_lookup, &wined3d_glsl_program_rb_functions) == -1)
04888     {
04889         ERR("Failed to initialize rbtree.\n");
04890         goto fail;
04891     }
04892 
04893     priv->next_constant_version = 1;
04894 
04895     device->shader_priv = priv;
04896     return WINED3D_OK;
04897 
04898 fail:
04899     constant_heap_free(&priv->pconst_heap);
04900     constant_heap_free(&priv->vconst_heap);
04901     HeapFree(GetProcessHeap(), 0, priv->stack);
04902     shader_buffer_free(&priv->shader_buffer);
04903     HeapFree(GetProcessHeap(), 0, priv);
04904     return E_OUTOFMEMORY;
04905 }
04906 
04907 /* Context activation is done by the caller. */
04908 static void shader_glsl_free(struct wined3d_device *device)
04909 {
04910     const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
04911     struct shader_glsl_priv *priv = device->shader_priv;
04912     int i;
04913 
04914     ENTER_GL();
04915     for (i = 0; i < tex_type_count; ++i)
04916     {
04917         if (priv->depth_blt_program_full[i])
04918         {
04919             GL_EXTCALL(glDeleteObjectARB(priv->depth_blt_program_full[i]));
04920         }
04921         if (priv->depth_blt_program_masked[i])
04922         {
04923             GL_EXTCALL(glDeleteObjectARB(priv->depth_blt_program_masked[i]));
04924         }
04925     }
04926     LEAVE_GL();
04927 
04928     wine_rb_destroy(&priv->program_lookup, NULL, NULL);
04929     constant_heap_free(&priv->pconst_heap);
04930     constant_heap_free(&priv->vconst_heap);
04931     HeapFree(GetProcessHeap(), 0, priv->stack);
04932     shader_buffer_free(&priv->shader_buffer);
04933 
04934     HeapFree(GetProcessHeap(), 0, device->shader_priv);
04935     device->shader_priv = NULL;
04936 }
04937 
/* Context destruction callback; this implementation intentionally does nothing. */
static void shader_glsl_context_destroyed(void *shader_priv, const struct wined3d_context *context)
{
}
04939 
04940 static void shader_glsl_get_caps(const struct wined3d_gl_info *gl_info, struct shader_caps *caps)
04941 {
04942     if (gl_info->supported[EXT_GPU_SHADER4] && gl_info->supported[ARB_GEOMETRY_SHADER4]
04943             && gl_info->glsl_version >= MAKEDWORD_VERSION(1, 50))
04944     {
04945         caps->VertexShaderVersion = 4;
04946         caps->PixelShaderVersion = 4;
04947     }
04948     /* ARB_shader_texture_lod or EXT_gpu_shader4 is required for the SM3
04949      * texldd and texldl instructions. */
04950     else if (gl_info->supported[ARB_SHADER_TEXTURE_LOD] || gl_info->supported[EXT_GPU_SHADER4])
04951     {
04952         caps->VertexShaderVersion = 3;
04953         caps->PixelShaderVersion = 3;
04954     }
04955     else
04956     {
04957         caps->VertexShaderVersion = 2;
04958         caps->PixelShaderVersion = 2;
04959     }
04960 
04961     caps->MaxVertexShaderConst = gl_info->limits.glsl_vs_float_constants;
04962     caps->MaxPixelShaderConst = gl_info->limits.glsl_ps_float_constants;
04963 
04964     /* FIXME: The following line is card dependent. -8.0 to 8.0 is the
04965      * Direct3D minimum requirement.
04966      *
04967      * Both GL_ARB_fragment_program and GLSL require a "maximum representable magnitude"
04968      * of colors to be 2^10, and 2^32 for other floats. Should we use 1024 here?
04969      *
04970      * The problem is that the refrast clamps temporary results in the shader to
04971      * [-MaxValue;+MaxValue]. If the card's max value is bigger than the one we advertize here,
04972      * then applications may miss the clamping behavior. On the other hand, if it is smaller,
04973      * the shader will generate incorrect results too. Unfortunately, GL deliberately doesn't
04974      * offer a way to query this.
04975      */
04976     caps->PixelShader1xMaxValue = 8.0;
04977 
04978     caps->VSClipping = TRUE;
04979 
04980     TRACE_(d3d_caps)("Hardware vertex shader version %u enabled (GLSL).\n",
04981             caps->VertexShaderVersion);
04982     TRACE_(d3d_caps)("Hardware pixel shader version %u enabled (GLSL).\n",
04983             caps->PixelShaderVersion);
04984 }
04985 
04986 static BOOL shader_glsl_color_fixup_supported(struct color_fixup_desc fixup)
04987 {
04988     if (TRACE_ON(d3d_shader) && TRACE_ON(d3d))
04989     {
04990         TRACE("Checking support for fixup:\n");
04991         dump_color_fixup_desc(fixup);
04992     }
04993 
04994     /* We support everything except YUV conversions. */
04995     if (!is_complex_fixup(fixup))
04996     {
04997         TRACE("[OK]\n");
04998         return TRUE;
04999     }
05000 
05001     TRACE("[FAILED]\n");
05002     return FALSE;
05003 }
05004 
05005 static const SHADER_HANDLER shader_glsl_instruction_handler_table[WINED3DSIH_TABLE_SIZE] =
05006 {
05007     /* WINED3DSIH_ABS           */ shader_glsl_map2gl,
05008     /* WINED3DSIH_ADD           */ shader_glsl_arith,
05009     /* WINED3DSIH_AND           */ NULL,
05010     /* WINED3DSIH_BEM           */ shader_glsl_bem,
05011     /* WINED3DSIH_BREAK         */ shader_glsl_break,
05012     /* WINED3DSIH_BREAKC        */ shader_glsl_breakc,
05013     /* WINED3DSIH_BREAKP        */ NULL,
05014     /* WINED3DSIH_CALL          */ shader_glsl_call,
05015     /* WINED3DSIH_CALLNZ        */ shader_glsl_callnz,
05016     /* WINED3DSIH_CMP           */ shader_glsl_cmp,
05017     /* WINED3DSIH_CND           */ shader_glsl_cnd,
05018     /* WINED3DSIH_CRS           */ shader_glsl_cross,
05019     /* WINED3DSIH_CUT           */ NULL,
05020     /* WINED3DSIH_DCL           */ NULL,
05021     /* WINED3DSIH_DEF           */ NULL,
05022     /* WINED3DSIH_DEFB          */ NULL,
05023     /* WINED3DSIH_DEFI          */ NULL,
05024     /* WINED3DSIH_DIV           */ NULL,
05025     /* WINED3DSIH_DP2ADD        */ shader_glsl_dp2add,
05026     /* WINED3DSIH_DP3           */ shader_glsl_dot,
05027     /* WINED3DSIH_DP4           */ shader_glsl_dot,
05028     /* WINED3DSIH_DST           */ shader_glsl_dst,
05029     /* WINED3DSIH_DSX           */ shader_glsl_map2gl,
05030     /* WINED3DSIH_DSY           */ shader_glsl_map2gl,
05031     /* WINED3DSIH_ELSE          */ shader_glsl_else,
05032     /* WINED3DSIH_EMIT          */ NULL,
05033     /* WINED3DSIH_ENDIF         */ shader_glsl_end,
05034     /* WINED3DSIH_ENDLOOP       */ shader_glsl_end,
05035     /* WINED3DSIH_ENDREP        */ shader_glsl_end,
05036     /* WINED3DSIH_EQ            */ NULL,
05037     /* WINED3DSIH_EXP           */ shader_glsl_map2gl,
05038     /* WINED3DSIH_EXPP          */ shader_glsl_expp,
05039     /* WINED3DSIH_FRC           */ shader_glsl_map2gl,
05040     /* WINED3DSIH_FTOI          */ NULL,
05041     /* WINED3DSIH_GE            */ NULL,
05042     /* WINED3DSIH_IADD          */ NULL,
05043     /* WINED3DSIH_IEQ           */ NULL,
05044     /* WINED3DSIH_IF            */ shader_glsl_if,
05045     /* WINED3DSIH_IFC           */ shader_glsl_ifc,
05046     /* WINED3DSIH_IGE           */ NULL,
05047     /* WINED3DSIH_IMUL          */ NULL,
05048     /* WINED3DSIH_ITOF          */ NULL,
05049     /* WINED3DSIH_LABEL         */ shader_glsl_label,
05050     /* WINED3DSIH_LD            */ NULL,
05051     /* WINED3DSIH_LIT           */ shader_glsl_lit,
05052     /* WINED3DSIH_LOG           */ shader_glsl_log,
05053     /* WINED3DSIH_LOGP          */ shader_glsl_log,
05054     /* WINED3DSIH_LOOP          */ shader_glsl_loop,
05055     /* WINED3DSIH_LRP           */ shader_glsl_lrp,
05056     /* WINED3DSIH_LT            */ NULL,
05057     /* WINED3DSIH_M3x2          */ shader_glsl_mnxn,
05058     /* WINED3DSIH_M3x3          */ shader_glsl_mnxn,
05059     /* WINED3DSIH_M3x4          */ shader_glsl_mnxn,
05060     /* WINED3DSIH_M4x3          */ shader_glsl_mnxn,
05061     /* WINED3DSIH_M4x4          */ shader_glsl_mnxn,
05062     /* WINED3DSIH_MAD           */ shader_glsl_mad,
05063     /* WINED3DSIH_MAX           */ shader_glsl_map2gl,
05064     /* WINED3DSIH_MIN           */ shader_glsl_map2gl,
05065     /* WINED3DSIH_MOV           */ shader_glsl_mov,
05066     /* WINED3DSIH_MOVA          */ shader_glsl_mov,
05067     /* WINED3DSIH_MOVC          */ NULL,
05068     /* WINED3DSIH_MUL           */ shader_glsl_arith,
05069     /* WINED3DSIH_NOP           */ NULL,
05070     /* WINED3DSIH_NRM           */ shader_glsl_nrm,
05071     /* WINED3DSIH_PHASE         */ NULL,
05072     /* WINED3DSIH_POW           */ shader_glsl_pow,
05073     /* WINED3DSIH_RCP           */ shader_glsl_rcp,
05074     /* WINED3DSIH_REP           */ shader_glsl_rep,
05075     /* WINED3DSIH_RET           */ shader_glsl_ret,
05076     /* WINED3DSIH_ROUND_NI      */ NULL,
05077     /* WINED3DSIH_RSQ           */ shader_glsl_rsq,
05078     /* WINED3DSIH_SAMPLE        */ NULL,
05079     /* WINED3DSIH_SAMPLE_GRAD   */ NULL,
05080     /* WINED3DSIH_SAMPLE_LOD    */ NULL,
05081     /* WINED3DSIH_SETP          */ NULL,
05082     /* WINED3DSIH_SGE           */ shader_glsl_compare,
05083     /* WINED3DSIH_SGN           */ shader_glsl_sgn,
05084     /* WINED3DSIH_SINCOS        */ shader_glsl_sincos,
05085     /* WINED3DSIH_SLT           */ shader_glsl_compare,
05086     /* WINED3DSIH_SQRT          */ NULL,
05087     /* WINED3DSIH_SUB           */ shader_glsl_arith,
05088     /* WINED3DSIH_TEX           */ shader_glsl_tex,
05089     /* WINED3DSIH_TEXBEM        */ shader_glsl_texbem,
05090     /* WINED3DSIH_TEXBEML       */ shader_glsl_texbem,
05091     /* WINED3DSIH_TEXCOORD      */ shader_glsl_texcoord,
05092     /* WINED3DSIH_TEXDEPTH      */ shader_glsl_texdepth,
05093     /* WINED3DSIH_TEXDP3        */ shader_glsl_texdp3,
05094     /* WINED3DSIH_TEXDP3TEX     */ shader_glsl_texdp3tex,
05095     /* WINED3DSIH_TEXKILL       */ shader_glsl_texkill,
05096     /* WINED3DSIH_TEXLDD        */ shader_glsl_texldd,
05097     /* WINED3DSIH_TEXLDL        */ shader_glsl_texldl,
05098     /* WINED3DSIH_TEXM3x2DEPTH  */ shader_glsl_texm3x2depth,
05099     /* WINED3DSIH_TEXM3x2PAD    */ shader_glsl_texm3x2pad,
05100     /* WINED3DSIH_TEXM3x2TEX    */ shader_glsl_texm3x2tex,
05101     /* WINED3DSIH_TEXM3x3       */ shader_glsl_texm3x3,
05102     /* WINED3DSIH_TEXM3x3DIFF   */ NULL,
05103     /* WINED3DSIH_TEXM3x3PAD    */ shader_glsl_texm3x3pad,
05104     /* WINED3DSIH_TEXM3x3SPEC   */ shader_glsl_texm3x3spec,
05105     /* WINED3DSIH_TEXM3x3TEX    */ shader_glsl_texm3x3tex,
05106     /* WINED3DSIH_TEXM3x3VSPEC  */ shader_glsl_texm3x3vspec,
05107     /* WINED3DSIH_TEXREG2AR     */ shader_glsl_texreg2ar,
05108     /* WINED3DSIH_TEXREG2GB     */ shader_glsl_texreg2gb,
05109     /* WINED3DSIH_TEXREG2RGB    */ shader_glsl_texreg2rgb,
05110     /* WINED3DSIH_UDIV          */ NULL,
05111     /* WINED3DSIH_USHR          */ NULL,
05112     /* WINED3DSIH_UTOF          */ NULL,
05113     /* WINED3DSIH_XOR           */ NULL,
05114 };
05115 /* Dispatch one shader instruction: look up its handler in shader_glsl_instruction_handler_table by ins->handler_idx, call it, then call shader_glsl_add_instruction_modifiers(). */
05116 static void shader_glsl_handle_instruction(const struct wined3d_shader_instruction *ins) {
05117     SHADER_HANDLER hw_fct;
05118 
05119     /* Select the handler registered for this opcode. The lookup is not bounds-checked here, so ins->handler_idx must be a valid index into the table. */
05120     hw_fct = shader_glsl_instruction_handler_table[ins->handler_idx];
05121 
05122     /* A NULL table entry means this backend has no handler for the opcode: report it through FIXME and return without doing anything else. */
05123     if (!hw_fct)
05124     {
05125         FIXME("Backend can't handle opcode %#x\n", ins->handler_idx);
05126         return;
05127     }
05128     hw_fct(ins);
05129     /* Only reached when a handler was called; the unhandled-opcode path above returns before this point. */
05130     shader_glsl_add_instruction_modifiers(ins);
05131 }
05132 /* GLSL shader backend operations table. It has external linkage (no static). The initializers are positional, so their order must match the member order of struct wined3d_shader_backend_ops, which is declared outside this chunk. */
05133 const struct wined3d_shader_backend_ops glsl_shader_backend =
05134 {
05135     shader_glsl_handle_instruction,
05136     shader_glsl_select,
05137     shader_glsl_select_depth_blt,
05138     shader_glsl_deselect_depth_blt,
05139     shader_glsl_update_float_vertex_constants,
05140     shader_glsl_update_float_pixel_constants,
05141     shader_glsl_load_constants,
05142     shader_glsl_load_np2fixup_constants,
05143     shader_glsl_destroy,
05144     shader_glsl_alloc,
05145     shader_glsl_free,
05146     shader_glsl_context_destroyed,
05147     shader_glsl_get_caps,
05148     shader_glsl_color_fixup_supported,
05149 };

Generated on Mon May 28 2012 04:21:52 for ReactOS by doxygen 1.7.6.1

ReactOS is a registered trademark or a trademark of ReactOS Foundation in the United States and other countries.