Home | Info | Community | Development | myReactOS | Contact Us
ReactOS Development > Doxygen > glsl_shader.c
Go to the documentation of this file.
00001 /* 00002 * GLSL pixel and vertex shader implementation 00003 * 00004 * Copyright 2006 Jason Green 00005 * Copyright 2006-2007 Henri Verbeet 00006 * Copyright 2007-2008 Stefan Dösinger for CodeWeavers 00007 * Copyright 2009-2011 Henri Verbeet for CodeWeavers 00008 * 00009 * This library is free software; you can redistribute it and/or 00010 * modify it under the terms of the GNU Lesser General Public 00011 * License as published by the Free Software Foundation; either 00012 * version 2.1 of the License, or (at your option) any later version. 00013 * 00014 * This library is distributed in the hope that it will be useful, 00015 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00016 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00017 * Lesser General Public License for more details. 00018 * 00019 * You should have received a copy of the GNU Lesser General Public 00020 * License along with this library; if not, write to the Free Software 00021 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA 00022 */ 00023 00024 /* 00025 * D3D shader asm has swizzles on source parameters, and write masks for 00026 * destination parameters. GLSL uses swizzles for both. The result of this is 00027 * that for example "mov dst.xw, src.zyxw" becomes "dst.xw = src.zw" in GLSL. 00028 * Ie, to generate a proper GLSL source swizzle, we need to take the D3D write 00029 * mask for the destination parameter into account. 
00030 */ 00031 00032 #include "config.h" 00033 #include <limits.h> 00034 #include <stdio.h> 00035 #include "wined3d_private.h" 00036 00037 WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader); 00038 WINE_DECLARE_DEBUG_CHANNEL(d3d_constants); 00039 WINE_DECLARE_DEBUG_CHANNEL(d3d_caps); 00040 WINE_DECLARE_DEBUG_CHANNEL(d3d); 00041 00042 #define WINED3D_GLSL_SAMPLE_PROJECTED 0x1 00043 #define WINED3D_GLSL_SAMPLE_RECT 0x2 00044 #define WINED3D_GLSL_SAMPLE_LOD 0x4 00045 #define WINED3D_GLSL_SAMPLE_GRAD 0x8 00046 00047 struct glsl_dst_param 00048 { 00049 char reg_name[150]; 00050 char mask_str[6]; 00051 }; 00052 00053 struct glsl_src_param 00054 { 00055 char reg_name[150]; 00056 char param_str[200]; 00057 }; 00058 00059 struct glsl_sample_function 00060 { 00061 const char *name; 00062 DWORD coord_mask; 00063 }; 00064 00065 enum heap_node_op 00066 { 00067 HEAP_NODE_TRAVERSE_LEFT, 00068 HEAP_NODE_TRAVERSE_RIGHT, 00069 HEAP_NODE_POP, 00070 }; 00071 00072 struct constant_entry 00073 { 00074 unsigned int idx; 00075 unsigned int version; 00076 }; 00077 00078 struct constant_heap 00079 { 00080 struct constant_entry *entries; 00081 unsigned int *positions; 00082 unsigned int size; 00083 }; 00084 00085 /* GLSL shader private data */ 00086 struct shader_glsl_priv { 00087 struct wined3d_shader_buffer shader_buffer; 00088 struct wine_rb_tree program_lookup; 00089 struct glsl_shader_prog_link *glsl_program; 00090 struct constant_heap vconst_heap; 00091 struct constant_heap pconst_heap; 00092 unsigned char *stack; 00093 GLhandleARB depth_blt_program_full[tex_type_count]; 00094 GLhandleARB depth_blt_program_masked[tex_type_count]; 00095 UINT next_constant_version; 00096 }; 00097 00098 /* Struct to maintain data about a linked GLSL program */ 00099 struct glsl_shader_prog_link { 00100 struct wine_rb_entry program_lookup_entry; 00101 struct list vshader_entry; 00102 struct list pshader_entry; 00103 GLhandleARB programId; 00104 GLint *vuniformF_locations; 00105 GLint *puniformF_locations; 00106 GLint 
vuniformI_locations[MAX_CONST_I]; 00107 GLint puniformI_locations[MAX_CONST_I]; 00108 GLint posFixup_location; 00109 GLint np2Fixup_location; 00110 GLint bumpenvmat_location[MAX_TEXTURES]; 00111 GLint luminancescale_location[MAX_TEXTURES]; 00112 GLint luminanceoffset_location[MAX_TEXTURES]; 00113 GLint ycorrection_location; 00114 GLenum vertex_color_clamp; 00115 const struct wined3d_shader *vshader; 00116 const struct wined3d_shader *pshader; 00117 struct vs_compile_args vs_args; 00118 struct ps_compile_args ps_args; 00119 UINT constant_version; 00120 const struct ps_np2fixup_info *np2Fixup_info; 00121 }; 00122 00123 struct glsl_program_key 00124 { 00125 const struct wined3d_shader *vshader; 00126 const struct wined3d_shader *pshader; 00127 struct ps_compile_args ps_args; 00128 struct vs_compile_args vs_args; 00129 }; 00130 00131 struct shader_glsl_ctx_priv { 00132 const struct vs_compile_args *cur_vs_args; 00133 const struct ps_compile_args *cur_ps_args; 00134 struct ps_np2fixup_info *cur_np2fixup_info; 00135 }; 00136 00137 struct glsl_ps_compiled_shader 00138 { 00139 struct ps_compile_args args; 00140 struct ps_np2fixup_info np2fixup; 00141 GLhandleARB prgId; 00142 }; 00143 00144 struct glsl_pshader_private 00145 { 00146 struct glsl_ps_compiled_shader *gl_shaders; 00147 UINT num_gl_shaders, shader_array_size; 00148 }; 00149 00150 struct glsl_vs_compiled_shader 00151 { 00152 struct vs_compile_args args; 00153 GLhandleARB prgId; 00154 }; 00155 00156 struct glsl_vshader_private 00157 { 00158 struct glsl_vs_compiled_shader *gl_shaders; 00159 UINT num_gl_shaders, shader_array_size; 00160 }; 00161 00162 static const char *debug_gl_shader_type(GLenum type) 00163 { 00164 switch (type) 00165 { 00166 #define WINED3D_TO_STR(u) case u: return #u 00167 WINED3D_TO_STR(GL_VERTEX_SHADER_ARB); 00168 WINED3D_TO_STR(GL_GEOMETRY_SHADER_ARB); 00169 WINED3D_TO_STR(GL_FRAGMENT_SHADER_ARB); 00170 #undef WINED3D_TO_STR 00171 default: 00172 return wine_dbg_sprintf("UNKNOWN(%#x)", type); 
00173 } 00174 } 00175 00176 /* Extract a line from the info log. 00177 * Note that this modifies the source string. */ 00178 static char *get_info_log_line(char **ptr) 00179 { 00180 char *p, *q; 00181 00182 p = *ptr; 00183 if (!(q = strstr(p, "\n"))) 00184 { 00185 if (!*p) return NULL; 00186 *ptr += strlen(p); 00187 return p; 00188 } 00189 *q = '\0'; 00190 *ptr = q + 1; 00191 00192 return p; 00193 } 00194 00196 /* GL locking is done by the caller */ 00197 static void print_glsl_info_log(const struct wined3d_gl_info *gl_info, GLhandleARB obj) 00198 { 00199 int infologLength = 0; 00200 char *infoLog; 00201 00202 if (!WARN_ON(d3d_shader) && !FIXME_ON(d3d_shader)) 00203 return; 00204 00205 GL_EXTCALL(glGetObjectParameterivARB(obj, 00206 GL_OBJECT_INFO_LOG_LENGTH_ARB, 00207 &infologLength)); 00208 00209 /* A size of 1 is just a null-terminated string, so the log should be bigger than 00210 * that if there are errors. */ 00211 if (infologLength > 1) 00212 { 00213 char *ptr, *line; 00214 00215 infoLog = HeapAlloc(GetProcessHeap(), 0, infologLength); 00216 /* The info log is supposed to be zero-terminated, but at least some 00217 * versions of fglrx don't terminate the string properly. The reported 00218 * length does include the terminator, so explicitly set it to zero 00219 * here. */ 00220 infoLog[infologLength - 1] = 0; 00221 GL_EXTCALL(glGetInfoLogARB(obj, infologLength, NULL, infoLog)); 00222 00223 ptr = infoLog; 00224 if (gl_info->quirks & WINED3D_QUIRK_INFO_LOG_SPAM) 00225 { 00226 WARN("Info log received from GLSL shader #%u:\n", obj); 00227 while ((line = get_info_log_line(&ptr))) WARN(" %s\n", line); 00228 } 00229 else 00230 { 00231 FIXME("Info log received from GLSL shader #%u:\n", obj); 00232 while ((line = get_info_log_line(&ptr))) FIXME(" %s\n", line); 00233 } 00234 HeapFree(GetProcessHeap(), 0, infoLog); 00235 } 00236 } 00237 00238 /* GL locking is done by the caller. 
*/ 00239 static void shader_glsl_compile(const struct wined3d_gl_info *gl_info, GLhandleARB shader, const char *src) 00240 { 00241 TRACE("Compiling shader object %u.\n", shader); 00242 GL_EXTCALL(glShaderSourceARB(shader, 1, &src, NULL)); 00243 checkGLcall("glShaderSourceARB"); 00244 GL_EXTCALL(glCompileShaderARB(shader)); 00245 checkGLcall("glCompileShaderARB"); 00246 print_glsl_info_log(gl_info, shader); 00247 } 00248 00249 /* GL locking is done by the caller. */ 00250 static void shader_glsl_dump_program_source(const struct wined3d_gl_info *gl_info, GLhandleARB program) 00251 { 00252 GLint i, object_count, source_size = -1; 00253 GLhandleARB *objects; 00254 char *source = NULL; 00255 00256 GL_EXTCALL(glGetObjectParameterivARB(program, GL_OBJECT_ATTACHED_OBJECTS_ARB, &object_count)); 00257 objects = HeapAlloc(GetProcessHeap(), 0, object_count * sizeof(*objects)); 00258 if (!objects) 00259 { 00260 ERR("Failed to allocate object array memory.\n"); 00261 return; 00262 } 00263 00264 GL_EXTCALL(glGetAttachedObjectsARB(program, object_count, NULL, objects)); 00265 for (i = 0; i < object_count; ++i) 00266 { 00267 char *ptr, *line; 00268 GLint tmp; 00269 00270 GL_EXTCALL(glGetObjectParameterivARB(objects[i], GL_OBJECT_SHADER_SOURCE_LENGTH_ARB, &tmp)); 00271 00272 if (source_size < tmp) 00273 { 00274 HeapFree(GetProcessHeap(), 0, source); 00275 00276 source = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, tmp); 00277 if (!source) 00278 { 00279 ERR("Failed to allocate %d bytes for shader source.\n", tmp); 00280 HeapFree(GetProcessHeap(), 0, objects); 00281 return; 00282 } 00283 source_size = tmp; 00284 } 00285 00286 FIXME("Object %u:\n", objects[i]); 00287 GL_EXTCALL(glGetObjectParameterivARB(objects[i], GL_OBJECT_SUBTYPE_ARB, &tmp)); 00288 FIXME(" GL_OBJECT_SUBTYPE_ARB: %s.\n", debug_gl_shader_type(tmp)); 00289 GL_EXTCALL(glGetObjectParameterivARB(objects[i], GL_OBJECT_COMPILE_STATUS_ARB, &tmp)); 00290 FIXME(" GL_OBJECT_COMPILE_STATUS_ARB: %d.\n", tmp); 00291 FIXME("\n"); 
00292 00293 ptr = source; 00294 GL_EXTCALL(glGetShaderSourceARB(objects[i], source_size, NULL, source)); 00295 while ((line = get_info_log_line(&ptr))) FIXME(" %s\n", line); 00296 FIXME("\n"); 00297 } 00298 00299 HeapFree(GetProcessHeap(), 0, source); 00300 HeapFree(GetProcessHeap(), 0, objects); 00301 } 00302 00303 /* GL locking is done by the caller. */ 00304 static void shader_glsl_validate_link(const struct wined3d_gl_info *gl_info, GLhandleARB program) 00305 { 00306 GLint tmp; 00307 00308 if (!TRACE_ON(d3d_shader) && !FIXME_ON(d3d_shader)) return; 00309 00310 GL_EXTCALL(glGetObjectParameterivARB(program, GL_OBJECT_TYPE_ARB, &tmp)); 00311 if (tmp == GL_PROGRAM_OBJECT_ARB) 00312 { 00313 GL_EXTCALL(glGetObjectParameterivARB(program, GL_OBJECT_LINK_STATUS_ARB, &tmp)); 00314 if (!tmp) 00315 { 00316 FIXME("Program %u link status invalid.\n", program); 00317 shader_glsl_dump_program_source(gl_info, program); 00318 } 00319 } 00320 00321 print_glsl_info_log(gl_info, program); 00322 } 00323 00327 /* GL locking is done by the caller */ 00328 static void shader_glsl_load_psamplers(const struct wined3d_gl_info *gl_info, 00329 const DWORD *tex_unit_map, GLhandleARB programId) 00330 { 00331 GLint name_loc; 00332 int i; 00333 char sampler_name[20]; 00334 00335 for (i = 0; i < MAX_FRAGMENT_SAMPLERS; ++i) { 00336 snprintf(sampler_name, sizeof(sampler_name), "Psampler%d", i); 00337 name_loc = GL_EXTCALL(glGetUniformLocationARB(programId, sampler_name)); 00338 if (name_loc != -1) { 00339 DWORD mapped_unit = tex_unit_map[i]; 00340 if (mapped_unit != WINED3D_UNMAPPED_STAGE && mapped_unit < gl_info->limits.fragment_samplers) 00341 { 00342 TRACE("Loading %s for texture %d\n", sampler_name, mapped_unit); 00343 GL_EXTCALL(glUniform1iARB(name_loc, mapped_unit)); 00344 checkGLcall("glUniform1iARB"); 00345 } else { 00346 ERR("Trying to load sampler %s on unsupported unit %d\n", sampler_name, mapped_unit); 00347 } 00348 } 00349 } 00350 } 00351 00352 /* GL locking is done by the caller */ 
00353 static void shader_glsl_load_vsamplers(const struct wined3d_gl_info *gl_info, 00354 const DWORD *tex_unit_map, GLhandleARB programId) 00355 { 00356 GLint name_loc; 00357 char sampler_name[20]; 00358 int i; 00359 00360 for (i = 0; i < MAX_VERTEX_SAMPLERS; ++i) { 00361 snprintf(sampler_name, sizeof(sampler_name), "Vsampler%d", i); 00362 name_loc = GL_EXTCALL(glGetUniformLocationARB(programId, sampler_name)); 00363 if (name_loc != -1) { 00364 DWORD mapped_unit = tex_unit_map[MAX_FRAGMENT_SAMPLERS + i]; 00365 if (mapped_unit != WINED3D_UNMAPPED_STAGE && mapped_unit < gl_info->limits.combined_samplers) 00366 { 00367 TRACE("Loading %s for texture %d\n", sampler_name, mapped_unit); 00368 GL_EXTCALL(glUniform1iARB(name_loc, mapped_unit)); 00369 checkGLcall("glUniform1iARB"); 00370 } else { 00371 ERR("Trying to load sampler %s on unsupported unit %d\n", sampler_name, mapped_unit); 00372 } 00373 } 00374 } 00375 } 00376 00377 /* GL locking is done by the caller */ 00378 static inline void walk_constant_heap(const struct wined3d_gl_info *gl_info, const float *constants, 00379 const GLint *constant_locations, const struct constant_heap *heap, unsigned char *stack, DWORD version) 00380 { 00381 int stack_idx = 0; 00382 unsigned int heap_idx = 1; 00383 unsigned int idx; 00384 00385 if (heap->entries[heap_idx].version <= version) return; 00386 00387 idx = heap->entries[heap_idx].idx; 00388 if (constant_locations[idx] != -1) GL_EXTCALL(glUniform4fvARB(constant_locations[idx], 1, &constants[idx * 4])); 00389 stack[stack_idx] = HEAP_NODE_TRAVERSE_LEFT; 00390 00391 while (stack_idx >= 0) 00392 { 00393 /* Note that we fall through to the next case statement. 
*/ 00394 switch(stack[stack_idx]) 00395 { 00396 case HEAP_NODE_TRAVERSE_LEFT: 00397 { 00398 unsigned int left_idx = heap_idx << 1; 00399 if (left_idx < heap->size && heap->entries[left_idx].version > version) 00400 { 00401 heap_idx = left_idx; 00402 idx = heap->entries[heap_idx].idx; 00403 if (constant_locations[idx] != -1) 00404 GL_EXTCALL(glUniform4fvARB(constant_locations[idx], 1, &constants[idx * 4])); 00405 00406 stack[stack_idx++] = HEAP_NODE_TRAVERSE_RIGHT; 00407 stack[stack_idx] = HEAP_NODE_TRAVERSE_LEFT; 00408 break; 00409 } 00410 } 00411 00412 case HEAP_NODE_TRAVERSE_RIGHT: 00413 { 00414 unsigned int right_idx = (heap_idx << 1) + 1; 00415 if (right_idx < heap->size && heap->entries[right_idx].version > version) 00416 { 00417 heap_idx = right_idx; 00418 idx = heap->entries[heap_idx].idx; 00419 if (constant_locations[idx] != -1) 00420 GL_EXTCALL(glUniform4fvARB(constant_locations[idx], 1, &constants[idx * 4])); 00421 00422 stack[stack_idx++] = HEAP_NODE_POP; 00423 stack[stack_idx] = HEAP_NODE_TRAVERSE_LEFT; 00424 break; 00425 } 00426 } 00427 00428 case HEAP_NODE_POP: 00429 heap_idx >>= 1; 00430 --stack_idx; 00431 break; 00432 } 00433 } 00434 checkGLcall("walk_constant_heap()"); 00435 } 00436 00437 /* GL locking is done by the caller */ 00438 static inline void apply_clamped_constant(const struct wined3d_gl_info *gl_info, GLint location, const GLfloat *data) 00439 { 00440 GLfloat clamped_constant[4]; 00441 00442 if (location == -1) return; 00443 00444 clamped_constant[0] = data[0] < -1.0f ? -1.0f : data[0] > 1.0f ? 1.0f : data[0]; 00445 clamped_constant[1] = data[1] < -1.0f ? -1.0f : data[1] > 1.0f ? 1.0f : data[1]; 00446 clamped_constant[2] = data[2] < -1.0f ? -1.0f : data[2] > 1.0f ? 1.0f : data[2]; 00447 clamped_constant[3] = data[3] < -1.0f ? -1.0f : data[3] > 1.0f ? 
1.0f : data[3]; 00448 00449 GL_EXTCALL(glUniform4fvARB(location, 1, clamped_constant)); 00450 } 00451 00452 /* GL locking is done by the caller */ 00453 static inline void walk_constant_heap_clamped(const struct wined3d_gl_info *gl_info, const float *constants, 00454 const GLint *constant_locations, const struct constant_heap *heap, unsigned char *stack, DWORD version) 00455 { 00456 int stack_idx = 0; 00457 unsigned int heap_idx = 1; 00458 unsigned int idx; 00459 00460 if (heap->entries[heap_idx].version <= version) return; 00461 00462 idx = heap->entries[heap_idx].idx; 00463 apply_clamped_constant(gl_info, constant_locations[idx], &constants[idx * 4]); 00464 stack[stack_idx] = HEAP_NODE_TRAVERSE_LEFT; 00465 00466 while (stack_idx >= 0) 00467 { 00468 /* Note that we fall through to the next case statement. */ 00469 switch(stack[stack_idx]) 00470 { 00471 case HEAP_NODE_TRAVERSE_LEFT: 00472 { 00473 unsigned int left_idx = heap_idx << 1; 00474 if (left_idx < heap->size && heap->entries[left_idx].version > version) 00475 { 00476 heap_idx = left_idx; 00477 idx = heap->entries[heap_idx].idx; 00478 apply_clamped_constant(gl_info, constant_locations[idx], &constants[idx * 4]); 00479 00480 stack[stack_idx++] = HEAP_NODE_TRAVERSE_RIGHT; 00481 stack[stack_idx] = HEAP_NODE_TRAVERSE_LEFT; 00482 break; 00483 } 00484 } 00485 00486 case HEAP_NODE_TRAVERSE_RIGHT: 00487 { 00488 unsigned int right_idx = (heap_idx << 1) + 1; 00489 if (right_idx < heap->size && heap->entries[right_idx].version > version) 00490 { 00491 heap_idx = right_idx; 00492 idx = heap->entries[heap_idx].idx; 00493 apply_clamped_constant(gl_info, constant_locations[idx], &constants[idx * 4]); 00494 00495 stack[stack_idx++] = HEAP_NODE_POP; 00496 stack[stack_idx] = HEAP_NODE_TRAVERSE_LEFT; 00497 break; 00498 } 00499 } 00500 00501 case HEAP_NODE_POP: 00502 heap_idx >>= 1; 00503 --stack_idx; 00504 break; 00505 } 00506 } 00507 checkGLcall("walk_constant_heap_clamped()"); 00508 } 00509 00510 /* Loads floating point 
constants (aka uniforms) into the currently set GLSL program. */ 00511 /* GL locking is done by the caller */ 00512 static void shader_glsl_load_constantsF(const struct wined3d_shader *shader, const struct wined3d_gl_info *gl_info, 00513 const float *constants, const GLint *constant_locations, const struct constant_heap *heap, 00514 unsigned char *stack, UINT version) 00515 { 00516 const struct wined3d_shader_lconst *lconst; 00517 00518 /* 1.X pshaders have the constants clamped to [-1;1] implicitly. */ 00519 if (shader->reg_maps.shader_version.major == 1 00520 && shader_is_pshader_version(shader->reg_maps.shader_version.type)) 00521 walk_constant_heap_clamped(gl_info, constants, constant_locations, heap, stack, version); 00522 else 00523 walk_constant_heap(gl_info, constants, constant_locations, heap, stack, version); 00524 00525 if (!shader->load_local_constsF) 00526 { 00527 TRACE("No need to load local float constants for this shader\n"); 00528 return; 00529 } 00530 00531 /* Immediate constants are clamped to [-1;1] at shader creation time if needed */ 00532 LIST_FOR_EACH_ENTRY(lconst, &shader->constantsF, struct wined3d_shader_lconst, entry) 00533 { 00534 GLint location = constant_locations[lconst->idx]; 00535 /* We found this uniform name in the program - go ahead and send the data */ 00536 if (location != -1) GL_EXTCALL(glUniform4fvARB(location, 1, (const GLfloat *)lconst->value)); 00537 } 00538 checkGLcall("glUniform4fvARB()"); 00539 } 00540 00541 /* Loads integer constants (aka uniforms) into the currently set GLSL program. 
*/ 00542 /* GL locking is done by the caller */ 00543 static void shader_glsl_load_constantsI(const struct wined3d_shader *shader, const struct wined3d_gl_info *gl_info, 00544 const GLint locations[MAX_CONST_I], const int *constants, WORD constants_set) 00545 { 00546 unsigned int i; 00547 struct list* ptr; 00548 00549 for (i = 0; constants_set; constants_set >>= 1, ++i) 00550 { 00551 if (!(constants_set & 1)) continue; 00552 00553 TRACE_(d3d_constants)("Loading constants %u: %i, %i, %i, %i\n", 00554 i, constants[i*4], constants[i*4+1], constants[i*4+2], constants[i*4+3]); 00555 00556 /* We found this uniform name in the program - go ahead and send the data */ 00557 GL_EXTCALL(glUniform4ivARB(locations[i], 1, &constants[i*4])); 00558 checkGLcall("glUniform4ivARB"); 00559 } 00560 00561 /* Load immediate constants */ 00562 ptr = list_head(&shader->constantsI); 00563 while (ptr) 00564 { 00565 const struct wined3d_shader_lconst *lconst = LIST_ENTRY(ptr, const struct wined3d_shader_lconst, entry); 00566 unsigned int idx = lconst->idx; 00567 const GLint *values = (const GLint *)lconst->value; 00568 00569 TRACE_(d3d_constants)("Loading local constants %i: %i, %i, %i, %i\n", idx, 00570 values[0], values[1], values[2], values[3]); 00571 00572 /* We found this uniform name in the program - go ahead and send the data */ 00573 GL_EXTCALL(glUniform4ivARB(locations[idx], 1, values)); 00574 checkGLcall("glUniform4ivARB"); 00575 ptr = list_next(&shader->constantsI, ptr); 00576 } 00577 } 00578 00579 /* Loads boolean constants (aka uniforms) into the currently set GLSL program. 
*/ 00580 /* GL locking is done by the caller */ 00581 static void shader_glsl_load_constantsB(const struct wined3d_shader *shader, const struct wined3d_gl_info *gl_info, 00582 GLhandleARB programId, const BOOL *constants, WORD constants_set) 00583 { 00584 GLint tmp_loc; 00585 unsigned int i; 00586 char tmp_name[8]; 00587 const char *prefix; 00588 struct list* ptr; 00589 00590 switch (shader->reg_maps.shader_version.type) 00591 { 00592 case WINED3D_SHADER_TYPE_VERTEX: 00593 prefix = "VB"; 00594 break; 00595 00596 case WINED3D_SHADER_TYPE_GEOMETRY: 00597 prefix = "GB"; 00598 break; 00599 00600 case WINED3D_SHADER_TYPE_PIXEL: 00601 prefix = "PB"; 00602 break; 00603 00604 default: 00605 FIXME("Unknown shader type %#x.\n", 00606 shader->reg_maps.shader_version.type); 00607 prefix = "UB"; 00608 break; 00609 } 00610 00611 /* TODO: Benchmark and see if it would be beneficial to store the 00612 * locations of the constants to avoid looking up each time */ 00613 for (i = 0; constants_set; constants_set >>= 1, ++i) 00614 { 00615 if (!(constants_set & 1)) continue; 00616 00617 TRACE_(d3d_constants)("Loading constants %i: %i;\n", i, constants[i]); 00618 00619 /* TODO: Benchmark and see if it would be beneficial to store the 00620 * locations of the constants to avoid looking up each time */ 00621 snprintf(tmp_name, sizeof(tmp_name), "%s[%i]", prefix, i); 00622 tmp_loc = GL_EXTCALL(glGetUniformLocationARB(programId, tmp_name)); 00623 if (tmp_loc != -1) 00624 { 00625 /* We found this uniform name in the program - go ahead and send the data */ 00626 GL_EXTCALL(glUniform1ivARB(tmp_loc, 1, &constants[i])); 00627 checkGLcall("glUniform1ivARB"); 00628 } 00629 } 00630 00631 /* Load immediate constants */ 00632 ptr = list_head(&shader->constantsB); 00633 while (ptr) 00634 { 00635 const struct wined3d_shader_lconst *lconst = LIST_ENTRY(ptr, const struct wined3d_shader_lconst, entry); 00636 unsigned int idx = lconst->idx; 00637 const GLint *values = (const GLint *)lconst->value; 00638 
00639 TRACE_(d3d_constants)("Loading local constants %i: %i\n", idx, values[0]); 00640 00641 snprintf(tmp_name, sizeof(tmp_name), "%s[%i]", prefix, idx); 00642 tmp_loc = GL_EXTCALL(glGetUniformLocationARB(programId, tmp_name)); 00643 if (tmp_loc != -1) { 00644 /* We found this uniform name in the program - go ahead and send the data */ 00645 GL_EXTCALL(glUniform1ivARB(tmp_loc, 1, values)); 00646 checkGLcall("glUniform1ivARB"); 00647 } 00648 ptr = list_next(&shader->constantsB, ptr); 00649 } 00650 } 00651 00652 static void reset_program_constant_version(struct wine_rb_entry *entry, void *context) 00653 { 00654 WINE_RB_ENTRY_VALUE(entry, struct glsl_shader_prog_link, program_lookup_entry)->constant_version = 0; 00655 } 00656 00660 /* GL locking is done by the caller (state handler) */ 00661 static void shader_glsl_load_np2fixup_constants(void *shader_priv, 00662 const struct wined3d_gl_info *gl_info, const struct wined3d_state *state) 00663 { 00664 struct shader_glsl_priv *glsl_priv = shader_priv; 00665 const struct glsl_shader_prog_link *prog = glsl_priv->glsl_program; 00666 00667 /* No GLSL program set - nothing to do. */ 00668 if (!prog) return; 00669 00670 /* NP2 texcoord fixup is (currently) only done for pixelshaders. 
*/ 00671 if (!use_ps(state)) return; 00672 00673 if (prog->ps_args.np2_fixup && prog->np2Fixup_location != -1) 00674 { 00675 UINT i; 00676 UINT fixup = prog->ps_args.np2_fixup; 00677 GLfloat np2fixup_constants[4 * MAX_FRAGMENT_SAMPLERS]; 00678 00679 for (i = 0; fixup; fixup >>= 1, ++i) 00680 { 00681 const struct wined3d_texture *tex = state->textures[i]; 00682 const unsigned char idx = prog->np2Fixup_info->idx[i]; 00683 GLfloat *tex_dim = &np2fixup_constants[(idx >> 1) * 4]; 00684 00685 if (!tex) 00686 { 00687 ERR("Nonexistent texture is flagged for NP2 texcoord fixup.\n"); 00688 continue; 00689 } 00690 00691 if (idx % 2) 00692 { 00693 tex_dim[2] = tex->pow2_matrix[0]; 00694 tex_dim[3] = tex->pow2_matrix[5]; 00695 } 00696 else 00697 { 00698 tex_dim[0] = tex->pow2_matrix[0]; 00699 tex_dim[1] = tex->pow2_matrix[5]; 00700 } 00701 } 00702 00703 GL_EXTCALL(glUniform4fvARB(prog->np2Fixup_location, prog->np2Fixup_info->num_consts, np2fixup_constants)); 00704 } 00705 } 00706 00710 /* GL locking is done by the caller (state handler) */ 00711 static void shader_glsl_load_constants(const struct wined3d_context *context, 00712 char usePixelShader, char useVertexShader) 00713 { 00714 const struct wined3d_gl_info *gl_info = context->gl_info; 00715 struct wined3d_device *device = context->swapchain->device; 00716 struct wined3d_stateblock *stateBlock = device->stateBlock; 00717 const struct wined3d_state *state = &stateBlock->state; 00718 struct shader_glsl_priv *priv = device->shader_priv; 00719 float position_fixup[4]; 00720 00721 GLhandleARB programId; 00722 struct glsl_shader_prog_link *prog = priv->glsl_program; 00723 UINT constant_version; 00724 int i; 00725 00726 if (!prog) { 00727 /* No GLSL program set - nothing to do. 
*/ 00728 return; 00729 } 00730 programId = prog->programId; 00731 constant_version = prog->constant_version; 00732 00733 if (useVertexShader) 00734 { 00735 const struct wined3d_shader *vshader = state->vertex_shader; 00736 00737 /* Load DirectX 9 float constants/uniforms for vertex shader */ 00738 shader_glsl_load_constantsF(vshader, gl_info, state->vs_consts_f, 00739 prog->vuniformF_locations, &priv->vconst_heap, priv->stack, constant_version); 00740 00741 /* Load DirectX 9 integer constants/uniforms for vertex shader */ 00742 shader_glsl_load_constantsI(vshader, gl_info, prog->vuniformI_locations, state->vs_consts_i, 00743 stateBlock->changed.vertexShaderConstantsI & vshader->reg_maps.integer_constants); 00744 00745 /* Load DirectX 9 boolean constants/uniforms for vertex shader */ 00746 shader_glsl_load_constantsB(vshader, gl_info, programId, state->vs_consts_b, 00747 stateBlock->changed.vertexShaderConstantsB & vshader->reg_maps.boolean_constants); 00748 00749 /* Upload the position fixup params */ 00750 shader_get_position_fixup(context, state, position_fixup); 00751 GL_EXTCALL(glUniform4fvARB(prog->posFixup_location, 1, position_fixup)); 00752 checkGLcall("glUniform4fvARB"); 00753 } 00754 00755 if (usePixelShader) 00756 { 00757 const struct wined3d_shader *pshader = state->pixel_shader; 00758 00759 /* Load DirectX 9 float constants/uniforms for pixel shader */ 00760 shader_glsl_load_constantsF(pshader, gl_info, state->ps_consts_f, 00761 prog->puniformF_locations, &priv->pconst_heap, priv->stack, constant_version); 00762 00763 /* Load DirectX 9 integer constants/uniforms for pixel shader */ 00764 shader_glsl_load_constantsI(pshader, gl_info, prog->puniformI_locations, state->ps_consts_i, 00765 stateBlock->changed.pixelShaderConstantsI & pshader->reg_maps.integer_constants); 00766 00767 /* Load DirectX 9 boolean constants/uniforms for pixel shader */ 00768 shader_glsl_load_constantsB(pshader, gl_info, programId, state->ps_consts_b, 00769 
stateBlock->changed.pixelShaderConstantsB & pshader->reg_maps.boolean_constants); 00770 00771 /* Upload the environment bump map matrix if needed. The needsbumpmat member specifies the texture stage to load the matrix from. 00772 * It can't be 0 for a valid texbem instruction. 00773 */ 00774 for(i = 0; i < MAX_TEXTURES; i++) { 00775 const float *data; 00776 00777 if(prog->bumpenvmat_location[i] == -1) continue; 00778 00779 data = (const float *)&state->texture_states[i][WINED3D_TSS_BUMPENV_MAT00]; 00780 GL_EXTCALL(glUniformMatrix2fvARB(prog->bumpenvmat_location[i], 1, 0, data)); 00781 checkGLcall("glUniformMatrix2fvARB"); 00782 00783 /* texbeml needs the luminance scale and offset too. If texbeml 00784 * is used, needsbumpmat is set too, so we can check that in the 00785 * needsbumpmat check. */ 00786 if (prog->luminancescale_location[i] != -1) 00787 { 00788 const GLfloat *scale = (const GLfloat *)&state->texture_states[i][WINED3D_TSS_BUMPENV_LSCALE]; 00789 const GLfloat *offset = (const GLfloat *)&state->texture_states[i][WINED3D_TSS_BUMPENV_LOFFSET]; 00790 00791 GL_EXTCALL(glUniform1fvARB(prog->luminancescale_location[i], 1, scale)); 00792 checkGLcall("glUniform1fvARB"); 00793 GL_EXTCALL(glUniform1fvARB(prog->luminanceoffset_location[i], 1, offset)); 00794 checkGLcall("glUniform1fvARB"); 00795 } 00796 } 00797 00798 if (prog->ycorrection_location != -1) 00799 { 00800 float correction_params[4]; 00801 00802 if (context->render_offscreen) 00803 { 00804 correction_params[0] = 0.0f; 00805 correction_params[1] = 1.0f; 00806 } else { 00807 /* position is window relative, not viewport relative */ 00808 correction_params[0] = (float) context->current_rt->resource.height; 00809 correction_params[1] = -1.0f; 00810 } 00811 GL_EXTCALL(glUniform4fvARB(prog->ycorrection_location, 1, correction_params)); 00812 } 00813 } 00814 00815 if (priv->next_constant_version == UINT_MAX) 00816 { 00817 TRACE("Max constant version reached, resetting to 0.\n"); 00818 
wine_rb_for_each_entry(&priv->program_lookup, reset_program_constant_version, NULL); 00819 priv->next_constant_version = 1; 00820 } 00821 else 00822 { 00823 prog->constant_version = priv->next_constant_version++; 00824 } 00825 } 00826 00827 static void update_heap_entry(const struct constant_heap *heap, unsigned int idx, 00828 unsigned int heap_idx, DWORD new_version) 00829 { 00830 struct constant_entry *entries = heap->entries; 00831 unsigned int *positions = heap->positions; 00832 unsigned int parent_idx; 00833 00834 while (heap_idx > 1) 00835 { 00836 parent_idx = heap_idx >> 1; 00837 00838 if (new_version <= entries[parent_idx].version) break; 00839 00840 entries[heap_idx] = entries[parent_idx]; 00841 positions[entries[parent_idx].idx] = heap_idx; 00842 heap_idx = parent_idx; 00843 } 00844 00845 entries[heap_idx].version = new_version; 00846 entries[heap_idx].idx = idx; 00847 positions[idx] = heap_idx; 00848 } 00849 00850 static void shader_glsl_update_float_vertex_constants(struct wined3d_device *device, UINT start, UINT count) 00851 { 00852 struct shader_glsl_priv *priv = device->shader_priv; 00853 struct constant_heap *heap = &priv->vconst_heap; 00854 UINT i; 00855 00856 for (i = start; i < count + start; ++i) 00857 { 00858 if (!device->stateBlock->changed.vertexShaderConstantsF[i]) 00859 update_heap_entry(heap, i, heap->size++, priv->next_constant_version); 00860 else 00861 update_heap_entry(heap, i, heap->positions[i], priv->next_constant_version); 00862 } 00863 } 00864 00865 static void shader_glsl_update_float_pixel_constants(struct wined3d_device *device, UINT start, UINT count) 00866 { 00867 struct shader_glsl_priv *priv = device->shader_priv; 00868 struct constant_heap *heap = &priv->pconst_heap; 00869 UINT i; 00870 00871 for (i = start; i < count + start; ++i) 00872 { 00873 if (!device->stateBlock->changed.pixelShaderConstantsF[i]) 00874 update_heap_entry(heap, i, heap->size++, priv->next_constant_version); 00875 else 00876 update_heap_entry(heap, i, 
heap->positions[i], priv->next_constant_version); 00877 } 00878 } 00879 00880 static unsigned int vec4_varyings(DWORD shader_major, const struct wined3d_gl_info *gl_info) 00881 { 00882 unsigned int ret = gl_info->limits.glsl_varyings / 4; 00883 /* 4.0 shaders do not write clip coords because d3d10 does not support user clipplanes */ 00884 if(shader_major > 3) return ret; 00885 00886 /* 3.0 shaders may need an extra varying for the clip coord on some cards(mostly dx10 ones) */ 00887 if (gl_info->quirks & WINED3D_QUIRK_GLSL_CLIP_VARYING) ret -= 1; 00888 return ret; 00889 } 00890 00892 static void shader_generate_glsl_declarations(const struct wined3d_context *context, 00893 struct wined3d_shader_buffer *buffer, const struct wined3d_shader *shader, 00894 const struct wined3d_shader_reg_maps *reg_maps, const struct shader_glsl_ctx_priv *ctx_priv) 00895 { 00896 const struct wined3d_state *state = &shader->device->stateBlock->state; 00897 const struct ps_compile_args *ps_args = ctx_priv->cur_ps_args; 00898 const struct wined3d_gl_info *gl_info = context->gl_info; 00899 const struct wined3d_fb_state *fb = &shader->device->fb; 00900 unsigned int i, extra_constants_needed = 0; 00901 const struct wined3d_shader_lconst *lconst; 00902 DWORD map; 00903 00904 /* There are some minor differences between pixel and vertex shaders */ 00905 char pshader = shader_is_pshader_version(reg_maps->shader_version.type); 00906 char prefix = pshader ? 'P' : 'V'; 00907 00908 /* Prototype the subroutines */ 00909 for (i = 0, map = reg_maps->labels; map; map >>= 1, ++i) 00910 { 00911 if (map & 1) shader_addline(buffer, "void subroutine%u();\n", i); 00912 } 00913 00914 /* Declare the constants (aka uniforms) */ 00915 if (shader->limits.constant_float > 0) 00916 { 00917 unsigned max_constantsF; 00918 /* Unless the shader uses indirect addressing, always declare the maximum array size and ignore that we need some 00919 * uniforms privately. E.g. 
if GL supports 256 uniforms, and we need 2 for the pos fixup and immediate values, still 00920 * declare VC[256]. If the shader needs more uniforms than we have it won't work in any case. If it uses less, the 00921 * compiler will figure out which uniforms are really used and strip them out. This allows a shader to use c255 on 00922 * a dx9 card, as long as it doesn't also use all the other constants. 00923 * 00924 * If the shader uses indirect addressing the compiler must assume that all declared uniforms are used. In this case, 00925 * declare only the amount that we're assured to have. 00926 * 00927 * Thus we run into problems in these two cases: 00928 * 1) The shader really uses more uniforms than supported 00929 * 2) The shader uses indirect addressing, less constants than supported, but uses a constant index > #supported consts 00930 */ 00931 if (pshader) 00932 { 00933 /* No indirect addressing here. */ 00934 max_constantsF = gl_info->limits.glsl_ps_float_constants; 00935 } 00936 else 00937 { 00938 if (reg_maps->usesrelconstF) 00939 { 00940 /* Subtract the other potential uniforms from the max 00941 * available (bools, ints, and 1 row of projection matrix). 00942 * Subtract another uniform for immediate values, which have 00943 * to be loaded via uniform by the driver as well. The shader 00944 * code only uses 0.5, 2.0, 1.0, 128 and -128 in vertex 00945 * shader code, so one vec4 should be enough. (Unfortunately 00946 * the Nvidia driver doesn't store 128 and -128 in one float). 00947 * 00948 * Writing gl_ClipVertex requires one uniform for each 00949 * clipplane as well. 
*/ 00950 max_constantsF = gl_info->limits.glsl_vs_float_constants - 3; 00951 if(ctx_priv->cur_vs_args->clip_enabled) 00952 { 00953 max_constantsF -= gl_info->limits.clipplanes; 00954 } 00955 max_constantsF -= count_bits(reg_maps->integer_constants); 00956 /* Strictly speaking a bool only uses one scalar, but the nvidia(Linux) compiler doesn't pack them properly, 00957 * so each scalar requires a full vec4. We could work around this by packing the booleans ourselves, but 00958 * for now take this into account when calculating the number of available constants 00959 */ 00960 max_constantsF -= count_bits(reg_maps->boolean_constants); 00961 /* Set by driver quirks in directx.c */ 00962 max_constantsF -= gl_info->reserved_glsl_constants; 00963 } 00964 else 00965 { 00966 max_constantsF = gl_info->limits.glsl_vs_float_constants; 00967 } 00968 } 00969 max_constantsF = min(shader->limits.constant_float, max_constantsF); 00970 shader_addline(buffer, "uniform vec4 %cC[%u];\n", prefix, max_constantsF); 00971 } 00972 00973 /* Always declare the full set of constants, the compiler can remove the 00974 * unused ones because d3d doesn't (yet) support indirect int and bool 00975 * constant addressing. This avoids problems if the app uses e.g. i0 and i9. 
*/ 00976 if (shader->limits.constant_int > 0 && reg_maps->integer_constants) 00977 shader_addline(buffer, "uniform ivec4 %cI[%u];\n", prefix, shader->limits.constant_int); 00978 00979 if (shader->limits.constant_bool > 0 && reg_maps->boolean_constants) 00980 shader_addline(buffer, "uniform bool %cB[%u];\n", prefix, shader->limits.constant_bool); 00981 00982 if (!pshader) 00983 { 00984 shader_addline(buffer, "uniform vec4 posFixup;\n"); 00985 shader_addline(buffer, "void order_ps_input(in vec4[%u]);\n", MAX_REG_OUTPUT); 00986 } 00987 else 00988 { 00989 for (i = 0, map = reg_maps->bumpmat; map; map >>= 1, ++i) 00990 { 00991 if (!(map & 1)) continue; 00992 00993 shader_addline(buffer, "uniform mat2 bumpenvmat%d;\n", i); 00994 00995 if (reg_maps->luminanceparams & (1 << i)) 00996 { 00997 shader_addline(buffer, "uniform float luminancescale%d;\n", i); 00998 shader_addline(buffer, "uniform float luminanceoffset%d;\n", i); 00999 extra_constants_needed++; 01000 } 01001 01002 extra_constants_needed++; 01003 } 01004 01005 if (ps_args->srgb_correction) 01006 { 01007 shader_addline(buffer, "const vec4 srgb_const0 = vec4(%.8e, %.8e, %.8e, %.8e);\n", 01008 srgb_pow, srgb_mul_high, srgb_sub_high, srgb_mul_low); 01009 shader_addline(buffer, "const vec4 srgb_const1 = vec4(%.8e, 0.0, 0.0, 0.0);\n", 01010 srgb_cmp); 01011 } 01012 if (reg_maps->vpos || reg_maps->usesdsy) 01013 { 01014 if (shader->limits.constant_float + extra_constants_needed 01015 + 1 < gl_info->limits.glsl_ps_float_constants) 01016 { 01017 shader_addline(buffer, "uniform vec4 ycorrection;\n"); 01018 extra_constants_needed++; 01019 } 01020 else 01021 { 01022 /* This happens because we do not have proper tracking of the constant registers that are 01023 * actually used, only the max limit of the shader version 01024 */ 01025 FIXME("Cannot find a free uniform for vpos correction params\n"); 01026 shader_addline(buffer, "const vec4 ycorrection = vec4(%f, %f, 0.0, 0.0);\n", 01027 context->render_offscreen ? 
0.0f : fb->render_targets[0]->resource.height, 01028 context->render_offscreen ? 1.0f : -1.0f); 01029 } 01030 shader_addline(buffer, "vec4 vpos;\n"); 01031 } 01032 } 01033 01034 /* Declare texture samplers */ 01035 for (i = 0; i < shader->limits.sampler; ++i) 01036 { 01037 if (reg_maps->sampler_type[i]) 01038 { 01039 const struct wined3d_texture *texture; 01040 01041 switch (reg_maps->sampler_type[i]) 01042 { 01043 case WINED3DSTT_1D: 01044 if (pshader && ps_args->shadow & (1 << i)) 01045 shader_addline(buffer, "uniform sampler1DShadow %csampler%u;\n", prefix, i); 01046 else 01047 shader_addline(buffer, "uniform sampler1D %csampler%u;\n", prefix, i); 01048 break; 01049 case WINED3DSTT_2D: 01050 texture = state->textures[i]; 01051 if (pshader && ps_args->shadow & (1 << i)) 01052 { 01053 if (texture && texture->target == GL_TEXTURE_RECTANGLE_ARB) 01054 shader_addline(buffer, "uniform sampler2DRectShadow %csampler%u;\n", prefix, i); 01055 else 01056 shader_addline(buffer, "uniform sampler2DShadow %csampler%u;\n", prefix, i); 01057 } 01058 else 01059 { 01060 if (texture && texture->target == GL_TEXTURE_RECTANGLE_ARB) 01061 shader_addline(buffer, "uniform sampler2DRect %csampler%u;\n", prefix, i); 01062 else 01063 shader_addline(buffer, "uniform sampler2D %csampler%u;\n", prefix, i); 01064 } 01065 break; 01066 case WINED3DSTT_CUBE: 01067 if (pshader && ps_args->shadow & (1 << i)) FIXME("Unsupported Cube shadow sampler.\n"); 01068 shader_addline(buffer, "uniform samplerCube %csampler%u;\n", prefix, i); 01069 break; 01070 case WINED3DSTT_VOLUME: 01071 if (pshader && ps_args->shadow & (1 << i)) FIXME("Unsupported 3D shadow sampler.\n"); 01072 shader_addline(buffer, "uniform sampler3D %csampler%u;\n", prefix, i); 01073 break; 01074 default: 01075 shader_addline(buffer, "uniform unsupported_sampler %csampler%u;\n", prefix, i); 01076 FIXME("Unrecognized sampler type: %#x\n", reg_maps->sampler_type[i]); 01077 break; 01078 } 01079 } 01080 } 01081 01082 /* Declare uniforms for 
NP2 texcoord fixup: 01083 * This is NOT done inside the loop that declares the texture samplers since the NP2 fixup code 01084 * is currently only used for the GeforceFX series and when forcing the ARB_npot extension off. 01085 * Modern cards just skip the code anyway, so put it inside a separate loop. */ 01086 if (pshader && ps_args->np2_fixup) { 01087 01088 struct ps_np2fixup_info* const fixup = ctx_priv->cur_np2fixup_info; 01089 UINT cur = 0; 01090 01091 /* NP2/RECT textures in OpenGL use texcoords in the range [0,width]x[0,height] 01092 * while D3D has them in the (normalized) [0,1]x[0,1] range. 01093 * samplerNP2Fixup stores texture dimensions and is updated through 01094 * shader_glsl_load_np2fixup_constants when the sampler changes. */ 01095 01096 for (i = 0; i < shader->limits.sampler; ++i) 01097 { 01098 if (reg_maps->sampler_type[i]) 01099 { 01100 if (!(ps_args->np2_fixup & (1 << i))) continue; 01101 01102 if (WINED3DSTT_2D != reg_maps->sampler_type[i]) { 01103 FIXME("Non-2D texture is flagged for NP2 texcoord fixup.\n"); 01104 continue; 01105 } 01106 01107 fixup->idx[i] = cur++; 01108 } 01109 } 01110 01111 fixup->num_consts = (cur + 1) >> 1; 01112 shader_addline(buffer, "uniform vec4 %csamplerNP2Fixup[%u];\n", prefix, fixup->num_consts); 01113 } 01114 01115 /* Declare address variables */ 01116 for (i = 0, map = reg_maps->address; map; map >>= 1, ++i) 01117 { 01118 if (map & 1) shader_addline(buffer, "ivec4 A%u;\n", i); 01119 } 01120 01121 /* Declare texture coordinate temporaries and initialize them */ 01122 for (i = 0, map = reg_maps->texcoord; map; map >>= 1, ++i) 01123 { 01124 if (map & 1) shader_addline(buffer, "vec4 T%u = gl_TexCoord[%u];\n", i, i); 01125 } 01126 01127 /* Declare input register varyings. 
Only pixel shader, vertex shaders have that declared in the 01128 * helper function shader that is linked in at link time 01129 */ 01130 if (pshader && reg_maps->shader_version.major >= 3) 01131 { 01132 UINT in_count = min(vec4_varyings(reg_maps->shader_version.major, gl_info), shader->limits.packed_input); 01133 01134 if (use_vs(state)) 01135 shader_addline(buffer, "varying vec4 IN[%u];\n", in_count); 01136 else 01137 /* TODO: Write a replacement shader for the fixed function vertex pipeline, so this isn't needed. 01138 * For fixed function vertex processing + 3.0 pixel shader we need a separate function in the 01139 * pixel shader that reads the fixed function color into the packed input registers. */ 01140 shader_addline(buffer, "vec4 IN[%u];\n", in_count); 01141 } 01142 01143 /* Declare output register temporaries */ 01144 if (shader->limits.packed_output) 01145 shader_addline(buffer, "vec4 OUT[%u];\n", shader->limits.packed_output); 01146 01147 /* Declare temporary variables */ 01148 for (i = 0, map = reg_maps->temporary; map; map >>= 1, ++i) 01149 { 01150 if (map & 1) shader_addline(buffer, "vec4 R%u;\n", i); 01151 } 01152 01153 /* Declare attributes */ 01154 if (reg_maps->shader_version.type == WINED3D_SHADER_TYPE_VERTEX) 01155 { 01156 for (i = 0, map = reg_maps->input_registers; map; map >>= 1, ++i) 01157 { 01158 if (map & 1) shader_addline(buffer, "attribute vec4 attrib%i;\n", i); 01159 } 01160 } 01161 01162 /* Declare loop registers aLx */ 01163 for (i = 0; i < reg_maps->loop_depth; i++) { 01164 shader_addline(buffer, "int aL%u;\n", i); 01165 shader_addline(buffer, "int tmpInt%u;\n", i); 01166 } 01167 01168 /* Temporary variables for matrix operations */ 01169 shader_addline(buffer, "vec4 tmp0;\n"); 01170 shader_addline(buffer, "vec4 tmp1;\n"); 01171 01172 /* Local constants use a different name so they can be loaded once at shader link time 01173 * They can't be hardcoded into the shader text via LC = {x, y, z, w}; because the 01174 * float -> string 
conversion can cause precision loss. 01175 */ 01176 if (!shader->load_local_constsF) 01177 { 01178 LIST_FOR_EACH_ENTRY(lconst, &shader->constantsF, struct wined3d_shader_lconst, entry) 01179 { 01180 shader_addline(buffer, "uniform vec4 %cLC%u;\n", prefix, lconst->idx); 01181 } 01182 } 01183 01184 /* Start the main program */ 01185 shader_addline(buffer, "void main() {\n"); 01186 if(pshader && reg_maps->vpos) { 01187 /* DirectX apps expect integer values, while OpenGL drivers add approximately 0.5. This causes 01188 * off-by-one problems as spotted by the vPos d3d9 visual test. Unfortunately the ATI cards do 01189 * not add exactly 0.5, but rather something like 0.49999999 or 0.50000001, which still causes 01190 * precision troubles when we just subtract 0.5. 01191 * 01192 * To deal with that just floor() the position. This will eliminate the fraction on all cards. 01193 * 01194 * TODO: Test how that behaves with multisampling once we can enable multisampling in winex11. 01195 * 01196 * An advantage of floor is that it works even if the driver doesn't add 1/2. It is somewhat 01197 * questionable if 1.5, 2.5, ... are the proper values to return in gl_FragCoord, even though 01198 * coordinates specify the pixel centers instead of the pixel corners. This code will behave 01199 * correctly on drivers that returns integer values. 01200 */ 01201 shader_addline(buffer, "vpos = floor(vec4(0, ycorrection[0], 0, 0) + gl_FragCoord * vec4(1, ycorrection[1], 1, 1));\n"); 01202 } 01203 } 01204 01205 /***************************************************************************** 01206 * Functions to generate GLSL strings from DirectX Shader bytecode begin here. 
01207 * 01208 * For more information, see http://wiki.winehq.org/DirectX-Shaders 01209 ****************************************************************************/ 01210 01211 /* Prototypes */ 01212 static void shader_glsl_add_src_param(const struct wined3d_shader_instruction *ins, 01213 const struct wined3d_shader_src_param *wined3d_src, DWORD mask, struct glsl_src_param *glsl_src); 01214 01216 static const char * const shift_glsl_tab[] = { 01217 "", /* 0 (none) */ 01218 "2.0 * ", /* 1 (x2) */ 01219 "4.0 * ", /* 2 (x4) */ 01220 "8.0 * ", /* 3 (x8) */ 01221 "16.0 * ", /* 4 (x16) */ 01222 "32.0 * ", /* 5 (x32) */ 01223 "", /* 6 (x64) */ 01224 "", /* 7 (x128) */ 01225 "", /* 8 (d256) */ 01226 "", /* 9 (d128) */ 01227 "", /* 10 (d64) */ 01228 "", /* 11 (d32) */ 01229 "0.0625 * ", /* 12 (d16) */ 01230 "0.125 * ", /* 13 (d8) */ 01231 "0.25 * ", /* 14 (d4) */ 01232 "0.5 * " /* 15 (d2) */ 01233 }; 01234 01235 /* Generate a GLSL parameter that does the input modifier computation and return the input register/mask to use */ 01236 static void shader_glsl_gen_modifier(enum wined3d_shader_src_modifier src_modifier, 01237 const char *in_reg, const char *in_regswizzle, char *out_str) 01238 { 01239 out_str[0] = 0; 01240 01241 switch (src_modifier) 01242 { 01243 case WINED3DSPSM_DZ: /* Need to handle this in the instructions itself (texld & texcrd). 
*/ 01244 case WINED3DSPSM_DW: 01245 case WINED3DSPSM_NONE: 01246 sprintf(out_str, "%s%s", in_reg, in_regswizzle); 01247 break; 01248 case WINED3DSPSM_NEG: 01249 sprintf(out_str, "-%s%s", in_reg, in_regswizzle); 01250 break; 01251 case WINED3DSPSM_NOT: 01252 sprintf(out_str, "!%s%s", in_reg, in_regswizzle); 01253 break; 01254 case WINED3DSPSM_BIAS: 01255 sprintf(out_str, "(%s%s - vec4(0.5)%s)", in_reg, in_regswizzle, in_regswizzle); 01256 break; 01257 case WINED3DSPSM_BIASNEG: 01258 sprintf(out_str, "-(%s%s - vec4(0.5)%s)", in_reg, in_regswizzle, in_regswizzle); 01259 break; 01260 case WINED3DSPSM_SIGN: 01261 sprintf(out_str, "(2.0 * (%s%s - 0.5))", in_reg, in_regswizzle); 01262 break; 01263 case WINED3DSPSM_SIGNNEG: 01264 sprintf(out_str, "-(2.0 * (%s%s - 0.5))", in_reg, in_regswizzle); 01265 break; 01266 case WINED3DSPSM_COMP: 01267 sprintf(out_str, "(1.0 - %s%s)", in_reg, in_regswizzle); 01268 break; 01269 case WINED3DSPSM_X2: 01270 sprintf(out_str, "(2.0 * %s%s)", in_reg, in_regswizzle); 01271 break; 01272 case WINED3DSPSM_X2NEG: 01273 sprintf(out_str, "-(2.0 * %s%s)", in_reg, in_regswizzle); 01274 break; 01275 case WINED3DSPSM_ABS: 01276 sprintf(out_str, "abs(%s%s)", in_reg, in_regswizzle); 01277 break; 01278 case WINED3DSPSM_ABSNEG: 01279 sprintf(out_str, "-abs(%s%s)", in_reg, in_regswizzle); 01280 break; 01281 default: 01282 FIXME("Unhandled modifier %u\n", src_modifier); 01283 sprintf(out_str, "%s%s", in_reg, in_regswizzle); 01284 } 01285 } 01286 01289 static void shader_glsl_get_register_name(const struct wined3d_shader_register *reg, 01290 char *register_name, BOOL *is_color, const struct wined3d_shader_instruction *ins) 01291 { 01292 /* oPos, oFog and oPts in D3D */ 01293 static const char * const hwrastout_reg_names[] = {"OUT[10]", "OUT[11].x", "OUT[11].y"}; 01294 01295 const struct wined3d_shader *shader = ins->ctx->shader; 01296 const struct wined3d_shader_reg_maps *reg_maps = ins->ctx->reg_maps; 01297 const struct wined3d_gl_info *gl_info = 
ins->ctx->gl_info; 01298 char pshader = shader_is_pshader_version(reg_maps->shader_version.type); 01299 01300 *is_color = FALSE; 01301 01302 switch (reg->type) 01303 { 01304 case WINED3DSPR_TEMP: 01305 sprintf(register_name, "R%u", reg->idx); 01306 break; 01307 01308 case WINED3DSPR_INPUT: 01309 /* vertex shaders */ 01310 if (!pshader) 01311 { 01312 struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data; 01313 if (priv->cur_vs_args->swizzle_map & (1 << reg->idx)) *is_color = TRUE; 01314 sprintf(register_name, "attrib%u", reg->idx); 01315 break; 01316 } 01317 01318 /* pixel shaders >= 3.0 */ 01319 if (reg_maps->shader_version.major >= 3) 01320 { 01321 DWORD idx = shader->u.ps.input_reg_map[reg->idx]; 01322 unsigned int in_count = vec4_varyings(reg_maps->shader_version.major, gl_info); 01323 01324 if (reg->rel_addr) 01325 { 01326 struct glsl_src_param rel_param; 01327 01328 shader_glsl_add_src_param(ins, reg->rel_addr, WINED3DSP_WRITEMASK_0, &rel_param); 01329 01330 /* Removing a + 0 would be an obvious optimization, but macos doesn't see the NOP 01331 * operation there */ 01332 if (idx) 01333 { 01334 if (shader->u.ps.declared_in_count > in_count) 01335 { 01336 sprintf(register_name, 01337 "((%s + %u) > %d ? (%s + %u) > %d ? gl_SecondaryColor : gl_Color : IN[%s + %u])", 01338 rel_param.param_str, idx, in_count - 1, rel_param.param_str, idx, in_count, 01339 rel_param.param_str, idx); 01340 } 01341 else 01342 { 01343 sprintf(register_name, "IN[%s + %u]", rel_param.param_str, idx); 01344 } 01345 } 01346 else 01347 { 01348 if (shader->u.ps.declared_in_count > in_count) 01349 { 01350 sprintf(register_name, "((%s) > %d ? (%s) > %d ? 
gl_SecondaryColor : gl_Color : IN[%s])", 01351 rel_param.param_str, in_count - 1, rel_param.param_str, in_count, 01352 rel_param.param_str); 01353 } 01354 else 01355 { 01356 sprintf(register_name, "IN[%s]", rel_param.param_str); 01357 } 01358 } 01359 } 01360 else 01361 { 01362 if (idx == in_count) sprintf(register_name, "gl_Color"); 01363 else if (idx == in_count + 1) sprintf(register_name, "gl_SecondaryColor"); 01364 else sprintf(register_name, "IN[%u]", idx); 01365 } 01366 } 01367 else 01368 { 01369 if (!reg->idx) strcpy(register_name, "gl_Color"); 01370 else strcpy(register_name, "gl_SecondaryColor"); 01371 break; 01372 } 01373 break; 01374 01375 case WINED3DSPR_CONST: 01376 { 01377 const char prefix = pshader ? 'P' : 'V'; 01378 01379 /* Relative addressing */ 01380 if (reg->rel_addr) 01381 { 01382 struct glsl_src_param rel_param; 01383 shader_glsl_add_src_param(ins, reg->rel_addr, WINED3DSP_WRITEMASK_0, &rel_param); 01384 if (reg->idx) sprintf(register_name, "%cC[%s + %u]", prefix, rel_param.param_str, reg->idx); 01385 else sprintf(register_name, "%cC[%s]", prefix, rel_param.param_str); 01386 } 01387 else 01388 { 01389 if (shader_constant_is_local(shader, reg->idx)) 01390 sprintf(register_name, "%cLC%u", prefix, reg->idx); 01391 else 01392 sprintf(register_name, "%cC[%u]", prefix, reg->idx); 01393 } 01394 } 01395 break; 01396 01397 case WINED3DSPR_CONSTINT: 01398 if (pshader) sprintf(register_name, "PI[%u]", reg->idx); 01399 else sprintf(register_name, "VI[%u]", reg->idx); 01400 break; 01401 01402 case WINED3DSPR_CONSTBOOL: 01403 if (pshader) sprintf(register_name, "PB[%u]", reg->idx); 01404 else sprintf(register_name, "VB[%u]", reg->idx); 01405 break; 01406 01407 case WINED3DSPR_TEXTURE: /* case WINED3DSPR_ADDR: */ 01408 if (pshader) sprintf(register_name, "T%u", reg->idx); 01409 else sprintf(register_name, "A%u", reg->idx); 01410 break; 01411 01412 case WINED3DSPR_LOOP: 01413 sprintf(register_name, "aL%u", ins->ctx->loop_state->current_reg - 1); 01414 break; 
01415 01416 case WINED3DSPR_SAMPLER: 01417 if (pshader) sprintf(register_name, "Psampler%u", reg->idx); 01418 else sprintf(register_name, "Vsampler%u", reg->idx); 01419 break; 01420 01421 case WINED3DSPR_COLOROUT: 01422 if (reg->idx >= gl_info->limits.buffers) 01423 WARN("Write to render target %u, only %d supported.\n", reg->idx, gl_info->limits.buffers); 01424 01425 sprintf(register_name, "gl_FragData[%u]", reg->idx); 01426 break; 01427 01428 case WINED3DSPR_RASTOUT: 01429 sprintf(register_name, "%s", hwrastout_reg_names[reg->idx]); 01430 break; 01431 01432 case WINED3DSPR_DEPTHOUT: 01433 sprintf(register_name, "gl_FragDepth"); 01434 break; 01435 01436 case WINED3DSPR_ATTROUT: 01437 if (!reg->idx) sprintf(register_name, "OUT[8]"); 01438 else sprintf(register_name, "OUT[9]"); 01439 break; 01440 01441 case WINED3DSPR_TEXCRDOUT: 01442 /* Vertex shaders >= 3.0: WINED3DSPR_OUTPUT */ 01443 sprintf(register_name, "OUT[%u]", reg->idx); 01444 break; 01445 01446 case WINED3DSPR_MISCTYPE: 01447 if (!reg->idx) 01448 { 01449 /* vPos */ 01450 sprintf(register_name, "vpos"); 01451 } 01452 else if (reg->idx == 1) 01453 { 01454 /* Note that gl_FrontFacing is a bool, while vFace is 01455 * a float for which the sign determines front/back */ 01456 sprintf(register_name, "(gl_FrontFacing ? 
1.0 : -1.0)"); 01457 } 01458 else 01459 { 01460 FIXME("Unhandled misctype register %d\n", reg->idx); 01461 sprintf(register_name, "unrecognized_register"); 01462 } 01463 break; 01464 01465 case WINED3DSPR_IMMCONST: 01466 switch (reg->immconst_type) 01467 { 01468 case WINED3D_IMMCONST_SCALAR: 01469 sprintf(register_name, "%.8e", *(const float *)reg->immconst_data); 01470 break; 01471 01472 case WINED3D_IMMCONST_VEC4: 01473 sprintf(register_name, "vec4(%.8e, %.8e, %.8e, %.8e)", 01474 *(const float *)&reg->immconst_data[0], *(const float *)&reg->immconst_data[1], 01475 *(const float *)&reg->immconst_data[2], *(const float *)&reg->immconst_data[3]); 01476 break; 01477 01478 default: 01479 FIXME("Unhandled immconst type %#x\n", reg->immconst_type); 01480 sprintf(register_name, "<unhandled_immconst_type %#x>", reg->immconst_type); 01481 } 01482 break; 01483 01484 default: 01485 FIXME("Unhandled register name Type(%d)\n", reg->type); 01486 sprintf(register_name, "unrecognized_register"); 01487 break; 01488 } 01489 } 01490 01491 static void shader_glsl_write_mask_to_str(DWORD write_mask, char *str) 01492 { 01493 *str++ = '.'; 01494 if (write_mask & WINED3DSP_WRITEMASK_0) *str++ = 'x'; 01495 if (write_mask & WINED3DSP_WRITEMASK_1) *str++ = 'y'; 01496 if (write_mask & WINED3DSP_WRITEMASK_2) *str++ = 'z'; 01497 if (write_mask & WINED3DSP_WRITEMASK_3) *str++ = 'w'; 01498 *str = '\0'; 01499 } 01500 01501 /* Get the GLSL write mask for the destination register */ 01502 static DWORD shader_glsl_get_write_mask(const struct wined3d_shader_dst_param *param, char *write_mask) 01503 { 01504 DWORD mask = param->write_mask; 01505 01506 if (shader_is_scalar(&param->reg)) 01507 { 01508 mask = WINED3DSP_WRITEMASK_0; 01509 *write_mask = '\0'; 01510 } 01511 else 01512 { 01513 shader_glsl_write_mask_to_str(mask, write_mask); 01514 } 01515 01516 return mask; 01517 } 01518 01519 static unsigned int shader_glsl_get_write_mask_size(DWORD write_mask) { 01520 unsigned int size = 0; 01521 01522 if (write_mask & 
WINED3DSP_WRITEMASK_0) ++size; 01523 if (write_mask & WINED3DSP_WRITEMASK_1) ++size; 01524 if (write_mask & WINED3DSP_WRITEMASK_2) ++size; 01525 if (write_mask & WINED3DSP_WRITEMASK_3) ++size; 01526 01527 return size; 01528 } 01529 01530 static void shader_glsl_swizzle_to_str(const DWORD swizzle, BOOL fixup, DWORD mask, char *str) 01531 { 01532 /* For registers of type WINED3DDECLTYPE_D3DCOLOR, data is stored as "bgra", 01533 * but addressed as "rgba". To fix this we need to swap the register's x 01534 * and z components. */ 01535 const char *swizzle_chars = fixup ? "zyxw" : "xyzw"; 01536 01537 *str++ = '.'; 01538 /* swizzle bits fields: wwzzyyxx */ 01539 if (mask & WINED3DSP_WRITEMASK_0) *str++ = swizzle_chars[swizzle & 0x03]; 01540 if (mask & WINED3DSP_WRITEMASK_1) *str++ = swizzle_chars[(swizzle >> 2) & 0x03]; 01541 if (mask & WINED3DSP_WRITEMASK_2) *str++ = swizzle_chars[(swizzle >> 4) & 0x03]; 01542 if (mask & WINED3DSP_WRITEMASK_3) *str++ = swizzle_chars[(swizzle >> 6) & 0x03]; 01543 *str = '\0'; 01544 } 01545 01546 static void shader_glsl_get_swizzle(const struct wined3d_shader_src_param *param, 01547 BOOL fixup, DWORD mask, char *swizzle_str) 01548 { 01549 if (shader_is_scalar(&param->reg)) 01550 *swizzle_str = '\0'; 01551 else 01552 shader_glsl_swizzle_to_str(param->swizzle, fixup, mask, swizzle_str); 01553 } 01554 01555 /* From a given parameter token, generate the corresponding GLSL string. 01556 * Also, return the actual register name and swizzle in case the 01557 * caller needs this information as well. 
*/ 01558 static void shader_glsl_add_src_param(const struct wined3d_shader_instruction *ins, 01559 const struct wined3d_shader_src_param *wined3d_src, DWORD mask, struct glsl_src_param *glsl_src) 01560 { 01561 BOOL is_color = FALSE; 01562 char swizzle_str[6]; 01563 01564 glsl_src->reg_name[0] = '\0'; 01565 glsl_src->param_str[0] = '\0'; 01566 swizzle_str[0] = '\0'; 01567 01568 shader_glsl_get_register_name(&wined3d_src->reg, glsl_src->reg_name, &is_color, ins); 01569 shader_glsl_get_swizzle(wined3d_src, is_color, mask, swizzle_str); 01570 shader_glsl_gen_modifier(wined3d_src->modifiers, glsl_src->reg_name, swizzle_str, glsl_src->param_str); 01571 } 01572 01573 /* From a given parameter token, generate the corresponding GLSL string. 01574 * Also, return the actual register name and swizzle in case the 01575 * caller needs this information as well. */ 01576 static DWORD shader_glsl_add_dst_param(const struct wined3d_shader_instruction *ins, 01577 const struct wined3d_shader_dst_param *wined3d_dst, struct glsl_dst_param *glsl_dst) 01578 { 01579 BOOL is_color = FALSE; 01580 01581 glsl_dst->mask_str[0] = '\0'; 01582 glsl_dst->reg_name[0] = '\0'; 01583 01584 shader_glsl_get_register_name(&wined3d_dst->reg, glsl_dst->reg_name, &is_color, ins); 01585 return shader_glsl_get_write_mask(wined3d_dst, glsl_dst->mask_str); 01586 } 01587 01588 /* Append the destination part of the instruction to the buffer, return the effective write mask */ 01589 static DWORD shader_glsl_append_dst_ext(struct wined3d_shader_buffer *buffer, 01590 const struct wined3d_shader_instruction *ins, const struct wined3d_shader_dst_param *dst) 01591 { 01592 struct glsl_dst_param glsl_dst; 01593 DWORD mask; 01594 01595 mask = shader_glsl_add_dst_param(ins, dst, &glsl_dst); 01596 if (mask) shader_addline(buffer, "%s%s = %s(", glsl_dst.reg_name, glsl_dst.mask_str, shift_glsl_tab[dst->shift]); 01597 01598 return mask; 01599 } 01600 01601 /* Append the destination part of the instruction to the buffer, return 
the effective write mask */ 01602 static DWORD shader_glsl_append_dst(struct wined3d_shader_buffer *buffer, const struct wined3d_shader_instruction *ins) 01603 { 01604 return shader_glsl_append_dst_ext(buffer, ins, &ins->dst[0]); 01605 } 01606 01608 static void shader_glsl_add_instruction_modifiers(const struct wined3d_shader_instruction *ins) 01609 { 01610 struct glsl_dst_param dst_param; 01611 DWORD modifiers; 01612 01613 if (!ins->dst_count) return; 01614 01615 modifiers = ins->dst[0].modifiers; 01616 if (!modifiers) return; 01617 01618 shader_glsl_add_dst_param(ins, &ins->dst[0], &dst_param); 01619 01620 if (modifiers & WINED3DSPDM_SATURATE) 01621 { 01622 /* _SAT means to clamp the value of the register to between 0 and 1 */ 01623 shader_addline(ins->ctx->buffer, "%s%s = clamp(%s%s, 0.0, 1.0);\n", dst_param.reg_name, 01624 dst_param.mask_str, dst_param.reg_name, dst_param.mask_str); 01625 } 01626 01627 if (modifiers & WINED3DSPDM_MSAMPCENTROID) 01628 { 01629 FIXME("_centroid modifier not handled\n"); 01630 } 01631 01632 if (modifiers & WINED3DSPDM_PARTIALPRECISION) 01633 { 01634 /* MSDN says this modifier can be safely ignored, so that's what we'll do. 
*/ 01635 } 01636 } 01637 01638 static const char *shader_glsl_get_rel_op(enum wined3d_shader_rel_op op) 01639 { 01640 switch (op) 01641 { 01642 case WINED3D_SHADER_REL_OP_GT: return ">"; 01643 case WINED3D_SHADER_REL_OP_EQ: return "=="; 01644 case WINED3D_SHADER_REL_OP_GE: return ">="; 01645 case WINED3D_SHADER_REL_OP_LT: return "<"; 01646 case WINED3D_SHADER_REL_OP_NE: return "!="; 01647 case WINED3D_SHADER_REL_OP_LE: return "<="; 01648 default: 01649 FIXME("Unrecognized operator %#x.\n", op); 01650 return "(\?\?)"; 01651 } 01652 } 01653 01654 static void shader_glsl_get_sample_function(const struct wined3d_shader_context *ctx, 01655 DWORD sampler_idx, DWORD flags, struct glsl_sample_function *sample_function) 01656 { 01657 enum wined3d_sampler_texture_type sampler_type = ctx->reg_maps->sampler_type[sampler_idx]; 01658 const struct wined3d_gl_info *gl_info = ctx->gl_info; 01659 BOOL shadow = shader_is_pshader_version(ctx->reg_maps->shader_version.type) 01660 && (((const struct shader_glsl_ctx_priv *)ctx->backend_data)->cur_ps_args->shadow & (1 << sampler_idx)); 01661 BOOL projected = flags & WINED3D_GLSL_SAMPLE_PROJECTED; 01662 BOOL texrect = flags & WINED3D_GLSL_SAMPLE_RECT; 01663 BOOL lod = flags & WINED3D_GLSL_SAMPLE_LOD; 01664 BOOL grad = flags & WINED3D_GLSL_SAMPLE_GRAD; 01665 01666 /* Note that there's no such thing as a projected cube texture. */ 01667 switch(sampler_type) { 01668 case WINED3DSTT_1D: 01669 if (shadow) 01670 { 01671 if (lod) 01672 { 01673 sample_function->name = projected ? "shadow1DProjLod" : "shadow1DLod"; 01674 } 01675 else if (grad) 01676 { 01677 if (gl_info->supported[EXT_GPU_SHADER4]) 01678 sample_function->name = projected ? "shadow1DProjGrad" : "shadow1DGrad"; 01679 else if (gl_info->supported[ARB_SHADER_TEXTURE_LOD]) 01680 sample_function->name = projected ? 
"shadow1DProjGradARB" : "shadow1DGradARB"; 01681 else 01682 { 01683 FIXME("Unsupported 1D shadow grad function.\n"); 01684 sample_function->name = "unsupported1DGrad"; 01685 } 01686 } 01687 else 01688 { 01689 sample_function->name = projected ? "shadow1DProj" : "shadow1D"; 01690 } 01691 sample_function->coord_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1; 01692 } 01693 else 01694 { 01695 if (lod) 01696 { 01697 sample_function->name = projected ? "texture1DProjLod" : "texture1DLod"; 01698 } 01699 else if (grad) 01700 { 01701 if (gl_info->supported[EXT_GPU_SHADER4]) 01702 sample_function->name = projected ? "texture1DProjGrad" : "texture1DGrad"; 01703 else if (gl_info->supported[ARB_SHADER_TEXTURE_LOD]) 01704 sample_function->name = projected ? "texture1DProjGradARB" : "texture1DGradARB"; 01705 else 01706 { 01707 FIXME("Unsupported 1D grad function.\n"); 01708 sample_function->name = "unsupported1DGrad"; 01709 } 01710 } 01711 else 01712 { 01713 sample_function->name = projected ? "texture1DProj" : "texture1D"; 01714 } 01715 sample_function->coord_mask = WINED3DSP_WRITEMASK_0; 01716 } 01717 break; 01718 01719 case WINED3DSTT_2D: 01720 if (shadow) 01721 { 01722 if (texrect) 01723 { 01724 if (lod) 01725 { 01726 sample_function->name = projected ? "shadow2DRectProjLod" : "shadow2DRectLod"; 01727 } 01728 else if (grad) 01729 { 01730 if (gl_info->supported[EXT_GPU_SHADER4]) 01731 sample_function->name = projected ? "shadow2DRectProjGrad" : "shadow2DRectGrad"; 01732 else if (gl_info->supported[ARB_SHADER_TEXTURE_LOD]) 01733 sample_function->name = projected ? "shadow2DRectProjGradARB" : "shadow2DRectGradARB"; 01734 else 01735 { 01736 FIXME("Unsupported RECT shadow grad function.\n"); 01737 sample_function->name = "unsupported2DRectGrad"; 01738 } 01739 } 01740 else 01741 { 01742 sample_function->name = projected ? "shadow2DRectProj" : "shadow2DRect"; 01743 } 01744 } 01745 else 01746 { 01747 if (lod) 01748 { 01749 sample_function->name = projected ? 
"shadow2DProjLod" : "shadow2DLod"; 01750 } 01751 else if (grad) 01752 { 01753 if (gl_info->supported[EXT_GPU_SHADER4]) 01754 sample_function->name = projected ? "shadow2DProjGrad" : "shadow2DGrad"; 01755 else if (gl_info->supported[ARB_SHADER_TEXTURE_LOD]) 01756 sample_function->name = projected ? "shadow2DProjGradARB" : "shadow2DGradARB"; 01757 else 01758 { 01759 FIXME("Unsupported 2D shadow grad function.\n"); 01760 sample_function->name = "unsupported2DGrad"; 01761 } 01762 } 01763 else 01764 { 01765 sample_function->name = projected ? "shadow2DProj" : "shadow2D"; 01766 } 01767 } 01768 sample_function->coord_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2; 01769 } 01770 else 01771 { 01772 if (texrect) 01773 { 01774 if (lod) 01775 { 01776 sample_function->name = projected ? "texture2DRectProjLod" : "texture2DRectLod"; 01777 } 01778 else if (grad) 01779 { 01780 if (gl_info->supported[EXT_GPU_SHADER4]) 01781 sample_function->name = projected ? "texture2DRectProjGrad" : "texture2DRectGrad"; 01782 else if (gl_info->supported[ARB_SHADER_TEXTURE_LOD]) 01783 sample_function->name = projected ? "texture2DRectProjGradARB" : "texture2DRectGradARB"; 01784 else 01785 { 01786 FIXME("Unsupported RECT grad function.\n"); 01787 sample_function->name = "unsupported2DRectGrad"; 01788 } 01789 } 01790 else 01791 { 01792 sample_function->name = projected ? "texture2DRectProj" : "texture2DRect"; 01793 } 01794 } 01795 else 01796 { 01797 if (lod) 01798 { 01799 sample_function->name = projected ? "texture2DProjLod" : "texture2DLod"; 01800 } 01801 else if (grad) 01802 { 01803 if (gl_info->supported[EXT_GPU_SHADER4]) 01804 sample_function->name = projected ? "texture2DProjGrad" : "texture2DGrad"; 01805 else if (gl_info->supported[ARB_SHADER_TEXTURE_LOD]) 01806 sample_function->name = projected ? 
"texture2DProjGradARB" : "texture2DGradARB"; 01807 else 01808 { 01809 FIXME("Unsupported 2D grad function.\n"); 01810 sample_function->name = "unsupported2DGrad"; 01811 } 01812 } 01813 else 01814 { 01815 sample_function->name = projected ? "texture2DProj" : "texture2D"; 01816 } 01817 } 01818 sample_function->coord_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1; 01819 } 01820 break; 01821 01822 case WINED3DSTT_CUBE: 01823 if (shadow) 01824 { 01825 FIXME("Unsupported Cube shadow function.\n"); 01826 sample_function->name = "unsupportedCubeShadow"; 01827 sample_function->coord_mask = 0; 01828 } 01829 else 01830 { 01831 if (lod) 01832 { 01833 sample_function->name = "textureCubeLod"; 01834 } 01835 else if (grad) 01836 { 01837 if (gl_info->supported[EXT_GPU_SHADER4]) 01838 sample_function->name = "textureCubeGrad"; 01839 else if (gl_info->supported[ARB_SHADER_TEXTURE_LOD]) 01840 sample_function->name = "textureCubeGradARB"; 01841 else 01842 { 01843 FIXME("Unsupported Cube grad function.\n"); 01844 sample_function->name = "unsupportedCubeGrad"; 01845 } 01846 } 01847 else 01848 { 01849 sample_function->name = "textureCube"; 01850 } 01851 sample_function->coord_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2; 01852 } 01853 break; 01854 01855 case WINED3DSTT_VOLUME: 01856 if (shadow) 01857 { 01858 FIXME("Unsupported 3D shadow function.\n"); 01859 sample_function->name = "unsupported3DShadow"; 01860 sample_function->coord_mask = 0; 01861 } 01862 else 01863 { 01864 if (lod) 01865 { 01866 sample_function->name = projected ? "texture3DProjLod" : "texture3DLod"; 01867 } 01868 else if (grad) 01869 { 01870 if (gl_info->supported[EXT_GPU_SHADER4]) 01871 sample_function->name = projected ? "texture3DProjGrad" : "texture3DGrad"; 01872 else if (gl_info->supported[ARB_SHADER_TEXTURE_LOD]) 01873 sample_function->name = projected ? 
"texture3DProjGradARB" : "texture3DGradARB"; 01874 else 01875 { 01876 FIXME("Unsupported 3D grad function.\n"); 01877 sample_function->name = "unsupported3DGrad"; 01878 } 01879 } 01880 else 01881 { 01882 sample_function->name = projected ? "texture3DProj" : "texture3D"; 01883 } 01884 sample_function->coord_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2; 01885 } 01886 break; 01887 01888 default: 01889 sample_function->name = ""; 01890 sample_function->coord_mask = 0; 01891 FIXME("Unrecognized sampler type: %#x;\n", sampler_type); 01892 break; 01893 } 01894 } 01895 01896 static void shader_glsl_append_fixup_arg(char *arguments, const char *reg_name, 01897 BOOL sign_fixup, enum fixup_channel_source channel_source) 01898 { 01899 switch(channel_source) 01900 { 01901 case CHANNEL_SOURCE_ZERO: 01902 strcat(arguments, "0.0"); 01903 break; 01904 01905 case CHANNEL_SOURCE_ONE: 01906 strcat(arguments, "1.0"); 01907 break; 01908 01909 case CHANNEL_SOURCE_X: 01910 strcat(arguments, reg_name); 01911 strcat(arguments, ".x"); 01912 break; 01913 01914 case CHANNEL_SOURCE_Y: 01915 strcat(arguments, reg_name); 01916 strcat(arguments, ".y"); 01917 break; 01918 01919 case CHANNEL_SOURCE_Z: 01920 strcat(arguments, reg_name); 01921 strcat(arguments, ".z"); 01922 break; 01923 01924 case CHANNEL_SOURCE_W: 01925 strcat(arguments, reg_name); 01926 strcat(arguments, ".w"); 01927 break; 01928 01929 default: 01930 FIXME("Unhandled channel source %#x\n", channel_source); 01931 strcat(arguments, "undefined"); 01932 break; 01933 } 01934 01935 if (sign_fixup) strcat(arguments, " * 2.0 - 1.0"); 01936 } 01937 01938 static void shader_glsl_color_correction(const struct wined3d_shader_instruction *ins, struct color_fixup_desc fixup) 01939 { 01940 struct wined3d_shader_dst_param dst; 01941 unsigned int mask_size, remaining; 01942 struct glsl_dst_param dst_param; 01943 char arguments[256]; 01944 DWORD mask; 01945 01946 mask = 0; 01947 if (fixup.x_sign_fixup || fixup.x_source 
!= CHANNEL_SOURCE_X) mask |= WINED3DSP_WRITEMASK_0; 01948 if (fixup.y_sign_fixup || fixup.y_source != CHANNEL_SOURCE_Y) mask |= WINED3DSP_WRITEMASK_1; 01949 if (fixup.z_sign_fixup || fixup.z_source != CHANNEL_SOURCE_Z) mask |= WINED3DSP_WRITEMASK_2; 01950 if (fixup.w_sign_fixup || fixup.w_source != CHANNEL_SOURCE_W) mask |= WINED3DSP_WRITEMASK_3; 01951 mask &= ins->dst[0].write_mask; 01952 01953 if (!mask) return; /* Nothing to do */ 01954 01955 if (is_complex_fixup(fixup)) 01956 { 01957 enum complex_fixup complex_fixup = get_complex_fixup(fixup); 01958 FIXME("Complex fixup (%#x) not supported\n",complex_fixup); 01959 return; 01960 } 01961 01962 mask_size = shader_glsl_get_write_mask_size(mask); 01963 01964 dst = ins->dst[0]; 01965 dst.write_mask = mask; 01966 shader_glsl_add_dst_param(ins, &dst, &dst_param); 01967 01968 arguments[0] = '\0'; 01969 remaining = mask_size; 01970 if (mask & WINED3DSP_WRITEMASK_0) 01971 { 01972 shader_glsl_append_fixup_arg(arguments, dst_param.reg_name, fixup.x_sign_fixup, fixup.x_source); 01973 if (--remaining) strcat(arguments, ", "); 01974 } 01975 if (mask & WINED3DSP_WRITEMASK_1) 01976 { 01977 shader_glsl_append_fixup_arg(arguments, dst_param.reg_name, fixup.y_sign_fixup, fixup.y_source); 01978 if (--remaining) strcat(arguments, ", "); 01979 } 01980 if (mask & WINED3DSP_WRITEMASK_2) 01981 { 01982 shader_glsl_append_fixup_arg(arguments, dst_param.reg_name, fixup.z_sign_fixup, fixup.z_source); 01983 if (--remaining) strcat(arguments, ", "); 01984 } 01985 if (mask & WINED3DSP_WRITEMASK_3) 01986 { 01987 shader_glsl_append_fixup_arg(arguments, dst_param.reg_name, fixup.w_sign_fixup, fixup.w_source); 01988 if (--remaining) strcat(arguments, ", "); 01989 } 01990 01991 if (mask_size > 1) 01992 { 01993 shader_addline(ins->ctx->buffer, "%s%s = vec%u(%s);\n", 01994 dst_param.reg_name, dst_param.mask_str, mask_size, arguments); 01995 } 01996 else 01997 { 01998 shader_addline(ins->ctx->buffer, "%s%s = %s;\n", dst_param.reg_name, 
dst_param.mask_str, arguments); 01999 } 02000 } 02001 02002 static void PRINTF_ATTR(8, 9) shader_glsl_gen_sample_code(const struct wined3d_shader_instruction *ins, 02003 DWORD sampler, const struct glsl_sample_function *sample_function, DWORD swizzle, 02004 const char *dx, const char *dy, const char *bias, const char *coord_reg_fmt, ...) 02005 { 02006 const char *sampler_base; 02007 char dst_swizzle[6]; 02008 struct color_fixup_desc fixup; 02009 BOOL np2_fixup = FALSE; 02010 va_list args; 02011 02012 shader_glsl_swizzle_to_str(swizzle, FALSE, ins->dst[0].write_mask, dst_swizzle); 02013 02014 if (shader_is_pshader_version(ins->ctx->reg_maps->shader_version.type)) 02015 { 02016 const struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data; 02017 fixup = priv->cur_ps_args->color_fixup[sampler]; 02018 sampler_base = "Psampler"; 02019 02020 if(priv->cur_ps_args->np2_fixup & (1 << sampler)) { 02021 if(bias) { 02022 FIXME("Biased sampling from NP2 textures is unsupported\n"); 02023 } else { 02024 np2_fixup = TRUE; 02025 } 02026 } 02027 } else { 02028 sampler_base = "Vsampler"; 02029 fixup = COLOR_FIXUP_IDENTITY; /* FIXME: Vshader color fixup */ 02030 } 02031 02032 shader_glsl_append_dst(ins->ctx->buffer, ins); 02033 02034 shader_addline(ins->ctx->buffer, "%s(%s%u, ", sample_function->name, sampler_base, sampler); 02035 02036 va_start(args, coord_reg_fmt); 02037 shader_vaddline(ins->ctx->buffer, coord_reg_fmt, args); 02038 va_end(args); 02039 02040 if(bias) { 02041 shader_addline(ins->ctx->buffer, ", %s)%s);\n", bias, dst_swizzle); 02042 } else { 02043 if (np2_fixup) { 02044 const struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data; 02045 const unsigned char idx = priv->cur_np2fixup_info->idx[sampler]; 02046 02047 shader_addline(ins->ctx->buffer, " * PsamplerNP2Fixup[%u].%s)%s);\n", idx >> 1, 02048 (idx % 2) ? 
"zw" : "xy", dst_swizzle); 02049 } else if(dx && dy) { 02050 shader_addline(ins->ctx->buffer, ", %s, %s)%s);\n", dx, dy, dst_swizzle); 02051 } else { 02052 shader_addline(ins->ctx->buffer, ")%s);\n", dst_swizzle); 02053 } 02054 } 02055 02056 if(!is_identity_fixup(fixup)) { 02057 shader_glsl_color_correction(ins, fixup); 02058 } 02059 } 02060 02061 /***************************************************************************** 02062 * Begin processing individual instruction opcodes 02063 ****************************************************************************/ 02064 02065 /* Generate GLSL arithmetic functions (dst = src1 + src2) */ 02066 static void shader_glsl_arith(const struct wined3d_shader_instruction *ins) 02067 { 02068 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 02069 struct glsl_src_param src0_param; 02070 struct glsl_src_param src1_param; 02071 DWORD write_mask; 02072 char op; 02073 02074 /* Determine the GLSL operator to use based on the opcode */ 02075 switch (ins->handler_idx) 02076 { 02077 case WINED3DSIH_MUL: op = '*'; break; 02078 case WINED3DSIH_ADD: op = '+'; break; 02079 case WINED3DSIH_SUB: op = '-'; break; 02080 default: 02081 op = ' '; 02082 FIXME("Opcode %#x not yet handled in GLSL\n", ins->handler_idx); 02083 break; 02084 } 02085 02086 write_mask = shader_glsl_append_dst(buffer, ins); 02087 shader_glsl_add_src_param(ins, &ins->src[0], write_mask, &src0_param); 02088 shader_glsl_add_src_param(ins, &ins->src[1], write_mask, &src1_param); 02089 shader_addline(buffer, "%s %c %s);\n", src0_param.param_str, op, src1_param.param_str); 02090 } 02091 02092 /* Process the WINED3DSIO_MOV opcode using GLSL (dst = src) */ 02093 static void shader_glsl_mov(const struct wined3d_shader_instruction *ins) 02094 { 02095 const struct wined3d_gl_info *gl_info = ins->ctx->gl_info; 02096 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 02097 struct glsl_src_param src0_param; 02098 DWORD write_mask; 02099 02100 write_mask = 
shader_glsl_append_dst(buffer, ins); 02101 shader_glsl_add_src_param(ins, &ins->src[0], write_mask, &src0_param); 02102 02103 /* In vs_1_1 WINED3DSIO_MOV can write to the address register. In later 02104 * shader versions WINED3DSIO_MOVA is used for this. */ 02105 if (ins->ctx->reg_maps->shader_version.major == 1 02106 && !shader_is_pshader_version(ins->ctx->reg_maps->shader_version.type) 02107 && ins->dst[0].reg.type == WINED3DSPR_ADDR) 02108 { 02109 /* This is a simple floor() */ 02110 unsigned int mask_size = shader_glsl_get_write_mask_size(write_mask); 02111 if (mask_size > 1) { 02112 shader_addline(buffer, "ivec%d(floor(%s)));\n", mask_size, src0_param.param_str); 02113 } else { 02114 shader_addline(buffer, "int(floor(%s)));\n", src0_param.param_str); 02115 } 02116 } 02117 else if(ins->handler_idx == WINED3DSIH_MOVA) 02118 { 02119 /* We need to *round* to the nearest int here. */ 02120 unsigned int mask_size = shader_glsl_get_write_mask_size(write_mask); 02121 02122 if (gl_info->supported[EXT_GPU_SHADER4]) 02123 { 02124 if (mask_size > 1) 02125 shader_addline(buffer, "ivec%d(round(%s)));\n", mask_size, src0_param.param_str); 02126 else 02127 shader_addline(buffer, "int(round(%s)));\n", src0_param.param_str); 02128 } 02129 else 02130 { 02131 if (mask_size > 1) 02132 shader_addline(buffer, "ivec%d(floor(abs(%s) + vec%d(0.5)) * sign(%s)));\n", 02133 mask_size, src0_param.param_str, mask_size, src0_param.param_str); 02134 else 02135 shader_addline(buffer, "int(floor(abs(%s) + 0.5) * sign(%s)));\n", 02136 src0_param.param_str, src0_param.param_str); 02137 } 02138 } 02139 else 02140 { 02141 shader_addline(buffer, "%s);\n", src0_param.param_str); 02142 } 02143 } 02144 02145 /* Process the dot product operators DP3 and DP4 in GLSL (dst = dot(src0, src1)) */ 02146 static void shader_glsl_dot(const struct wined3d_shader_instruction *ins) 02147 { 02148 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 02149 struct glsl_src_param src0_param; 02150 struct 
glsl_src_param src1_param; 02151 DWORD dst_write_mask, src_write_mask; 02152 unsigned int dst_size = 0; 02153 02154 dst_write_mask = shader_glsl_append_dst(buffer, ins); 02155 dst_size = shader_glsl_get_write_mask_size(dst_write_mask); 02156 02157 /* dp3 works on vec3, dp4 on vec4 */ 02158 if (ins->handler_idx == WINED3DSIH_DP4) 02159 { 02160 src_write_mask = WINED3DSP_WRITEMASK_ALL; 02161 } else { 02162 src_write_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2; 02163 } 02164 02165 shader_glsl_add_src_param(ins, &ins->src[0], src_write_mask, &src0_param); 02166 shader_glsl_add_src_param(ins, &ins->src[1], src_write_mask, &src1_param); 02167 02168 if (dst_size > 1) { 02169 shader_addline(buffer, "vec%d(dot(%s, %s)));\n", dst_size, src0_param.param_str, src1_param.param_str); 02170 } else { 02171 shader_addline(buffer, "dot(%s, %s));\n", src0_param.param_str, src1_param.param_str); 02172 } 02173 } 02174 02175 /* Note that this instruction has some restrictions. The destination write mask 02176 * can't contain the w component, and the source swizzles have to be .xyzw */ 02177 static void shader_glsl_cross(const struct wined3d_shader_instruction *ins) 02178 { 02179 DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2; 02180 struct glsl_src_param src0_param; 02181 struct glsl_src_param src1_param; 02182 char dst_mask[6]; 02183 02184 shader_glsl_get_write_mask(&ins->dst[0], dst_mask); 02185 shader_glsl_append_dst(ins->ctx->buffer, ins); 02186 shader_glsl_add_src_param(ins, &ins->src[0], src_mask, &src0_param); 02187 shader_glsl_add_src_param(ins, &ins->src[1], src_mask, &src1_param); 02188 shader_addline(ins->ctx->buffer, "cross(%s, %s)%s);\n", src0_param.param_str, src1_param.param_str, dst_mask); 02189 } 02190 02191 /* Process the WINED3DSIO_POW instruction in GLSL (dst = |src0|^src1) 02192 * Src0 and src1 are scalars. Note that D3D uses the absolute of src0, while 02193 * GLSL uses the value as-is. 
*/ 02194 static void shader_glsl_pow(const struct wined3d_shader_instruction *ins) 02195 { 02196 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 02197 struct glsl_src_param src0_param; 02198 struct glsl_src_param src1_param; 02199 DWORD dst_write_mask; 02200 unsigned int dst_size; 02201 02202 dst_write_mask = shader_glsl_append_dst(buffer, ins); 02203 dst_size = shader_glsl_get_write_mask_size(dst_write_mask); 02204 02205 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0, &src0_param); 02206 shader_glsl_add_src_param(ins, &ins->src[1], WINED3DSP_WRITEMASK_0, &src1_param); 02207 02208 if (dst_size > 1) 02209 { 02210 shader_addline(buffer, "vec%u(%s == 0.0 ? 1.0 : pow(abs(%s), %s)));\n", 02211 dst_size, src1_param.param_str, src0_param.param_str, src1_param.param_str); 02212 } 02213 else 02214 { 02215 shader_addline(buffer, "%s == 0.0 ? 1.0 : pow(abs(%s), %s));\n", 02216 src1_param.param_str, src0_param.param_str, src1_param.param_str); 02217 } 02218 } 02219 02220 /* Process the WINED3DSIO_LOG instruction in GLSL (dst = log2(|src0|)) 02221 * Src0 is a scalar. Note that D3D uses the absolute of src0, while 02222 * GLSL uses the value as-is. 
*/ 02223 static void shader_glsl_log(const struct wined3d_shader_instruction *ins) 02224 { 02225 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 02226 struct glsl_src_param src0_param; 02227 DWORD dst_write_mask; 02228 unsigned int dst_size; 02229 02230 dst_write_mask = shader_glsl_append_dst(buffer, ins); 02231 dst_size = shader_glsl_get_write_mask_size(dst_write_mask); 02232 02233 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0, &src0_param); 02234 02235 if (dst_size > 1) 02236 { 02237 shader_addline(buffer, "vec%u(log2(abs(%s))));\n", 02238 dst_size, src0_param.param_str); 02239 } 02240 else 02241 { 02242 shader_addline(buffer, "log2(abs(%s)));\n", 02243 src0_param.param_str); 02244 } 02245 } 02246 02247 /* Map the opcode 1-to-1 to the GL code (arg->dst = instruction(src0, src1, ...) */ 02248 static void shader_glsl_map2gl(const struct wined3d_shader_instruction *ins) 02249 { 02250 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 02251 struct glsl_src_param src_param; 02252 const char *instruction; 02253 DWORD write_mask; 02254 unsigned i; 02255 02256 /* Determine the GLSL function to use based on the opcode */ 02257 /* TODO: Possibly make this a table for faster lookups */ 02258 switch (ins->handler_idx) 02259 { 02260 case WINED3DSIH_MIN: instruction = "min"; break; 02261 case WINED3DSIH_MAX: instruction = "max"; break; 02262 case WINED3DSIH_ABS: instruction = "abs"; break; 02263 case WINED3DSIH_FRC: instruction = "fract"; break; 02264 case WINED3DSIH_EXP: instruction = "exp2"; break; 02265 case WINED3DSIH_DSX: instruction = "dFdx"; break; 02266 case WINED3DSIH_DSY: instruction = "ycorrection.y * dFdy"; break; 02267 default: instruction = ""; 02268 FIXME("Opcode %#x not yet handled in GLSL\n", ins->handler_idx); 02269 break; 02270 } 02271 02272 write_mask = shader_glsl_append_dst(buffer, ins); 02273 02274 shader_addline(buffer, "%s(", instruction); 02275 02276 if (ins->src_count) 02277 { 02278 shader_glsl_add_src_param(ins, 
&ins->src[0], write_mask, &src_param); 02279 shader_addline(buffer, "%s", src_param.param_str); 02280 for (i = 1; i < ins->src_count; ++i) 02281 { 02282 shader_glsl_add_src_param(ins, &ins->src[i], write_mask, &src_param); 02283 shader_addline(buffer, ", %s", src_param.param_str); 02284 } 02285 } 02286 02287 shader_addline(buffer, "));\n"); 02288 } 02289 02290 static void shader_glsl_nrm(const struct wined3d_shader_instruction *ins) 02291 { 02292 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 02293 struct glsl_src_param src_param; 02294 unsigned int mask_size; 02295 DWORD write_mask; 02296 char dst_mask[6]; 02297 02298 write_mask = shader_glsl_get_write_mask(ins->dst, dst_mask); 02299 mask_size = shader_glsl_get_write_mask_size(write_mask); 02300 shader_glsl_add_src_param(ins, &ins->src[0], write_mask, &src_param); 02301 02302 shader_addline(buffer, "tmp0.x = dot(%s, %s);\n", 02303 src_param.param_str, src_param.param_str); 02304 shader_glsl_append_dst(buffer, ins); 02305 02306 if (mask_size > 1) 02307 { 02308 shader_addline(buffer, "tmp0.x == 0.0 ? vec%u(0.0) : (%s * inversesqrt(tmp0.x)));\n", 02309 mask_size, src_param.param_str); 02310 } 02311 else 02312 { 02313 shader_addline(buffer, "tmp0.x == 0.0 ? 
0.0 : (%s * inversesqrt(tmp0.x)));\n", 02314 src_param.param_str); 02315 } 02316 } 02317 02327 static void shader_glsl_expp(const struct wined3d_shader_instruction *ins) 02328 { 02329 struct glsl_src_param src_param; 02330 02331 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0, &src_param); 02332 02333 if (ins->ctx->reg_maps->shader_version.major < 2) 02334 { 02335 char dst_mask[6]; 02336 02337 shader_addline(ins->ctx->buffer, "tmp0.x = exp2(floor(%s));\n", src_param.param_str); 02338 shader_addline(ins->ctx->buffer, "tmp0.y = %s - floor(%s);\n", src_param.param_str, src_param.param_str); 02339 shader_addline(ins->ctx->buffer, "tmp0.z = exp2(%s);\n", src_param.param_str); 02340 shader_addline(ins->ctx->buffer, "tmp0.w = 1.0;\n"); 02341 02342 shader_glsl_append_dst(ins->ctx->buffer, ins); 02343 shader_glsl_get_write_mask(&ins->dst[0], dst_mask); 02344 shader_addline(ins->ctx->buffer, "tmp0%s);\n", dst_mask); 02345 } else { 02346 DWORD write_mask; 02347 unsigned int mask_size; 02348 02349 write_mask = shader_glsl_append_dst(ins->ctx->buffer, ins); 02350 mask_size = shader_glsl_get_write_mask_size(write_mask); 02351 02352 if (mask_size > 1) { 02353 shader_addline(ins->ctx->buffer, "vec%d(exp2(%s)));\n", mask_size, src_param.param_str); 02354 } else { 02355 shader_addline(ins->ctx->buffer, "exp2(%s));\n", src_param.param_str); 02356 } 02357 } 02358 } 02359 02361 static void shader_glsl_rcp(const struct wined3d_shader_instruction *ins) 02362 { 02363 struct glsl_src_param src_param; 02364 DWORD write_mask; 02365 unsigned int mask_size; 02366 02367 write_mask = shader_glsl_append_dst(ins->ctx->buffer, ins); 02368 mask_size = shader_glsl_get_write_mask_size(write_mask); 02369 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_3, &src_param); 02370 02371 if (mask_size > 1) 02372 { 02373 shader_addline(ins->ctx->buffer, "vec%u(1.0 / %s));\n", 02374 mask_size, src_param.param_str); 02375 } 02376 else 02377 { 02378 
shader_addline(ins->ctx->buffer, "1.0 / %s);\n", 02379 src_param.param_str); 02380 } 02381 } 02382 02383 static void shader_glsl_rsq(const struct wined3d_shader_instruction *ins) 02384 { 02385 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 02386 struct glsl_src_param src_param; 02387 DWORD write_mask; 02388 unsigned int mask_size; 02389 02390 write_mask = shader_glsl_append_dst(buffer, ins); 02391 mask_size = shader_glsl_get_write_mask_size(write_mask); 02392 02393 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_3, &src_param); 02394 02395 if (mask_size > 1) 02396 { 02397 shader_addline(buffer, "vec%u(inversesqrt(abs(%s))));\n", 02398 mask_size, src_param.param_str); 02399 } 02400 else 02401 { 02402 shader_addline(buffer, "inversesqrt(abs(%s)));\n", 02403 src_param.param_str); 02404 } 02405 } 02406 02408 static void shader_glsl_compare(const struct wined3d_shader_instruction *ins) 02409 { 02410 struct glsl_src_param src0_param; 02411 struct glsl_src_param src1_param; 02412 DWORD write_mask; 02413 unsigned int mask_size; 02414 02415 write_mask = shader_glsl_append_dst(ins->ctx->buffer, ins); 02416 mask_size = shader_glsl_get_write_mask_size(write_mask); 02417 shader_glsl_add_src_param(ins, &ins->src[0], write_mask, &src0_param); 02418 shader_glsl_add_src_param(ins, &ins->src[1], write_mask, &src1_param); 02419 02420 if (mask_size > 1) { 02421 const char *compare; 02422 02423 switch(ins->handler_idx) 02424 { 02425 case WINED3DSIH_SLT: compare = "lessThan"; break; 02426 case WINED3DSIH_SGE: compare = "greaterThanEqual"; break; 02427 default: compare = ""; 02428 FIXME("Can't handle opcode %#x\n", ins->handler_idx); 02429 } 02430 02431 shader_addline(ins->ctx->buffer, "vec%d(%s(%s, %s)));\n", mask_size, compare, 02432 src0_param.param_str, src1_param.param_str); 02433 } else { 02434 switch(ins->handler_idx) 02435 { 02436 case WINED3DSIH_SLT: 02437 /* Step(src0, src1) is not suitable here because if src0 == src1 SLT is supposed, 02438 * to 
return 0.0 but step returns 1.0 because step is not < x 02439 * An alternative is a bvec compare padded with an unused second component. 02440 * step(src1 * -1.0, src0 * -1.0) is not an option because it suffers from the same 02441 * issue. Playing with not() is not possible either because not() does not accept 02442 * a scalar. 02443 */ 02444 shader_addline(ins->ctx->buffer, "(%s < %s) ? 1.0 : 0.0);\n", 02445 src0_param.param_str, src1_param.param_str); 02446 break; 02447 case WINED3DSIH_SGE: 02448 /* Here we can use the step() function and safe a conditional */ 02449 shader_addline(ins->ctx->buffer, "step(%s, %s));\n", src1_param.param_str, src0_param.param_str); 02450 break; 02451 default: 02452 FIXME("Can't handle opcode %#x\n", ins->handler_idx); 02453 } 02454 02455 } 02456 } 02457 02459 static void shader_glsl_cmp(const struct wined3d_shader_instruction *ins) 02460 { 02461 struct glsl_src_param src0_param; 02462 struct glsl_src_param src1_param; 02463 struct glsl_src_param src2_param; 02464 DWORD write_mask, cmp_channel = 0; 02465 unsigned int i, j; 02466 char mask_char[6]; 02467 BOOL temp_destination = FALSE; 02468 02469 if (shader_is_scalar(&ins->src[0].reg)) 02470 { 02471 write_mask = shader_glsl_append_dst(ins->ctx->buffer, ins); 02472 02473 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_ALL, &src0_param); 02474 shader_glsl_add_src_param(ins, &ins->src[1], write_mask, &src1_param); 02475 shader_glsl_add_src_param(ins, &ins->src[2], write_mask, &src2_param); 02476 02477 shader_addline(ins->ctx->buffer, "%s >= 0.0 ? 
%s : %s);\n", 02478 src0_param.param_str, src1_param.param_str, src2_param.param_str); 02479 } else { 02480 DWORD dst_mask = ins->dst[0].write_mask; 02481 struct wined3d_shader_dst_param dst = ins->dst[0]; 02482 02483 /* Cycle through all source0 channels */ 02484 for (i=0; i<4; i++) { 02485 write_mask = 0; 02486 /* Find the destination channels which use the current source0 channel */ 02487 for (j=0; j<4; j++) { 02488 if (((ins->src[0].swizzle >> (2 * j)) & 0x3) == i) 02489 { 02490 write_mask |= WINED3DSP_WRITEMASK_0 << j; 02491 cmp_channel = WINED3DSP_WRITEMASK_0 << j; 02492 } 02493 } 02494 dst.write_mask = dst_mask & write_mask; 02495 02496 /* Splitting the cmp instruction up in multiple lines imposes a problem: 02497 * The first lines may overwrite source parameters of the following lines. 02498 * Deal with that by using a temporary destination register if needed 02499 */ 02500 if ((ins->src[0].reg.idx == ins->dst[0].reg.idx 02501 && ins->src[0].reg.type == ins->dst[0].reg.type) 02502 || (ins->src[1].reg.idx == ins->dst[0].reg.idx 02503 && ins->src[1].reg.type == ins->dst[0].reg.type) 02504 || (ins->src[2].reg.idx == ins->dst[0].reg.idx 02505 && ins->src[2].reg.type == ins->dst[0].reg.type)) 02506 { 02507 write_mask = shader_glsl_get_write_mask(&dst, mask_char); 02508 if (!write_mask) continue; 02509 shader_addline(ins->ctx->buffer, "tmp0%s = (", mask_char); 02510 temp_destination = TRUE; 02511 } else { 02512 write_mask = shader_glsl_append_dst_ext(ins->ctx->buffer, ins, &dst); 02513 if (!write_mask) continue; 02514 } 02515 02516 shader_glsl_add_src_param(ins, &ins->src[0], cmp_channel, &src0_param); 02517 shader_glsl_add_src_param(ins, &ins->src[1], write_mask, &src1_param); 02518 shader_glsl_add_src_param(ins, &ins->src[2], write_mask, &src2_param); 02519 02520 shader_addline(ins->ctx->buffer, "%s >= 0.0 ? 
%s : %s);\n", 02521 src0_param.param_str, src1_param.param_str, src2_param.param_str); 02522 } 02523 02524 if(temp_destination) { 02525 shader_glsl_get_write_mask(&ins->dst[0], mask_char); 02526 shader_glsl_append_dst(ins->ctx->buffer, ins); 02527 shader_addline(ins->ctx->buffer, "tmp0%s);\n", mask_char); 02528 } 02529 } 02530 02531 } 02532 02534 /* For ps 1.1-1.3, only a single component of src0 is used. For ps 1.4 02535 * the compare is done per component of src0. */ 02536 static void shader_glsl_cnd(const struct wined3d_shader_instruction *ins) 02537 { 02538 struct wined3d_shader_dst_param dst; 02539 struct glsl_src_param src0_param; 02540 struct glsl_src_param src1_param; 02541 struct glsl_src_param src2_param; 02542 DWORD write_mask, cmp_channel = 0; 02543 unsigned int i, j; 02544 DWORD dst_mask; 02545 DWORD shader_version = WINED3D_SHADER_VERSION(ins->ctx->reg_maps->shader_version.major, 02546 ins->ctx->reg_maps->shader_version.minor); 02547 02548 if (shader_version < WINED3D_SHADER_VERSION(1, 4)) 02549 { 02550 write_mask = shader_glsl_append_dst(ins->ctx->buffer, ins); 02551 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0, &src0_param); 02552 shader_glsl_add_src_param(ins, &ins->src[1], write_mask, &src1_param); 02553 shader_glsl_add_src_param(ins, &ins->src[2], write_mask, &src2_param); 02554 02555 /* Fun: The D3DSI_COISSUE flag changes the semantic of the cnd instruction for < 1.4 shaders */ 02556 if (ins->coissue) 02557 { 02558 shader_addline(ins->ctx->buffer, "%s /* COISSUE! */);\n", src1_param.param_str); 02559 } else { 02560 shader_addline(ins->ctx->buffer, "%s > 0.5 ? 
%s : %s);\n", 02561 src0_param.param_str, src1_param.param_str, src2_param.param_str); 02562 } 02563 return; 02564 } 02565 /* Cycle through all source0 channels */ 02566 dst_mask = ins->dst[0].write_mask; 02567 dst = ins->dst[0]; 02568 for (i=0; i<4; i++) { 02569 write_mask = 0; 02570 /* Find the destination channels which use the current source0 channel */ 02571 for (j=0; j<4; j++) { 02572 if (((ins->src[0].swizzle >> (2 * j)) & 0x3) == i) 02573 { 02574 write_mask |= WINED3DSP_WRITEMASK_0 << j; 02575 cmp_channel = WINED3DSP_WRITEMASK_0 << j; 02576 } 02577 } 02578 02579 dst.write_mask = dst_mask & write_mask; 02580 write_mask = shader_glsl_append_dst_ext(ins->ctx->buffer, ins, &dst); 02581 if (!write_mask) continue; 02582 02583 shader_glsl_add_src_param(ins, &ins->src[0], cmp_channel, &src0_param); 02584 shader_glsl_add_src_param(ins, &ins->src[1], write_mask, &src1_param); 02585 shader_glsl_add_src_param(ins, &ins->src[2], write_mask, &src2_param); 02586 02587 shader_addline(ins->ctx->buffer, "%s > 0.5 ? %s : %s);\n", 02588 src0_param.param_str, src1_param.param_str, src2_param.param_str); 02589 } 02590 } 02591 02593 static void shader_glsl_mad(const struct wined3d_shader_instruction *ins) 02594 { 02595 struct glsl_src_param src0_param; 02596 struct glsl_src_param src1_param; 02597 struct glsl_src_param src2_param; 02598 DWORD write_mask; 02599 02600 write_mask = shader_glsl_append_dst(ins->ctx->buffer, ins); 02601 shader_glsl_add_src_param(ins, &ins->src[0], write_mask, &src0_param); 02602 shader_glsl_add_src_param(ins, &ins->src[1], write_mask, &src1_param); 02603 shader_glsl_add_src_param(ins, &ins->src[2], write_mask, &src2_param); 02604 shader_addline(ins->ctx->buffer, "(%s * %s) + %s);\n", 02605 src0_param.param_str, src1_param.param_str, src2_param.param_str); 02606 } 02607 02608 /* Handles transforming all WINED3DSIO_M?x? 
opcodes for 02609 Vertex shaders to GLSL codes */ 02610 static void shader_glsl_mnxn(const struct wined3d_shader_instruction *ins) 02611 { 02612 int i; 02613 int nComponents = 0; 02614 struct wined3d_shader_dst_param tmp_dst = {{0}}; 02615 struct wined3d_shader_src_param tmp_src[2] = {{{0}}}; 02616 struct wined3d_shader_instruction tmp_ins; 02617 02618 memset(&tmp_ins, 0, sizeof(tmp_ins)); 02619 02620 /* Set constants for the temporary argument */ 02621 tmp_ins.ctx = ins->ctx; 02622 tmp_ins.dst_count = 1; 02623 tmp_ins.dst = &tmp_dst; 02624 tmp_ins.src_count = 2; 02625 tmp_ins.src = tmp_src; 02626 02627 switch(ins->handler_idx) 02628 { 02629 case WINED3DSIH_M4x4: 02630 nComponents = 4; 02631 tmp_ins.handler_idx = WINED3DSIH_DP4; 02632 break; 02633 case WINED3DSIH_M4x3: 02634 nComponents = 3; 02635 tmp_ins.handler_idx = WINED3DSIH_DP4; 02636 break; 02637 case WINED3DSIH_M3x4: 02638 nComponents = 4; 02639 tmp_ins.handler_idx = WINED3DSIH_DP3; 02640 break; 02641 case WINED3DSIH_M3x3: 02642 nComponents = 3; 02643 tmp_ins.handler_idx = WINED3DSIH_DP3; 02644 break; 02645 case WINED3DSIH_M3x2: 02646 nComponents = 2; 02647 tmp_ins.handler_idx = WINED3DSIH_DP3; 02648 break; 02649 default: 02650 break; 02651 } 02652 02653 tmp_dst = ins->dst[0]; 02654 tmp_src[0] = ins->src[0]; 02655 tmp_src[1] = ins->src[1]; 02656 for (i = 0; i < nComponents; ++i) 02657 { 02658 tmp_dst.write_mask = WINED3DSP_WRITEMASK_0 << i; 02659 shader_glsl_dot(&tmp_ins); 02660 ++tmp_src[1].reg.idx; 02661 } 02662 } 02663 02670 static void shader_glsl_lrp(const struct wined3d_shader_instruction *ins) 02671 { 02672 struct glsl_src_param src0_param; 02673 struct glsl_src_param src1_param; 02674 struct glsl_src_param src2_param; 02675 DWORD write_mask; 02676 02677 write_mask = shader_glsl_append_dst(ins->ctx->buffer, ins); 02678 02679 shader_glsl_add_src_param(ins, &ins->src[0], write_mask, &src0_param); 02680 shader_glsl_add_src_param(ins, &ins->src[1], write_mask, &src1_param); 02681 
shader_glsl_add_src_param(ins, &ins->src[2], write_mask, &src2_param); 02682 02683 shader_addline(ins->ctx->buffer, "mix(%s, %s, %s));\n", 02684 src2_param.param_str, src1_param.param_str, src0_param.param_str); 02685 } 02686 02693 static void shader_glsl_lit(const struct wined3d_shader_instruction *ins) 02694 { 02695 struct glsl_src_param src0_param; 02696 struct glsl_src_param src1_param; 02697 struct glsl_src_param src3_param; 02698 char dst_mask[6]; 02699 02700 shader_glsl_append_dst(ins->ctx->buffer, ins); 02701 shader_glsl_get_write_mask(&ins->dst[0], dst_mask); 02702 02703 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0, &src0_param); 02704 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_1, &src1_param); 02705 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_3, &src3_param); 02706 02707 /* The sdk specifies the instruction like this 02708 * dst.x = 1.0; 02709 * if(src.x > 0.0) dst.y = src.x 02710 * else dst.y = 0.0. 02711 * if(src.x > 0.0 && src.y > 0.0) dst.z = pow(src.y, power); 02712 * else dst.z = 0.0; 02713 * dst.w = 1.0; 02714 * (where power = src.w clamped between -128 and 128) 02715 * 02716 * Obviously that has quite a few conditionals in it which we don't like. So the first step is this: 02717 * dst.x = 1.0 ... No further explanation needed 02718 * dst.y = max(src.y, 0.0); ... If x < 0.0, use 0.0, otherwise x. Same as the conditional 02719 * dst.z = x > 0.0 ? pow(max(y, 0.0), p) : 0; ... 0 ^ power is 0, and otherwise we use y anyway 02720 * dst.w = 1.0. ... Nothing fancy. 02721 * 02722 * So we still have one conditional in there. So do this: 02723 * dst.z = pow(max(0.0, src.y) * step(0.0, src.x), power); 02724 * 02725 * step(0.0, x) will return 1 if src.x > 0.0, and 0 otherwise. So if y is 0 we get pow(0.0 * 1.0, power), 02726 * which sets dst.z to 0. If y > 0, but x = 0.0, we get pow(y * 0.0, power), which results in 0 too. 
02727 * if both x and y are > 0, we get pow(y * 1.0, power), as it is supposed to. 02728 * 02729 * Unfortunately pow(0.0 ^ 0.0) returns NaN on most GPUs, but lit with src.y = 0 and src.w = 0 returns 02730 * a non-NaN value in dst.z. What we return doesn't matter, as long as it is not NaN. Return 0, which is 02731 * what all Windows HW drivers and GL_ARB_vertex_program's LIT do. 02732 */ 02733 shader_addline(ins->ctx->buffer, 02734 "vec4(1.0, max(%s, 0.0), %s == 0.0 ? 0.0 : " 02735 "pow(max(0.0, %s) * step(0.0, %s), clamp(%s, -128.0, 128.0)), 1.0)%s);\n", 02736 src0_param.param_str, src3_param.param_str, src1_param.param_str, 02737 src0_param.param_str, src3_param.param_str, dst_mask); 02738 } 02739 02746 static void shader_glsl_dst(const struct wined3d_shader_instruction *ins) 02747 { 02748 struct glsl_src_param src0y_param; 02749 struct glsl_src_param src0z_param; 02750 struct glsl_src_param src1y_param; 02751 struct glsl_src_param src1w_param; 02752 char dst_mask[6]; 02753 02754 shader_glsl_append_dst(ins->ctx->buffer, ins); 02755 shader_glsl_get_write_mask(&ins->dst[0], dst_mask); 02756 02757 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_1, &src0y_param); 02758 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_2, &src0z_param); 02759 shader_glsl_add_src_param(ins, &ins->src[1], WINED3DSP_WRITEMASK_1, &src1y_param); 02760 shader_glsl_add_src_param(ins, &ins->src[1], WINED3DSP_WRITEMASK_3, &src1w_param); 02761 02762 shader_addline(ins->ctx->buffer, "vec4(1.0, %s * %s, %s, %s))%s;\n", 02763 src0y_param.param_str, src1y_param.param_str, src0z_param.param_str, src1w_param.param_str, dst_mask); 02764 } 02765 02775 static void shader_glsl_sincos(const struct wined3d_shader_instruction *ins) 02776 { 02777 struct glsl_src_param src0_param; 02778 DWORD write_mask; 02779 02780 write_mask = shader_glsl_append_dst(ins->ctx->buffer, ins); 02781 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0, &src0_param); 02782 02783 
switch (write_mask) { 02784 case WINED3DSP_WRITEMASK_0: 02785 shader_addline(ins->ctx->buffer, "cos(%s));\n", src0_param.param_str); 02786 break; 02787 02788 case WINED3DSP_WRITEMASK_1: 02789 shader_addline(ins->ctx->buffer, "sin(%s));\n", src0_param.param_str); 02790 break; 02791 02792 case (WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1): 02793 shader_addline(ins->ctx->buffer, "vec2(cos(%s), sin(%s)));\n", src0_param.param_str, src0_param.param_str); 02794 break; 02795 02796 default: 02797 ERR("Write mask should be .x, .y or .xy\n"); 02798 break; 02799 } 02800 } 02801 02802 /* sgn in vs_2_0 has 2 extra parameters(registers for temporary storage) which we don't use 02803 * here. But those extra parameters require a dedicated function for sgn, since map2gl would 02804 * generate invalid code 02805 */ 02806 static void shader_glsl_sgn(const struct wined3d_shader_instruction *ins) 02807 { 02808 struct glsl_src_param src0_param; 02809 DWORD write_mask; 02810 02811 write_mask = shader_glsl_append_dst(ins->ctx->buffer, ins); 02812 shader_glsl_add_src_param(ins, &ins->src[0], write_mask, &src0_param); 02813 02814 shader_addline(ins->ctx->buffer, "sign(%s));\n", src0_param.param_str); 02815 } 02816 02822 /* FIXME: I don't think nested loops will work correctly this way. */ 02823 static void shader_glsl_loop(const struct wined3d_shader_instruction *ins) 02824 { 02825 struct wined3d_shader_loop_state *loop_state = ins->ctx->loop_state; 02826 const struct wined3d_shader *shader = ins->ctx->shader; 02827 const struct wined3d_shader_lconst *constant; 02828 struct glsl_src_param src1_param; 02829 const DWORD *control_values = NULL; 02830 02831 shader_glsl_add_src_param(ins, &ins->src[1], WINED3DSP_WRITEMASK_ALL, &src1_param); 02832 02833 /* Try to hardcode the loop control parameters if possible. Direct3D 9 class hardware doesn't support real 02834 * varying indexing, but Microsoft designed this feature for Shader model 2.x+. 
If the loop control is 02835 * known at compile time, the GLSL compiler can unroll the loop, and replace indirect addressing with direct 02836 * addressing. 02837 */ 02838 if (ins->src[1].reg.type == WINED3DSPR_CONSTINT) 02839 { 02840 LIST_FOR_EACH_ENTRY(constant, &shader->constantsI, struct wined3d_shader_lconst, entry) 02841 { 02842 if (constant->idx == ins->src[1].reg.idx) 02843 { 02844 control_values = constant->value; 02845 break; 02846 } 02847 } 02848 } 02849 02850 if (control_values) 02851 { 02852 struct wined3d_shader_loop_control loop_control; 02853 loop_control.count = control_values[0]; 02854 loop_control.start = control_values[1]; 02855 loop_control.step = (int)control_values[2]; 02856 02857 if (loop_control.step > 0) 02858 { 02859 shader_addline(ins->ctx->buffer, "for (aL%u = %u; aL%u < (%u * %d + %u); aL%u += %d) {\n", 02860 loop_state->current_depth, loop_control.start, 02861 loop_state->current_depth, loop_control.count, loop_control.step, loop_control.start, 02862 loop_state->current_depth, loop_control.step); 02863 } 02864 else if (loop_control.step < 0) 02865 { 02866 shader_addline(ins->ctx->buffer, "for (aL%u = %u; aL%u > (%u * %d + %u); aL%u += %d) {\n", 02867 loop_state->current_depth, loop_control.start, 02868 loop_state->current_depth, loop_control.count, loop_control.step, loop_control.start, 02869 loop_state->current_depth, loop_control.step); 02870 } 02871 else 02872 { 02873 shader_addline(ins->ctx->buffer, "for (aL%u = %u, tmpInt%u = 0; tmpInt%u < %u; tmpInt%u++) {\n", 02874 loop_state->current_depth, loop_control.start, loop_state->current_depth, 02875 loop_state->current_depth, loop_control.count, 02876 loop_state->current_depth); 02877 } 02878 } else { 02879 shader_addline(ins->ctx->buffer, 02880 "for (tmpInt%u = 0, aL%u = %s.y; tmpInt%u < %s.x; tmpInt%u++, aL%u += %s.z) {\n", 02881 loop_state->current_depth, loop_state->current_reg, 02882 src1_param.reg_name, loop_state->current_depth, src1_param.reg_name, 02883 
loop_state->current_depth, loop_state->current_reg, src1_param.reg_name); 02884 } 02885 02886 ++loop_state->current_depth; 02887 ++loop_state->current_reg; 02888 } 02889 02890 static void shader_glsl_end(const struct wined3d_shader_instruction *ins) 02891 { 02892 struct wined3d_shader_loop_state *loop_state = ins->ctx->loop_state; 02893 02894 shader_addline(ins->ctx->buffer, "}\n"); 02895 02896 if (ins->handler_idx == WINED3DSIH_ENDLOOP) 02897 { 02898 --loop_state->current_depth; 02899 --loop_state->current_reg; 02900 } 02901 02902 if (ins->handler_idx == WINED3DSIH_ENDREP) 02903 { 02904 --loop_state->current_depth; 02905 } 02906 } 02907 02908 static void shader_glsl_rep(const struct wined3d_shader_instruction *ins) 02909 { 02910 const struct wined3d_shader *shader = ins->ctx->shader; 02911 struct wined3d_shader_loop_state *loop_state = ins->ctx->loop_state; 02912 const struct wined3d_shader_lconst *constant; 02913 struct glsl_src_param src0_param; 02914 const DWORD *control_values = NULL; 02915 02916 /* Try to hardcode local values to help the GLSL compiler to unroll and optimize the loop */ 02917 if (ins->src[0].reg.type == WINED3DSPR_CONSTINT) 02918 { 02919 LIST_FOR_EACH_ENTRY(constant, &shader->constantsI, struct wined3d_shader_lconst, entry) 02920 { 02921 if (constant->idx == ins->src[0].reg.idx) 02922 { 02923 control_values = constant->value; 02924 break; 02925 } 02926 } 02927 } 02928 02929 if (control_values) 02930 { 02931 shader_addline(ins->ctx->buffer, "for (tmpInt%d = 0; tmpInt%d < %d; tmpInt%d++) {\n", 02932 loop_state->current_depth, loop_state->current_depth, 02933 control_values[0], loop_state->current_depth); 02934 } 02935 else 02936 { 02937 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0, &src0_param); 02938 shader_addline(ins->ctx->buffer, "for (tmpInt%d = 0; tmpInt%d < %s; tmpInt%d++) {\n", 02939 loop_state->current_depth, loop_state->current_depth, 02940 src0_param.param_str, loop_state->current_depth); 02941 } 02942 02943 
++loop_state->current_depth; 02944 } 02945 02946 static void shader_glsl_if(const struct wined3d_shader_instruction *ins) 02947 { 02948 struct glsl_src_param src0_param; 02949 02950 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0, &src0_param); 02951 shader_addline(ins->ctx->buffer, "if (%s) {\n", src0_param.param_str); 02952 } 02953 02954 static void shader_glsl_ifc(const struct wined3d_shader_instruction *ins) 02955 { 02956 struct glsl_src_param src0_param; 02957 struct glsl_src_param src1_param; 02958 02959 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0, &src0_param); 02960 shader_glsl_add_src_param(ins, &ins->src[1], WINED3DSP_WRITEMASK_0, &src1_param); 02961 02962 shader_addline(ins->ctx->buffer, "if (%s %s %s) {\n", 02963 src0_param.param_str, shader_glsl_get_rel_op(ins->flags), src1_param.param_str); 02964 } 02965 02966 static void shader_glsl_else(const struct wined3d_shader_instruction *ins) 02967 { 02968 shader_addline(ins->ctx->buffer, "} else {\n"); 02969 } 02970 02971 static void shader_glsl_break(const struct wined3d_shader_instruction *ins) 02972 { 02973 shader_addline(ins->ctx->buffer, "break;\n"); 02974 } 02975 02976 /* FIXME: According to MSDN the compare is done per component. 
*/ 02977 static void shader_glsl_breakc(const struct wined3d_shader_instruction *ins) 02978 { 02979 struct glsl_src_param src0_param; 02980 struct glsl_src_param src1_param; 02981 02982 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0, &src0_param); 02983 shader_glsl_add_src_param(ins, &ins->src[1], WINED3DSP_WRITEMASK_0, &src1_param); 02984 02985 shader_addline(ins->ctx->buffer, "if (%s %s %s) break;\n", 02986 src0_param.param_str, shader_glsl_get_rel_op(ins->flags), src1_param.param_str); 02987 } 02988 02989 static void shader_glsl_label(const struct wined3d_shader_instruction *ins) 02990 { 02991 shader_addline(ins->ctx->buffer, "}\n"); 02992 shader_addline(ins->ctx->buffer, "void subroutine%u () {\n", ins->src[0].reg.idx); 02993 } 02994 02995 static void shader_glsl_call(const struct wined3d_shader_instruction *ins) 02996 { 02997 shader_addline(ins->ctx->buffer, "subroutine%u();\n", ins->src[0].reg.idx); 02998 } 02999 03000 static void shader_glsl_callnz(const struct wined3d_shader_instruction *ins) 03001 { 03002 struct glsl_src_param src1_param; 03003 03004 shader_glsl_add_src_param(ins, &ins->src[1], WINED3DSP_WRITEMASK_0, &src1_param); 03005 shader_addline(ins->ctx->buffer, "if (%s) subroutine%u();\n", src1_param.param_str, ins->src[0].reg.idx); 03006 } 03007 03008 static void shader_glsl_ret(const struct wined3d_shader_instruction *ins) 03009 { 03010 /* No-op. The closing } is written when a new function is started, and at the end of the shader. 
This 03011 * function only suppresses the unhandled instruction warning 03012 */ 03013 } 03014 03015 /********************************************* 03016 * Pixel Shader Specific Code begins here 03017 ********************************************/ 03018 static void shader_glsl_tex(const struct wined3d_shader_instruction *ins) 03019 { 03020 const struct wined3d_shader *shader = ins->ctx->shader; 03021 struct wined3d_device *device = shader->device; 03022 DWORD shader_version = WINED3D_SHADER_VERSION(ins->ctx->reg_maps->shader_version.major, 03023 ins->ctx->reg_maps->shader_version.minor); 03024 struct glsl_sample_function sample_function; 03025 const struct wined3d_texture *texture; 03026 DWORD sample_flags = 0; 03027 DWORD sampler_idx; 03028 DWORD mask = 0, swizzle; 03029 03030 /* 1.0-1.4: Use destination register as sampler source. 03031 * 2.0+: Use provided sampler source. */ 03032 if (shader_version < WINED3D_SHADER_VERSION(2,0)) sampler_idx = ins->dst[0].reg.idx; 03033 else sampler_idx = ins->src[1].reg.idx; 03034 texture = device->stateBlock->state.textures[sampler_idx]; 03035 03036 if (shader_version < WINED3D_SHADER_VERSION(1,4)) 03037 { 03038 const struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data; 03039 DWORD flags = (priv->cur_ps_args->tex_transform >> sampler_idx * WINED3D_PSARGS_TEXTRANSFORM_SHIFT) 03040 & WINED3D_PSARGS_TEXTRANSFORM_MASK; 03041 enum wined3d_sampler_texture_type sampler_type = ins->ctx->reg_maps->sampler_type[sampler_idx]; 03042 03043 /* Projected cube textures don't make a lot of sense, the resulting coordinates stay the same. 
*/ 03044 if (flags & WINED3D_PSARGS_PROJECTED && sampler_type != WINED3DSTT_CUBE) 03045 { 03046 sample_flags |= WINED3D_GLSL_SAMPLE_PROJECTED; 03047 switch (flags & ~WINED3D_PSARGS_PROJECTED) 03048 { 03049 case WINED3D_TTFF_COUNT1: 03050 FIXME("WINED3D_TTFF_PROJECTED with WINED3D_TTFF_COUNT1?\n"); 03051 break; 03052 case WINED3D_TTFF_COUNT2: 03053 mask = WINED3DSP_WRITEMASK_1; 03054 break; 03055 case WINED3D_TTFF_COUNT3: 03056 mask = WINED3DSP_WRITEMASK_2; 03057 break; 03058 case WINED3D_TTFF_COUNT4: 03059 case WINED3D_TTFF_DISABLE: 03060 mask = WINED3DSP_WRITEMASK_3; 03061 break; 03062 } 03063 } 03064 } 03065 else if (shader_version < WINED3D_SHADER_VERSION(2,0)) 03066 { 03067 enum wined3d_shader_src_modifier src_mod = ins->src[0].modifiers; 03068 03069 if (src_mod == WINED3DSPSM_DZ) { 03070 sample_flags |= WINED3D_GLSL_SAMPLE_PROJECTED; 03071 mask = WINED3DSP_WRITEMASK_2; 03072 } else if (src_mod == WINED3DSPSM_DW) { 03073 sample_flags |= WINED3D_GLSL_SAMPLE_PROJECTED; 03074 mask = WINED3DSP_WRITEMASK_3; 03075 } 03076 } else { 03077 if (ins->flags & WINED3DSI_TEXLD_PROJECT) 03078 { 03079 /* ps 2.0 texldp instruction always divides by the fourth component. */ 03080 sample_flags |= WINED3D_GLSL_SAMPLE_PROJECTED; 03081 mask = WINED3DSP_WRITEMASK_3; 03082 } 03083 } 03084 03085 if (texture && texture->target == GL_TEXTURE_RECTANGLE_ARB) 03086 sample_flags |= WINED3D_GLSL_SAMPLE_RECT; 03087 03088 shader_glsl_get_sample_function(ins->ctx, sampler_idx, sample_flags, &sample_function); 03089 mask |= sample_function.coord_mask; 03090 03091 if (shader_version < WINED3D_SHADER_VERSION(2,0)) swizzle = WINED3DSP_NOSWIZZLE; 03092 else swizzle = ins->src[1].swizzle; 03093 03094 /* 1.0-1.3: Use destination register as coordinate source. 03095 1.4+: Use provided coordinate source register. 
*/ 03096 if (shader_version < WINED3D_SHADER_VERSION(1,4)) 03097 { 03098 char coord_mask[6]; 03099 shader_glsl_write_mask_to_str(mask, coord_mask); 03100 shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, swizzle, NULL, NULL, NULL, 03101 "T%u%s", sampler_idx, coord_mask); 03102 } 03103 else 03104 { 03105 struct glsl_src_param coord_param; 03106 shader_glsl_add_src_param(ins, &ins->src[0], mask, &coord_param); 03107 if (ins->flags & WINED3DSI_TEXLD_BIAS) 03108 { 03109 struct glsl_src_param bias; 03110 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_3, &bias); 03111 shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, swizzle, NULL, NULL, bias.param_str, 03112 "%s", coord_param.param_str); 03113 } else { 03114 shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, swizzle, NULL, NULL, NULL, 03115 "%s", coord_param.param_str); 03116 } 03117 } 03118 } 03119 03120 static void shader_glsl_texldd(const struct wined3d_shader_instruction *ins) 03121 { 03122 const struct wined3d_shader *shader = ins->ctx->shader; 03123 struct wined3d_device *device = shader->device; 03124 const struct wined3d_gl_info *gl_info = ins->ctx->gl_info; 03125 struct glsl_src_param coord_param, dx_param, dy_param; 03126 DWORD sample_flags = WINED3D_GLSL_SAMPLE_GRAD; 03127 struct glsl_sample_function sample_function; 03128 DWORD sampler_idx; 03129 DWORD swizzle = ins->src[1].swizzle; 03130 const struct wined3d_texture *texture; 03131 03132 if (!gl_info->supported[ARB_SHADER_TEXTURE_LOD] && !gl_info->supported[EXT_GPU_SHADER4]) 03133 { 03134 FIXME("texldd used, but not supported by hardware. 
Falling back to regular tex\n"); 03135 shader_glsl_tex(ins); 03136 return; 03137 } 03138 03139 sampler_idx = ins->src[1].reg.idx; 03140 texture = device->stateBlock->state.textures[sampler_idx]; 03141 if (texture && texture->target == GL_TEXTURE_RECTANGLE_ARB) 03142 sample_flags |= WINED3D_GLSL_SAMPLE_RECT; 03143 03144 shader_glsl_get_sample_function(ins->ctx, sampler_idx, sample_flags, &sample_function); 03145 shader_glsl_add_src_param(ins, &ins->src[0], sample_function.coord_mask, &coord_param); 03146 shader_glsl_add_src_param(ins, &ins->src[2], sample_function.coord_mask, &dx_param); 03147 shader_glsl_add_src_param(ins, &ins->src[3], sample_function.coord_mask, &dy_param); 03148 03149 shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, swizzle, dx_param.param_str, dy_param.param_str, NULL, 03150 "%s", coord_param.param_str); 03151 } 03152 03153 static void shader_glsl_texldl(const struct wined3d_shader_instruction *ins) 03154 { 03155 const struct wined3d_shader *shader = ins->ctx->shader; 03156 struct wined3d_device *device = shader->device; 03157 const struct wined3d_gl_info *gl_info = ins->ctx->gl_info; 03158 struct glsl_src_param coord_param, lod_param; 03159 DWORD sample_flags = WINED3D_GLSL_SAMPLE_LOD; 03160 struct glsl_sample_function sample_function; 03161 DWORD sampler_idx; 03162 DWORD swizzle = ins->src[1].swizzle; 03163 const struct wined3d_texture *texture; 03164 03165 sampler_idx = ins->src[1].reg.idx; 03166 texture = device->stateBlock->state.textures[sampler_idx]; 03167 if (texture && texture->target == GL_TEXTURE_RECTANGLE_ARB) 03168 sample_flags |= WINED3D_GLSL_SAMPLE_RECT; 03169 03170 shader_glsl_get_sample_function(ins->ctx, sampler_idx, sample_flags, &sample_function); 03171 shader_glsl_add_src_param(ins, &ins->src[0], sample_function.coord_mask, &coord_param); 03172 03173 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_3, &lod_param); 03174 03175 if (!gl_info->supported[ARB_SHADER_TEXTURE_LOD] && 
!gl_info->supported[EXT_GPU_SHADER4] 03176 && shader_is_pshader_version(ins->ctx->reg_maps->shader_version.type)) 03177 { 03178 /* Plain GLSL only supports Lod sampling functions in vertex shaders. 03179 * However, the NVIDIA drivers allow them in fragment shaders as well, 03180 * even without the appropriate extension. */ 03181 WARN("Using %s in fragment shader.\n", sample_function.name); 03182 } 03183 shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, swizzle, NULL, NULL, lod_param.param_str, 03184 "%s", coord_param.param_str); 03185 } 03186 03187 static void shader_glsl_texcoord(const struct wined3d_shader_instruction *ins) 03188 { 03189 /* FIXME: Make this work for more than just 2D textures */ 03190 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 03191 DWORD write_mask = shader_glsl_append_dst(ins->ctx->buffer, ins); 03192 03193 if (!(ins->ctx->reg_maps->shader_version.major == 1 && ins->ctx->reg_maps->shader_version.minor == 4)) 03194 { 03195 char dst_mask[6]; 03196 03197 shader_glsl_get_write_mask(&ins->dst[0], dst_mask); 03198 shader_addline(buffer, "clamp(gl_TexCoord[%u], 0.0, 1.0)%s);\n", 03199 ins->dst[0].reg.idx, dst_mask); 03200 } 03201 else 03202 { 03203 enum wined3d_shader_src_modifier src_mod = ins->src[0].modifiers; 03204 DWORD reg = ins->src[0].reg.idx; 03205 char dst_swizzle[6]; 03206 03207 shader_glsl_get_swizzle(&ins->src[0], FALSE, write_mask, dst_swizzle); 03208 03209 if (src_mod == WINED3DSPSM_DZ) 03210 { 03211 unsigned int mask_size = shader_glsl_get_write_mask_size(write_mask); 03212 struct glsl_src_param div_param; 03213 03214 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_2, &div_param); 03215 03216 if (mask_size > 1) { 03217 shader_addline(buffer, "gl_TexCoord[%u]%s / vec%d(%s));\n", reg, dst_swizzle, mask_size, div_param.param_str); 03218 } else { 03219 shader_addline(buffer, "gl_TexCoord[%u]%s / %s);\n", reg, dst_swizzle, div_param.param_str); 03220 } 03221 } 03222 else if (src_mod == 
WINED3DSPSM_DW) 03223 { 03224 unsigned int mask_size = shader_glsl_get_write_mask_size(write_mask); 03225 struct glsl_src_param div_param; 03226 03227 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_3, &div_param); 03228 03229 if (mask_size > 1) { 03230 shader_addline(buffer, "gl_TexCoord[%u]%s / vec%d(%s));\n", reg, dst_swizzle, mask_size, div_param.param_str); 03231 } else { 03232 shader_addline(buffer, "gl_TexCoord[%u]%s / %s);\n", reg, dst_swizzle, div_param.param_str); 03233 } 03234 } else { 03235 shader_addline(buffer, "gl_TexCoord[%u]%s);\n", reg, dst_swizzle); 03236 } 03237 } 03238 } 03239 03243 static void shader_glsl_texdp3tex(const struct wined3d_shader_instruction *ins) 03244 { 03245 DWORD sampler_idx = ins->dst[0].reg.idx; 03246 DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2; 03247 struct glsl_sample_function sample_function; 03248 struct glsl_src_param src0_param; 03249 UINT mask_size; 03250 03251 shader_glsl_add_src_param(ins, &ins->src[0], src_mask, &src0_param); 03252 03253 /* Do I have to take care about the projected bit? I don't think so, since the dp3 returns only one 03254 * scalar, and projected sampling would require 4. 
03255 * 03256 * It is a dependent read - not valid with conditional NP2 textures 03257 */ 03258 shader_glsl_get_sample_function(ins->ctx, sampler_idx, 0, &sample_function); 03259 mask_size = shader_glsl_get_write_mask_size(sample_function.coord_mask); 03260 03261 switch(mask_size) 03262 { 03263 case 1: 03264 shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL, 03265 "dot(gl_TexCoord[%u].xyz, %s)", sampler_idx, src0_param.param_str); 03266 break; 03267 03268 case 2: 03269 shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL, 03270 "vec2(dot(gl_TexCoord[%u].xyz, %s), 0.0)", sampler_idx, src0_param.param_str); 03271 break; 03272 03273 case 3: 03274 shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL, 03275 "vec3(dot(gl_TexCoord[%u].xyz, %s), 0.0, 0.0)", sampler_idx, src0_param.param_str); 03276 break; 03277 03278 default: 03279 FIXME("Unexpected mask size %u\n", mask_size); 03280 break; 03281 } 03282 } 03283 03286 static void shader_glsl_texdp3(const struct wined3d_shader_instruction *ins) 03287 { 03288 DWORD dstreg = ins->dst[0].reg.idx; 03289 DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2; 03290 struct glsl_src_param src0_param; 03291 DWORD dst_mask; 03292 unsigned int mask_size; 03293 03294 dst_mask = shader_glsl_append_dst(ins->ctx->buffer, ins); 03295 mask_size = shader_glsl_get_write_mask_size(dst_mask); 03296 shader_glsl_add_src_param(ins, &ins->src[0], src_mask, &src0_param); 03297 03298 if (mask_size > 1) { 03299 shader_addline(ins->ctx->buffer, "vec%d(dot(T%u.xyz, %s)));\n", mask_size, dstreg, src0_param.param_str); 03300 } else { 03301 shader_addline(ins->ctx->buffer, "dot(T%u.xyz, %s));\n", dstreg, src0_param.param_str); 03302 } 03303 } 03304 03307 static void shader_glsl_texdepth(const struct wined3d_shader_instruction *ins) 03308 { 03309 struct glsl_dst_param 
dst_param; 03310 03311 shader_glsl_add_dst_param(ins, &ins->dst[0], &dst_param); 03312 03313 /* Tests show that texdepth never returns anything below 0.0, and that r5.y is clamped to 1.0. 03314 * Negative input is accepted, -0.25 / -0.5 returns 0.5. GL should clamp gl_FragDepth to [0;1], but 03315 * this doesn't always work, so clamp the results manually. Whether or not the x value is clamped at 1 03316 * too is irrelevant, since if x = 0, any y value < 1.0 (and > 1.0 is not allowed) results in a result 03317 * >= 1.0 or < 0.0 03318 */ 03319 shader_addline(ins->ctx->buffer, "gl_FragDepth = clamp((%s.x / min(%s.y, 1.0)), 0.0, 1.0);\n", 03320 dst_param.reg_name, dst_param.reg_name); 03321 } 03322 03328 static void shader_glsl_texm3x2depth(const struct wined3d_shader_instruction *ins) 03329 { 03330 DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2; 03331 DWORD dstreg = ins->dst[0].reg.idx; 03332 struct glsl_src_param src0_param; 03333 03334 shader_glsl_add_src_param(ins, &ins->src[0], src_mask, &src0_param); 03335 03336 shader_addline(ins->ctx->buffer, "tmp0.y = dot(T%u.xyz, %s);\n", dstreg, src0_param.param_str); 03337 shader_addline(ins->ctx->buffer, "gl_FragDepth = (tmp0.y == 0.0) ? 
1.0 : clamp(tmp0.x / tmp0.y, 0.0, 1.0);\n"); 03338 } 03339 03342 static void shader_glsl_texm3x2pad(const struct wined3d_shader_instruction *ins) 03343 { 03344 DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2; 03345 DWORD reg = ins->dst[0].reg.idx; 03346 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 03347 struct glsl_src_param src0_param; 03348 03349 shader_glsl_add_src_param(ins, &ins->src[0], src_mask, &src0_param); 03350 shader_addline(buffer, "tmp0.x = dot(T%u.xyz, %s);\n", reg, src0_param.param_str); 03351 } 03352 03355 static void shader_glsl_texm3x3pad(const struct wined3d_shader_instruction *ins) 03356 { 03357 DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2; 03358 DWORD reg = ins->dst[0].reg.idx; 03359 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 03360 struct wined3d_shader_tex_mx *tex_mx = ins->ctx->tex_mx; 03361 struct glsl_src_param src0_param; 03362 03363 shader_glsl_add_src_param(ins, &ins->src[0], src_mask, &src0_param); 03364 shader_addline(buffer, "tmp0.%c = dot(T%u.xyz, %s);\n", 'x' + tex_mx->current_row, reg, src0_param.param_str); 03365 tex_mx->texcoord_w[tex_mx->current_row++] = reg; 03366 } 03367 03368 static void shader_glsl_texm3x2tex(const struct wined3d_shader_instruction *ins) 03369 { 03370 DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2; 03371 DWORD reg = ins->dst[0].reg.idx; 03372 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 03373 struct glsl_sample_function sample_function; 03374 struct glsl_src_param src0_param; 03375 03376 shader_glsl_add_src_param(ins, &ins->src[0], src_mask, &src0_param); 03377 shader_addline(buffer, "tmp0.y = dot(T%u.xyz, %s);\n", reg, src0_param.param_str); 03378 03379 shader_glsl_get_sample_function(ins->ctx, reg, 0, &sample_function); 03380 03381 /* Sample the texture using the calculated coordinates */ 03382 shader_glsl_gen_sample_code(ins, reg, &sample_function, 
WINED3DSP_NOSWIZZLE, NULL, NULL, NULL, "tmp0.xy"); 03383 } 03384 03387 static void shader_glsl_texm3x3tex(const struct wined3d_shader_instruction *ins) 03388 { 03389 DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2; 03390 struct wined3d_shader_tex_mx *tex_mx = ins->ctx->tex_mx; 03391 struct glsl_sample_function sample_function; 03392 struct glsl_src_param src0_param; 03393 DWORD reg = ins->dst[0].reg.idx; 03394 03395 shader_glsl_add_src_param(ins, &ins->src[0], src_mask, &src0_param); 03396 shader_addline(ins->ctx->buffer, "tmp0.z = dot(T%u.xyz, %s);\n", reg, src0_param.param_str); 03397 03398 /* Dependent read, not valid with conditional NP2 */ 03399 shader_glsl_get_sample_function(ins->ctx, reg, 0, &sample_function); 03400 03401 /* Sample the texture using the calculated coordinates */ 03402 shader_glsl_gen_sample_code(ins, reg, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL, "tmp0.xyz"); 03403 03404 tex_mx->current_row = 0; 03405 } 03406 03409 static void shader_glsl_texm3x3(const struct wined3d_shader_instruction *ins) 03410 { 03411 DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2; 03412 struct wined3d_shader_tex_mx *tex_mx = ins->ctx->tex_mx; 03413 struct glsl_src_param src0_param; 03414 char dst_mask[6]; 03415 DWORD reg = ins->dst[0].reg.idx; 03416 03417 shader_glsl_add_src_param(ins, &ins->src[0], src_mask, &src0_param); 03418 03419 shader_glsl_append_dst(ins->ctx->buffer, ins); 03420 shader_glsl_get_write_mask(&ins->dst[0], dst_mask); 03421 shader_addline(ins->ctx->buffer, "vec4(tmp0.xy, dot(T%u.xyz, %s), 1.0)%s);\n", reg, src0_param.param_str, dst_mask); 03422 03423 tex_mx->current_row = 0; 03424 } 03425 03426 /* Process the WINED3DSIO_TEXM3X3SPEC instruction in GLSL 03427 * Perform the final texture lookup based on the previous 2 3x3 matrix multiplies */ 03428 static void shader_glsl_texm3x3spec(const struct wined3d_shader_instruction *ins) 03429 { 03430 struct 
glsl_src_param src0_param; 03431 struct glsl_src_param src1_param; 03432 DWORD reg = ins->dst[0].reg.idx; 03433 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 03434 struct wined3d_shader_tex_mx *tex_mx = ins->ctx->tex_mx; 03435 DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2; 03436 struct glsl_sample_function sample_function; 03437 char coord_mask[6]; 03438 03439 shader_glsl_add_src_param(ins, &ins->src[0], src_mask, &src0_param); 03440 shader_glsl_add_src_param(ins, &ins->src[1], src_mask, &src1_param); 03441 03442 /* Perform the last matrix multiply operation */ 03443 shader_addline(buffer, "tmp0.z = dot(T%u.xyz, %s);\n", reg, src0_param.param_str); 03444 /* Reflection calculation */ 03445 shader_addline(buffer, "tmp0.xyz = -reflect((%s), normalize(tmp0.xyz));\n", src1_param.param_str); 03446 03447 /* Dependent read, not valid with conditional NP2 */ 03448 shader_glsl_get_sample_function(ins->ctx, reg, 0, &sample_function); 03449 shader_glsl_write_mask_to_str(sample_function.coord_mask, coord_mask); 03450 03451 /* Sample the texture */ 03452 shader_glsl_gen_sample_code(ins, reg, &sample_function, WINED3DSP_NOSWIZZLE, 03453 NULL, NULL, NULL, "tmp0%s", coord_mask); 03454 03455 tex_mx->current_row = 0; 03456 } 03457 03458 /* Process the WINED3DSIO_TEXM3X3VSPEC instruction in GLSL 03459 * Perform the final texture lookup based on the previous 2 3x3 matrix multiplies */ 03460 static void shader_glsl_texm3x3vspec(const struct wined3d_shader_instruction *ins) 03461 { 03462 DWORD reg = ins->dst[0].reg.idx; 03463 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 03464 struct wined3d_shader_tex_mx *tex_mx = ins->ctx->tex_mx; 03465 DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2; 03466 struct glsl_sample_function sample_function; 03467 struct glsl_src_param src0_param; 03468 char coord_mask[6]; 03469 03470 shader_glsl_add_src_param(ins, &ins->src[0], src_mask, &src0_param); 03471 
03472 /* Perform the last matrix multiply operation */ 03473 shader_addline(buffer, "tmp0.z = dot(vec3(T%u), vec3(%s));\n", reg, src0_param.param_str); 03474 03475 /* Construct the eye-ray vector from w coordinates */ 03476 shader_addline(buffer, "tmp1.xyz = normalize(vec3(gl_TexCoord[%u].w, gl_TexCoord[%u].w, gl_TexCoord[%u].w));\n", 03477 tex_mx->texcoord_w[0], tex_mx->texcoord_w[1], reg); 03478 shader_addline(buffer, "tmp0.xyz = -reflect(tmp1.xyz, normalize(tmp0.xyz));\n"); 03479 03480 /* Dependent read, not valid with conditional NP2 */ 03481 shader_glsl_get_sample_function(ins->ctx, reg, 0, &sample_function); 03482 shader_glsl_write_mask_to_str(sample_function.coord_mask, coord_mask); 03483 03484 /* Sample the texture using the calculated coordinates */ 03485 shader_glsl_gen_sample_code(ins, reg, &sample_function, WINED3DSP_NOSWIZZLE, 03486 NULL, NULL, NULL, "tmp0%s", coord_mask); 03487 03488 tex_mx->current_row = 0; 03489 } 03490 03495 static void shader_glsl_texbem(const struct wined3d_shader_instruction *ins) 03496 { 03497 const struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data; 03498 struct glsl_sample_function sample_function; 03499 struct glsl_src_param coord_param; 03500 DWORD sampler_idx; 03501 DWORD mask; 03502 DWORD flags; 03503 char coord_mask[6]; 03504 03505 sampler_idx = ins->dst[0].reg.idx; 03506 flags = (priv->cur_ps_args->tex_transform >> sampler_idx * WINED3D_PSARGS_TEXTRANSFORM_SHIFT) 03507 & WINED3D_PSARGS_TEXTRANSFORM_MASK; 03508 03509 /* Dependent read, not valid with conditional NP2 */ 03510 shader_glsl_get_sample_function(ins->ctx, sampler_idx, 0, &sample_function); 03511 mask = sample_function.coord_mask; 03512 03513 shader_glsl_write_mask_to_str(mask, coord_mask); 03514 03515 /* With projected textures, texbem only divides the static texture coord, 03516 * not the displacement, so we can't let GL handle this. 
*/ 03517 if (flags & WINED3D_PSARGS_PROJECTED) 03518 { 03519 DWORD div_mask=0; 03520 char coord_div_mask[3]; 03521 switch (flags & ~WINED3D_PSARGS_PROJECTED) 03522 { 03523 case WINED3D_TTFF_COUNT1: 03524 FIXME("WINED3D_TTFF_PROJECTED with WINED3D_TTFF_COUNT1?\n"); 03525 break; 03526 case WINED3D_TTFF_COUNT2: 03527 div_mask = WINED3DSP_WRITEMASK_1; 03528 break; 03529 case WINED3D_TTFF_COUNT3: 03530 div_mask = WINED3DSP_WRITEMASK_2; 03531 break; 03532 case WINED3D_TTFF_COUNT4: 03533 case WINED3D_TTFF_DISABLE: 03534 div_mask = WINED3DSP_WRITEMASK_3; 03535 break; 03536 } 03537 shader_glsl_write_mask_to_str(div_mask, coord_div_mask); 03538 shader_addline(ins->ctx->buffer, "T%u%s /= T%u%s;\n", sampler_idx, coord_mask, sampler_idx, coord_div_mask); 03539 } 03540 03541 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1, &coord_param); 03542 03543 shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL, 03544 "T%u%s + vec4(bumpenvmat%d * %s, 0.0, 0.0)%s", sampler_idx, coord_mask, sampler_idx, 03545 coord_param.param_str, coord_mask); 03546 03547 if (ins->handler_idx == WINED3DSIH_TEXBEML) 03548 { 03549 struct glsl_src_param luminance_param; 03550 struct glsl_dst_param dst_param; 03551 03552 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_2, &luminance_param); 03553 shader_glsl_add_dst_param(ins, &ins->dst[0], &dst_param); 03554 03555 shader_addline(ins->ctx->buffer, "%s%s *= (%s * luminancescale%d + luminanceoffset%d);\n", 03556 dst_param.reg_name, dst_param.mask_str, 03557 luminance_param.param_str, sampler_idx, sampler_idx); 03558 } 03559 } 03560 03561 static void shader_glsl_bem(const struct wined3d_shader_instruction *ins) 03562 { 03563 struct glsl_src_param src0_param, src1_param; 03564 DWORD sampler_idx = ins->dst[0].reg.idx; 03565 03566 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1, &src0_param); 03567 
shader_glsl_add_src_param(ins, &ins->src[1], WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1, &src1_param); 03568 03569 shader_glsl_append_dst(ins->ctx->buffer, ins); 03570 shader_addline(ins->ctx->buffer, "%s + bumpenvmat%d * %s);\n", 03571 src0_param.param_str, sampler_idx, src1_param.param_str); 03572 } 03573 03576 static void shader_glsl_texreg2ar(const struct wined3d_shader_instruction *ins) 03577 { 03578 struct glsl_sample_function sample_function; 03579 struct glsl_src_param src0_param; 03580 DWORD sampler_idx = ins->dst[0].reg.idx; 03581 03582 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_ALL, &src0_param); 03583 03584 shader_glsl_get_sample_function(ins->ctx, sampler_idx, 0, &sample_function); 03585 shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL, 03586 "%s.wx", src0_param.reg_name); 03587 } 03588 03591 static void shader_glsl_texreg2gb(const struct wined3d_shader_instruction *ins) 03592 { 03593 struct glsl_sample_function sample_function; 03594 struct glsl_src_param src0_param; 03595 DWORD sampler_idx = ins->dst[0].reg.idx; 03596 03597 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_ALL, &src0_param); 03598 03599 shader_glsl_get_sample_function(ins->ctx, sampler_idx, 0, &sample_function); 03600 shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL, 03601 "%s.yz", src0_param.reg_name); 03602 } 03603 03606 static void shader_glsl_texreg2rgb(const struct wined3d_shader_instruction *ins) 03607 { 03608 struct glsl_sample_function sample_function; 03609 struct glsl_src_param src0_param; 03610 DWORD sampler_idx = ins->dst[0].reg.idx; 03611 03612 /* Dependent read, not valid with conditional NP2 */ 03613 shader_glsl_get_sample_function(ins->ctx, sampler_idx, 0, &sample_function); 03614 shader_glsl_add_src_param(ins, &ins->src[0], sample_function.coord_mask, &src0_param); 03615 03616 shader_glsl_gen_sample_code(ins, sampler_idx, 
&sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL, 03617 "%s", src0_param.param_str); 03618 } 03619 03622 static void shader_glsl_texkill(const struct wined3d_shader_instruction *ins) 03623 { 03624 struct glsl_dst_param dst_param; 03625 03626 /* The argument is a destination parameter, and no writemasks are allowed */ 03627 shader_glsl_add_dst_param(ins, &ins->dst[0], &dst_param); 03628 if (ins->ctx->reg_maps->shader_version.major >= 2) 03629 { 03630 /* 2.0 shaders compare all 4 components in texkill */ 03631 shader_addline(ins->ctx->buffer, "if (any(lessThan(%s.xyzw, vec4(0.0)))) discard;\n", dst_param.reg_name); 03632 } else { 03633 /* 1.X shaders only compare the first 3 components, probably due to the nature of the texkill 03634 * instruction as a tex* instruction, and phase, which kills all a / w components. Even if all 03635 * 4 components are defined, only the first 3 are used 03636 */ 03637 shader_addline(ins->ctx->buffer, "if (any(lessThan(%s.xyz, vec3(0.0)))) discard;\n", dst_param.reg_name); 03638 } 03639 } 03640 03643 static void shader_glsl_dp2add(const struct wined3d_shader_instruction *ins) 03644 { 03645 struct glsl_src_param src0_param; 03646 struct glsl_src_param src1_param; 03647 struct glsl_src_param src2_param; 03648 DWORD write_mask; 03649 unsigned int mask_size; 03650 03651 write_mask = shader_glsl_append_dst(ins->ctx->buffer, ins); 03652 mask_size = shader_glsl_get_write_mask_size(write_mask); 03653 03654 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1, &src0_param); 03655 shader_glsl_add_src_param(ins, &ins->src[1], WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1, &src1_param); 03656 shader_glsl_add_src_param(ins, &ins->src[2], WINED3DSP_WRITEMASK_0, &src2_param); 03657 03658 if (mask_size > 1) { 03659 shader_addline(ins->ctx->buffer, "vec%d(dot(%s, %s) + %s));\n", 03660 mask_size, src0_param.param_str, src1_param.param_str, src2_param.param_str); 03661 } else { 03662 
shader_addline(ins->ctx->buffer, "dot(%s, %s) + %s);\n", 03663 src0_param.param_str, src1_param.param_str, src2_param.param_str); 03664 } 03665 } 03666 03667 static void shader_glsl_input_pack(const struct wined3d_shader *shader, struct wined3d_shader_buffer *buffer, 03668 const struct wined3d_shader_signature_element *input_signature, 03669 const struct wined3d_shader_reg_maps *reg_maps, 03670 enum vertexprocessing_mode vertexprocessing) 03671 { 03672 WORD map = reg_maps->input_registers; 03673 unsigned int i; 03674 03675 for (i = 0; map; map >>= 1, ++i) 03676 { 03677 const char *semantic_name; 03678 UINT semantic_idx; 03679 char reg_mask[6]; 03680 03681 /* Unused */ 03682 if (!(map & 1)) continue; 03683 03684 semantic_name = input_signature[i].semantic_name; 03685 semantic_idx = input_signature[i].semantic_idx; 03686 shader_glsl_write_mask_to_str(input_signature[i].mask, reg_mask); 03687 03688 if (shader_match_semantic(semantic_name, WINED3DDECLUSAGE_TEXCOORD)) 03689 { 03690 if (semantic_idx < 8 && vertexprocessing == pretransformed) 03691 shader_addline(buffer, "IN[%u]%s = gl_TexCoord[%u]%s;\n", 03692 shader->u.ps.input_reg_map[i], reg_mask, semantic_idx, reg_mask); 03693 else 03694 shader_addline(buffer, "IN[%u]%s = vec4(0.0, 0.0, 0.0, 0.0)%s;\n", 03695 shader->u.ps.input_reg_map[i], reg_mask, reg_mask); 03696 } 03697 else if (shader_match_semantic(semantic_name, WINED3DDECLUSAGE_COLOR)) 03698 { 03699 if (!semantic_idx) 03700 shader_addline(buffer, "IN[%u]%s = vec4(gl_Color)%s;\n", 03701 shader->u.ps.input_reg_map[i], reg_mask, reg_mask); 03702 else if (semantic_idx == 1) 03703 shader_addline(buffer, "IN[%u]%s = vec4(gl_SecondaryColor)%s;\n", 03704 shader->u.ps.input_reg_map[i], reg_mask, reg_mask); 03705 else 03706 shader_addline(buffer, "IN[%u]%s = vec4(0.0, 0.0, 0.0, 0.0)%s;\n", 03707 shader->u.ps.input_reg_map[i], reg_mask, reg_mask); 03708 } 03709 else 03710 { 03711 shader_addline(buffer, "IN[%u]%s = vec4(0.0, 0.0, 0.0, 0.0)%s;\n", 03712 
shader->u.ps.input_reg_map[i], reg_mask, reg_mask); 03713 } 03714 } 03715 } 03716 03717 /********************************************* 03718 * Vertex Shader Specific Code begins here 03719 ********************************************/ 03720 03721 static void add_glsl_program_entry(struct shader_glsl_priv *priv, struct glsl_shader_prog_link *entry) 03722 { 03723 struct glsl_program_key key; 03724 03725 key.vshader = entry->vshader; 03726 key.pshader = entry->pshader; 03727 key.vs_args = entry->vs_args; 03728 key.ps_args = entry->ps_args; 03729 03730 if (wine_rb_put(&priv->program_lookup, &key, &entry->program_lookup_entry) == -1) 03731 { 03732 ERR("Failed to insert program entry.\n"); 03733 } 03734 } 03735 03736 static struct glsl_shader_prog_link *get_glsl_program_entry(const struct shader_glsl_priv *priv, 03737 const struct wined3d_shader *vshader, const struct wined3d_shader *pshader, 03738 const struct vs_compile_args *vs_args, const struct ps_compile_args *ps_args) 03739 { 03740 struct wine_rb_entry *entry; 03741 struct glsl_program_key key; 03742 03743 key.vshader = vshader; 03744 key.pshader = pshader; 03745 key.vs_args = *vs_args; 03746 key.ps_args = *ps_args; 03747 03748 entry = wine_rb_get(&priv->program_lookup, &key); 03749 return entry ? 
WINE_RB_ENTRY_VALUE(entry, struct glsl_shader_prog_link, program_lookup_entry) : NULL; 03750 } 03751 03752 /* GL locking is done by the caller */ 03753 static void delete_glsl_program_entry(struct shader_glsl_priv *priv, const struct wined3d_gl_info *gl_info, 03754 struct glsl_shader_prog_link *entry) 03755 { 03756 struct glsl_program_key key; 03757 03758 key.vshader = entry->vshader; 03759 key.pshader = entry->pshader; 03760 key.vs_args = entry->vs_args; 03761 key.ps_args = entry->ps_args; 03762 wine_rb_remove(&priv->program_lookup, &key); 03763 03764 GL_EXTCALL(glDeleteObjectARB(entry->programId)); 03765 if (entry->vshader) list_remove(&entry->vshader_entry); 03766 if (entry->pshader) list_remove(&entry->pshader_entry); 03767 HeapFree(GetProcessHeap(), 0, entry->vuniformF_locations); 03768 HeapFree(GetProcessHeap(), 0, entry->puniformF_locations); 03769 HeapFree(GetProcessHeap(), 0, entry); 03770 } 03771 03772 static void handle_ps3_input(struct wined3d_shader_buffer *buffer, 03773 const struct wined3d_gl_info *gl_info, const DWORD *map, 03774 const struct wined3d_shader_signature_element *input_signature, 03775 const struct wined3d_shader_reg_maps *reg_maps_in, 03776 const struct wined3d_shader_signature_element *output_signature, 03777 const struct wined3d_shader_reg_maps *reg_maps_out) 03778 { 03779 unsigned int i, j; 03780 const char *semantic_name_in; 03781 UINT semantic_idx_in; 03782 DWORD *set; 03783 DWORD in_idx; 03784 unsigned int in_count = vec4_varyings(3, gl_info); 03785 char reg_mask[6]; 03786 char destination[50]; 03787 WORD input_map, output_map; 03788 03789 set = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*set) * (in_count + 2)); 03790 03791 input_map = reg_maps_in->input_registers; 03792 for (i = 0; input_map; input_map >>= 1, ++i) 03793 { 03794 if (!(input_map & 1)) continue; 03795 03796 in_idx = map[i]; 03797 /* Declared, but not read register */ 03798 if (in_idx == ~0U) continue; 03799 if (in_idx >= (in_count + 2)) 03800 { 03801 
FIXME("More input varyings declared than supported, expect issues.\n"); 03802 continue; 03803 } 03804 03805 if (in_idx == in_count) { 03806 sprintf(destination, "gl_FrontColor"); 03807 } else if (in_idx == in_count + 1) { 03808 sprintf(destination, "gl_FrontSecondaryColor"); 03809 } else { 03810 sprintf(destination, "IN[%u]", in_idx); 03811 } 03812 03813 semantic_name_in = input_signature[i].semantic_name; 03814 semantic_idx_in = input_signature[i].semantic_idx; 03815 set[in_idx] = ~0U; 03816 03817 output_map = reg_maps_out->output_registers; 03818 for (j = 0; output_map; output_map >>= 1, ++j) 03819 { 03820 DWORD mask; 03821 03822 if (!(output_map & 1) 03823 || semantic_idx_in != output_signature[j].semantic_idx 03824 || strcmp(semantic_name_in, output_signature[j].semantic_name) 03825 || !(mask = input_signature[i].mask & output_signature[j].mask)) 03826 continue; 03827 03828 set[in_idx] = mask; 03829 shader_glsl_write_mask_to_str(mask, reg_mask); 03830 03831 shader_addline(buffer, "%s%s = OUT[%u]%s;\n", 03832 destination, reg_mask, j, reg_mask); 03833 } 03834 } 03835 03836 for (i = 0; i < in_count + 2; ++i) 03837 { 03838 unsigned int size; 03839 03840 if (!set[i] || set[i] == WINED3DSP_WRITEMASK_ALL) 03841 continue; 03842 03843 if (set[i] == ~0U) set[i] = 0; 03844 03845 size = 0; 03846 if (!(set[i] & WINED3DSP_WRITEMASK_0)) reg_mask[size++] = 'x'; 03847 if (!(set[i] & WINED3DSP_WRITEMASK_1)) reg_mask[size++] = 'y'; 03848 if (!(set[i] & WINED3DSP_WRITEMASK_2)) reg_mask[size++] = 'z'; 03849 if (!(set[i] & WINED3DSP_WRITEMASK_3)) reg_mask[size++] = 'w'; 03850 reg_mask[size] = '\0'; 03851 03852 if (i == in_count) sprintf(destination, "gl_FrontColor"); 03853 else if (i == in_count + 1) sprintf(destination, "gl_FrontSecondaryColor"); 03854 else sprintf(destination, "IN[%u]", i); 03855 03856 if (size == 1) shader_addline(buffer, "%s.%s = 0.0;\n", destination, reg_mask); 03857 else shader_addline(buffer, "%s.%s = vec%u(0.0);\n", destination, reg_mask, size); 03858 } 
03859 03860 HeapFree(GetProcessHeap(), 0, set); 03861 } 03862 03863 /* GL locking is done by the caller */ 03864 static GLhandleARB generate_param_reorder_function(struct wined3d_shader_buffer *buffer, 03865 const struct wined3d_shader *vs, const struct wined3d_shader *ps, 03866 const struct wined3d_gl_info *gl_info) 03867 { 03868 GLhandleARB ret = 0; 03869 DWORD ps_major = ps ? ps->reg_maps.shader_version.major : 0; 03870 unsigned int i; 03871 const char *semantic_name; 03872 UINT semantic_idx; 03873 char reg_mask[6]; 03874 const struct wined3d_shader_signature_element *output_signature = vs->output_signature; 03875 WORD map = vs->reg_maps.output_registers; 03876 03877 shader_buffer_clear(buffer); 03878 03879 shader_addline(buffer, "#version 120\n"); 03880 03881 if (ps_major < 3) 03882 { 03883 shader_addline(buffer, "void order_ps_input(in vec4 OUT[%u]) {\n", MAX_REG_OUTPUT); 03884 03885 for (i = 0; map; map >>= 1, ++i) 03886 { 03887 DWORD write_mask; 03888 03889 if (!(map & 1)) continue; 03890 03891 semantic_name = output_signature[i].semantic_name; 03892 semantic_idx = output_signature[i].semantic_idx; 03893 write_mask = output_signature[i].mask; 03894 shader_glsl_write_mask_to_str(write_mask, reg_mask); 03895 03896 if (shader_match_semantic(semantic_name, WINED3DDECLUSAGE_COLOR)) 03897 { 03898 if (!semantic_idx) 03899 shader_addline(buffer, "gl_FrontColor%s = OUT[%u]%s;\n", 03900 reg_mask, i, reg_mask); 03901 else if (semantic_idx == 1) 03902 shader_addline(buffer, "gl_FrontSecondaryColor%s = OUT[%u]%s;\n", 03903 reg_mask, i, reg_mask); 03904 } 03905 else if (shader_match_semantic(semantic_name, WINED3DDECLUSAGE_POSITION)) 03906 { 03907 shader_addline(buffer, "gl_Position%s = OUT[%u]%s;\n", 03908 reg_mask, i, reg_mask); 03909 } 03910 else if (shader_match_semantic(semantic_name, WINED3DDECLUSAGE_TEXCOORD)) 03911 { 03912 if (semantic_idx < 8) 03913 { 03914 if (!(gl_info->quirks & WINED3D_QUIRK_SET_TEXCOORD_W) || ps_major > 0) 03915 write_mask |= 
WINED3DSP_WRITEMASK_3; 03916 03917 shader_addline(buffer, "gl_TexCoord[%u]%s = OUT[%u]%s;\n", 03918 semantic_idx, reg_mask, i, reg_mask); 03919 if (!(write_mask & WINED3DSP_WRITEMASK_3)) 03920 shader_addline(buffer, "gl_TexCoord[%u].w = 1.0;\n", semantic_idx); 03921 } 03922 } 03923 else if (shader_match_semantic(semantic_name, WINED3DDECLUSAGE_PSIZE)) 03924 { 03925 shader_addline(buffer, "gl_PointSize = OUT[%u].%c;\n", i, reg_mask[1]); 03926 } 03927 else if (shader_match_semantic(semantic_name, WINED3DDECLUSAGE_FOG)) 03928 { 03929 shader_addline(buffer, "gl_FogFragCoord = OUT[%u].%c;\n", i, reg_mask[1]); 03930 } 03931 } 03932 shader_addline(buffer, "}\n"); 03933 03934 } 03935 else 03936 { 03937 UINT in_count = min(vec4_varyings(ps_major, gl_info), ps->limits.packed_input); 03938 /* This one is tricky: a 3.0 pixel shader reads from a 3.0 vertex shader */ 03939 shader_addline(buffer, "varying vec4 IN[%u];\n", in_count); 03940 shader_addline(buffer, "void order_ps_input(in vec4 OUT[%u]) {\n", MAX_REG_OUTPUT); 03941 03942 /* First, sort out position and point size. 
Those are not passed to the pixel shader */ 03943 for (i = 0; map; map >>= 1, ++i) 03944 { 03945 if (!(map & 1)) continue; 03946 03947 semantic_name = output_signature[i].semantic_name; 03948 shader_glsl_write_mask_to_str(output_signature[i].mask, reg_mask); 03949 03950 if (shader_match_semantic(semantic_name, WINED3DDECLUSAGE_POSITION)) 03951 { 03952 shader_addline(buffer, "gl_Position%s = OUT[%u]%s;\n", 03953 reg_mask, i, reg_mask); 03954 } 03955 else if (shader_match_semantic(semantic_name, WINED3DDECLUSAGE_PSIZE)) 03956 { 03957 shader_addline(buffer, "gl_PointSize = OUT[%u].%c;\n", i, reg_mask[1]); 03958 } 03959 } 03960 03961 /* Then, fix the pixel shader input */ 03962 handle_ps3_input(buffer, gl_info, ps->u.ps.input_reg_map, ps->input_signature, 03963 &ps->reg_maps, output_signature, &vs->reg_maps); 03964 03965 shader_addline(buffer, "}\n"); 03966 } 03967 03968 ret = GL_EXTCALL(glCreateShaderObjectARB(GL_VERTEX_SHADER_ARB)); 03969 checkGLcall("glCreateShaderObjectARB(GL_VERTEX_SHADER_ARB)"); 03970 shader_glsl_compile(gl_info, ret, buffer->buffer); 03971 03972 return ret; 03973 } 03974 03975 /* GL locking is done by the caller */ 03976 static void hardcode_local_constants(const struct wined3d_shader *shader, 03977 const struct wined3d_gl_info *gl_info, GLhandleARB programId, char prefix) 03978 { 03979 const struct wined3d_shader_lconst *lconst; 03980 GLint tmp_loc; 03981 const float *value; 03982 char glsl_name[8]; 03983 03984 LIST_FOR_EACH_ENTRY(lconst, &shader->constantsF, struct wined3d_shader_lconst, entry) 03985 { 03986 value = (const float *)lconst->value; 03987 snprintf(glsl_name, sizeof(glsl_name), "%cLC%u", prefix, lconst->idx); 03988 tmp_loc = GL_EXTCALL(glGetUniformLocationARB(programId, glsl_name)); 03989 GL_EXTCALL(glUniform4fvARB(tmp_loc, 1, value)); 03990 } 03991 checkGLcall("Hardcoding local constants"); 03992 } 03993 03994 /* GL locking is done by the caller */ 03995 static GLuint shader_glsl_generate_pshader(const struct wined3d_context 
*context, 03996 struct wined3d_shader_buffer *buffer, const struct wined3d_shader *shader, 03997 const struct ps_compile_args *args, struct ps_np2fixup_info *np2fixup_info) 03998 { 03999 const struct wined3d_shader_reg_maps *reg_maps = &shader->reg_maps; 04000 const struct wined3d_gl_info *gl_info = context->gl_info; 04001 const DWORD *function = shader->function; 04002 struct shader_glsl_ctx_priv priv_ctx; 04003 04004 /* Create the hw GLSL shader object and assign it as the shader->prgId */ 04005 GLhandleARB shader_obj = GL_EXTCALL(glCreateShaderObjectARB(GL_FRAGMENT_SHADER_ARB)); 04006 04007 memset(&priv_ctx, 0, sizeof(priv_ctx)); 04008 priv_ctx.cur_ps_args = args; 04009 priv_ctx.cur_np2fixup_info = np2fixup_info; 04010 04011 shader_addline(buffer, "#version 120\n"); 04012 04013 if (gl_info->supported[ARB_SHADER_TEXTURE_LOD]) 04014 { 04015 shader_addline(buffer, "#extension GL_ARB_shader_texture_lod : enable\n"); 04016 } 04017 if (gl_info->supported[ARB_TEXTURE_RECTANGLE]) 04018 { 04019 /* The spec says that it doesn't have to be explicitly enabled, but the nvidia 04020 * drivers write a warning if we don't do so 04021 */ 04022 shader_addline(buffer, "#extension GL_ARB_texture_rectangle : enable\n"); 04023 } 04024 if (gl_info->supported[EXT_GPU_SHADER4]) 04025 { 04026 shader_addline(buffer, "#extension GL_EXT_gpu_shader4 : enable\n"); 04027 } 04028 04029 /* Base Declarations */ 04030 shader_generate_glsl_declarations(context, buffer, shader, reg_maps, &priv_ctx); 04031 04032 /* Pack 3.0 inputs */ 04033 if (reg_maps->shader_version.major >= 3 && args->vp_mode != vertexshader) 04034 shader_glsl_input_pack(shader, buffer, shader->input_signature, reg_maps, args->vp_mode); 04035 04036 /* Base Shader Body */ 04037 shader_generate_main(shader, buffer, reg_maps, function, &priv_ctx); 04038 04039 /* Pixel shaders < 2.0 place the resulting color in R0 implicitly */ 04040 if (reg_maps->shader_version.major < 2) 04041 { 04042 /* Some older cards like GeforceFX ones don't 
support multiple buffers, so also not gl_FragData */ 04043 shader_addline(buffer, "gl_FragData[0] = R0;\n"); 04044 } 04045 04046 if (args->srgb_correction) 04047 { 04048 shader_addline(buffer, "tmp0.xyz = pow(gl_FragData[0].xyz, vec3(srgb_const0.x));\n"); 04049 shader_addline(buffer, "tmp0.xyz = tmp0.xyz * vec3(srgb_const0.y) - vec3(srgb_const0.z);\n"); 04050 shader_addline(buffer, "tmp1.xyz = gl_FragData[0].xyz * vec3(srgb_const0.w);\n"); 04051 shader_addline(buffer, "bvec3 srgb_compare = lessThan(gl_FragData[0].xyz, vec3(srgb_const1.x));\n"); 04052 shader_addline(buffer, "gl_FragData[0].xyz = mix(tmp0.xyz, tmp1.xyz, vec3(srgb_compare));\n"); 04053 shader_addline(buffer, "gl_FragData[0] = clamp(gl_FragData[0], 0.0, 1.0);\n"); 04054 } 04055 /* Pixel shader < 3.0 do not replace the fog stage. 04056 * This implements linear fog computation and blending. 04057 * TODO: non linear fog 04058 * NOTE: gl_Fog.start and gl_Fog.end don't hold fog start s and end e but 04059 * -1/(e-s) and e/(e-s) respectively. 
04060 */ 04061 if (reg_maps->shader_version.major < 3) 04062 { 04063 switch(args->fog) { 04064 case FOG_OFF: break; 04065 case FOG_LINEAR: 04066 shader_addline(buffer, "float fogstart = -1.0 / (gl_Fog.end - gl_Fog.start);\n"); 04067 shader_addline(buffer, "float fogend = gl_Fog.end * -fogstart;\n"); 04068 shader_addline(buffer, "float Fog = clamp(gl_FogFragCoord * fogstart + fogend, 0.0, 1.0);\n"); 04069 shader_addline(buffer, "gl_FragData[0].xyz = mix(gl_Fog.color.xyz, gl_FragData[0].xyz, Fog);\n"); 04070 break; 04071 case FOG_EXP: 04072 /* Fog = e^(-gl_Fog.density * gl_FogFragCoord) */ 04073 shader_addline(buffer, "float Fog = exp(-gl_Fog.density * gl_FogFragCoord);\n"); 04074 shader_addline(buffer, "Fog = clamp(Fog, 0.0, 1.0);\n"); 04075 shader_addline(buffer, "gl_FragData[0].xyz = mix(gl_Fog.color.xyz, gl_FragData[0].xyz, Fog);\n"); 04076 break; 04077 case FOG_EXP2: 04078 /* Fog = e^(-(gl_Fog.density * gl_FogFragCoord)^2) */ 04079 shader_addline(buffer, "float Fog = exp(-gl_Fog.density * gl_Fog.density * gl_FogFragCoord * gl_FogFragCoord);\n"); 04080 shader_addline(buffer, "Fog = clamp(Fog, 0.0, 1.0);\n"); 04081 shader_addline(buffer, "gl_FragData[0].xyz = mix(gl_Fog.color.xyz, gl_FragData[0].xyz, Fog);\n"); 04082 break; 04083 } 04084 } 04085 04086 shader_addline(buffer, "}\n"); 04087 04088 TRACE("Compiling shader object %u\n", shader_obj); 04089 shader_glsl_compile(gl_info, shader_obj, buffer->buffer); 04090 04091 /* Store the shader object */ 04092 return shader_obj; 04093 } 04094 04095 /* GL locking is done by the caller */ 04096 static GLuint shader_glsl_generate_vshader(const struct wined3d_context *context, 04097 struct wined3d_shader_buffer *buffer, const struct wined3d_shader *shader, 04098 const struct vs_compile_args *args) 04099 { 04100 const struct wined3d_shader_reg_maps *reg_maps = &shader->reg_maps; 04101 const struct wined3d_gl_info *gl_info = context->gl_info; 04102 const DWORD *function = shader->function; 04103 struct shader_glsl_ctx_priv 
priv_ctx; 04104 04105 /* Create the hw GLSL shader program and assign it as the shader->prgId */ 04106 GLhandleARB shader_obj = GL_EXTCALL(glCreateShaderObjectARB(GL_VERTEX_SHADER_ARB)); 04107 04108 shader_addline(buffer, "#version 120\n"); 04109 04110 if (gl_info->supported[EXT_GPU_SHADER4]) 04111 shader_addline(buffer, "#extension GL_EXT_gpu_shader4 : enable\n"); 04112 04113 memset(&priv_ctx, 0, sizeof(priv_ctx)); 04114 priv_ctx.cur_vs_args = args; 04115 04116 /* Base Declarations */ 04117 shader_generate_glsl_declarations(context, buffer, shader, reg_maps, &priv_ctx); 04118 04119 /* Base Shader Body */ 04120 shader_generate_main(shader, buffer, reg_maps, function, &priv_ctx); 04121 04122 /* Unpack outputs */ 04123 shader_addline(buffer, "order_ps_input(OUT);\n"); 04124 04125 /* The D3DRS_FOGTABLEMODE render state defines if the shader-generated fog coord is used 04126 * or if the fragment depth is used. If the fragment depth is used(FOGTABLEMODE != NONE), 04127 * the fog frag coord is thrown away. If the fog frag coord is used, but not written by 04128 * the shader, it is set to 0.0(fully fogged, since start = 1.0, end = 0.0) 04129 */ 04130 if (args->fog_src == VS_FOG_Z) 04131 shader_addline(buffer, "gl_FogFragCoord = gl_Position.z;\n"); 04132 else if (!reg_maps->fog) 04133 shader_addline(buffer, "gl_FogFragCoord = 0.0;\n"); 04134 04135 /* We always store the clipplanes without y inversion */ 04136 if (args->clip_enabled) 04137 shader_addline(buffer, "gl_ClipVertex = gl_Position;\n"); 04138 04139 /* Write the final position. 04140 * 04141 * OpenGL coordinates specify the center of the pixel while d3d coords specify 04142 * the corner. The offsets are stored in z and w in posFixup. posFixup.y contains 04143 * 1.0 or -1.0 to turn the rendering upside down for offscreen rendering. PosFixup.x 04144 * contains 1.0 to allow a mad. 
04145 */ 04146 shader_addline(buffer, "gl_Position.y = gl_Position.y * posFixup.y;\n"); 04147 shader_addline(buffer, "gl_Position.xy += posFixup.zw * gl_Position.ww;\n"); 04148 04149 /* Z coord [0;1]->[-1;1] mapping, see comment in transform_projection in state.c 04150 * 04151 * Basically we want (in homogeneous coordinates) z = z * 2 - 1. However, shaders are run 04152 * before the homogeneous divide, so we have to take the w into account: z = ((z / w) * 2 - 1) * w, 04153 * which is the same as z = z * 2 - w. 04154 */ 04155 shader_addline(buffer, "gl_Position.z = gl_Position.z * 2.0 - gl_Position.w;\n"); 04156 04157 shader_addline(buffer, "}\n"); 04158 04159 TRACE("Compiling shader object %u\n", shader_obj); 04160 shader_glsl_compile(gl_info, shader_obj, buffer->buffer); 04161 04162 return shader_obj; 04163 } 04164 04165 static GLhandleARB find_glsl_pshader(const struct wined3d_context *context, 04166 struct wined3d_shader_buffer *buffer, struct wined3d_shader *shader, 04167 const struct ps_compile_args *args, const struct ps_np2fixup_info **np2fixup_info) 04168 { 04169 struct wined3d_state *state = &shader->device->stateBlock->state; 04170 UINT i; 04171 DWORD new_size; 04172 struct glsl_ps_compiled_shader *new_array; 04173 struct glsl_pshader_private *shader_data; 04174 struct ps_np2fixup_info *np2fixup = NULL; 04175 GLhandleARB ret; 04176 04177 if (!shader->backend_data) 04178 { 04179 shader->backend_data = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*shader_data)); 04180 if (!shader->backend_data) 04181 { 04182 ERR("Failed to allocate backend data.\n"); 04183 return 0; 04184 } 04185 } 04186 shader_data = shader->backend_data; 04187 04188 /* Usually we have very few GL shaders for each d3d shader(just 1 or maybe 2), 04189 * so a linear search is more performant than a hashmap or a binary search 04190 * (cache coherency etc) 04191 */ 04192 for (i = 0; i < shader_data->num_gl_shaders; ++i) 04193 { 04194 if (!memcmp(&shader_data->gl_shaders[i].args, args, 
sizeof(*args))) 04195 { 04196 if (args->np2_fixup) *np2fixup_info = &shader_data->gl_shaders[i].np2fixup; 04197 return shader_data->gl_shaders[i].prgId; 04198 } 04199 } 04200 04201 TRACE("No matching GL shader found for shader %p, compiling a new shader.\n", shader); 04202 if(shader_data->shader_array_size == shader_data->num_gl_shaders) { 04203 if (shader_data->num_gl_shaders) 04204 { 04205 new_size = shader_data->shader_array_size + max(1, shader_data->shader_array_size / 2); 04206 new_array = HeapReAlloc(GetProcessHeap(), 0, shader_data->gl_shaders, 04207 new_size * sizeof(*shader_data->gl_shaders)); 04208 } else { 04209 new_array = HeapAlloc(GetProcessHeap(), 0, sizeof(*shader_data->gl_shaders)); 04210 new_size = 1; 04211 } 04212 04213 if(!new_array) { 04214 ERR("Out of memory\n"); 04215 return 0; 04216 } 04217 shader_data->gl_shaders = new_array; 04218 shader_data->shader_array_size = new_size; 04219 } 04220 04221 shader_data->gl_shaders[shader_data->num_gl_shaders].args = *args; 04222 04223 memset(&shader_data->gl_shaders[shader_data->num_gl_shaders].np2fixup, 0, sizeof(struct ps_np2fixup_info)); 04224 if (args->np2_fixup) np2fixup = &shader_data->gl_shaders[shader_data->num_gl_shaders].np2fixup; 04225 04226 pixelshader_update_samplers(&shader->reg_maps, state->textures); 04227 04228 shader_buffer_clear(buffer); 04229 ret = shader_glsl_generate_pshader(context, buffer, shader, args, np2fixup); 04230 shader_data->gl_shaders[shader_data->num_gl_shaders++].prgId = ret; 04231 *np2fixup_info = np2fixup; 04232 04233 return ret; 04234 } 04235 04236 static inline BOOL vs_args_equal(const struct vs_compile_args *stored, const struct vs_compile_args *new, 04237 const DWORD use_map) { 04238 if((stored->swizzle_map & use_map) != new->swizzle_map) return FALSE; 04239 if((stored->clip_enabled) != new->clip_enabled) return FALSE; 04240 return stored->fog_src == new->fog_src; 04241 } 04242 04243 static GLhandleARB find_glsl_vshader(const struct wined3d_context *context, 
04244 struct wined3d_shader_buffer *buffer, struct wined3d_shader *shader, 04245 const struct vs_compile_args *args) 04246 { 04247 UINT i; 04248 DWORD new_size; 04249 struct glsl_vs_compiled_shader *new_array; 04250 DWORD use_map = shader->device->strided_streams.use_map; 04251 struct glsl_vshader_private *shader_data; 04252 GLhandleARB ret; 04253 04254 if (!shader->backend_data) 04255 { 04256 shader->backend_data = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*shader_data)); 04257 if (!shader->backend_data) 04258 { 04259 ERR("Failed to allocate backend data.\n"); 04260 return 0; 04261 } 04262 } 04263 shader_data = shader->backend_data; 04264 04265 /* Usually we have very few GL shaders for each d3d shader(just 1 or maybe 2), 04266 * so a linear search is more performant than a hashmap or a binary search 04267 * (cache coherency etc) 04268 */ 04269 for(i = 0; i < shader_data->num_gl_shaders; i++) { 04270 if(vs_args_equal(&shader_data->gl_shaders[i].args, args, use_map)) { 04271 return shader_data->gl_shaders[i].prgId; 04272 } 04273 } 04274 04275 TRACE("No matching GL shader found for shader %p, compiling a new shader.\n", shader); 04276 04277 if(shader_data->shader_array_size == shader_data->num_gl_shaders) { 04278 if (shader_data->num_gl_shaders) 04279 { 04280 new_size = shader_data->shader_array_size + max(1, shader_data->shader_array_size / 2); 04281 new_array = HeapReAlloc(GetProcessHeap(), 0, shader_data->gl_shaders, 04282 new_size * sizeof(*shader_data->gl_shaders)); 04283 } else { 04284 new_array = HeapAlloc(GetProcessHeap(), 0, sizeof(*shader_data->gl_shaders)); 04285 new_size = 1; 04286 } 04287 04288 if(!new_array) { 04289 ERR("Out of memory\n"); 04290 return 0; 04291 } 04292 shader_data->gl_shaders = new_array; 04293 shader_data->shader_array_size = new_size; 04294 } 04295 04296 shader_data->gl_shaders[shader_data->num_gl_shaders].args = *args; 04297 04298 shader_buffer_clear(buffer); 04299 ret = shader_glsl_generate_vshader(context, buffer, 
shader, args); 04300 shader_data->gl_shaders[shader_data->num_gl_shaders++].prgId = ret; 04301 04302 return ret; 04303 } 04304 04314 /* GL locking is done by the caller */ 04315 static void set_glsl_shader_program(const struct wined3d_context *context, 04316 struct wined3d_device *device, BOOL use_ps, BOOL use_vs) 04317 { 04318 const struct wined3d_state *state = &device->stateBlock->state; 04319 struct wined3d_shader *vshader = use_vs ? state->vertex_shader : NULL; 04320 struct wined3d_shader *pshader = use_ps ? state->pixel_shader : NULL; 04321 const struct wined3d_gl_info *gl_info = context->gl_info; 04322 struct shader_glsl_priv *priv = device->shader_priv; 04323 struct glsl_shader_prog_link *entry = NULL; 04324 GLhandleARB programId = 0; 04325 GLhandleARB reorder_shader_id = 0; 04326 unsigned int i; 04327 char glsl_name[8]; 04328 struct ps_compile_args ps_compile_args; 04329 struct vs_compile_args vs_compile_args; 04330 04331 if (vshader) find_vs_compile_args(state, vshader, &vs_compile_args); 04332 if (pshader) find_ps_compile_args(state, pshader, &ps_compile_args); 04333 04334 entry = get_glsl_program_entry(priv, vshader, pshader, &vs_compile_args, &ps_compile_args); 04335 if (entry) 04336 { 04337 priv->glsl_program = entry; 04338 return; 04339 } 04340 04341 /* If we get to this point, then no matching program exists, so we create one */ 04342 programId = GL_EXTCALL(glCreateProgramObjectARB()); 04343 TRACE("Created new GLSL shader program %u\n", programId); 04344 04345 /* Create the entry */ 04346 entry = HeapAlloc(GetProcessHeap(), 0, sizeof(struct glsl_shader_prog_link)); 04347 entry->programId = programId; 04348 entry->vshader = vshader; 04349 entry->pshader = pshader; 04350 entry->vs_args = vs_compile_args; 04351 entry->ps_args = ps_compile_args; 04352 entry->constant_version = 0; 04353 entry->np2Fixup_info = NULL; 04354 /* Add the hash table entry */ 04355 add_glsl_program_entry(priv, entry); 04356 04357 /* Set the current program */ 04358 
priv->glsl_program = entry; 04359 04360 /* Attach GLSL vshader */ 04361 if (vshader) 04362 { 04363 GLhandleARB vshader_id = find_glsl_vshader(context, &priv->shader_buffer, vshader, &vs_compile_args); 04364 WORD map = vshader->reg_maps.input_registers; 04365 char tmp_name[10]; 04366 04367 reorder_shader_id = generate_param_reorder_function(&priv->shader_buffer, vshader, pshader, gl_info); 04368 TRACE("Attaching GLSL shader object %u to program %u\n", reorder_shader_id, programId); 04369 GL_EXTCALL(glAttachObjectARB(programId, reorder_shader_id)); 04370 checkGLcall("glAttachObjectARB"); 04371 /* Flag the reorder function for deletion, then it will be freed automatically when the program 04372 * is destroyed 04373 */ 04374 GL_EXTCALL(glDeleteObjectARB(reorder_shader_id)); 04375 04376 TRACE("Attaching GLSL shader object %u to program %u\n", vshader_id, programId); 04377 GL_EXTCALL(glAttachObjectARB(programId, vshader_id)); 04378 checkGLcall("glAttachObjectARB"); 04379 04380 /* Bind vertex attributes to a corresponding index number to match 04381 * the same index numbers as ARB_vertex_programs (makes loading 04382 * vertex attributes simpler). With this method, we can use the 04383 * exact same code to load the attributes later for both ARB and 04384 * GLSL shaders. 04385 * 04386 * We have to do this here because we need to know the Program ID 04387 * in order to make the bindings work, and it has to be done prior 04388 * to linking the GLSL program. 
*/ 04389 for (i = 0; map; map >>= 1, ++i) 04390 { 04391 if (!(map & 1)) continue; 04392 04393 snprintf(tmp_name, sizeof(tmp_name), "attrib%u", i); 04394 GL_EXTCALL(glBindAttribLocationARB(programId, i, tmp_name)); 04395 } 04396 checkGLcall("glBindAttribLocationARB"); 04397 04398 list_add_head(&vshader->linked_programs, &entry->vshader_entry); 04399 } 04400 04401 /* Attach GLSL pshader */ 04402 if (pshader) 04403 { 04404 GLhandleARB pshader_id = find_glsl_pshader(context, &priv->shader_buffer, 04405 pshader, &ps_compile_args, &entry->np2Fixup_info); 04406 TRACE("Attaching GLSL shader object %u to program %u\n", pshader_id, programId); 04407 GL_EXTCALL(glAttachObjectARB(programId, pshader_id)); 04408 checkGLcall("glAttachObjectARB"); 04409 04410 list_add_head(&pshader->linked_programs, &entry->pshader_entry); 04411 } 04412 04413 /* Link the program */ 04414 TRACE("Linking GLSL shader program %u\n", programId); 04415 GL_EXTCALL(glLinkProgramARB(programId)); 04416 shader_glsl_validate_link(gl_info, programId); 04417 04418 entry->vuniformF_locations = HeapAlloc(GetProcessHeap(), 0, 04419 sizeof(GLhandleARB) * gl_info->limits.glsl_vs_float_constants); 04420 for (i = 0; i < gl_info->limits.glsl_vs_float_constants; ++i) 04421 { 04422 snprintf(glsl_name, sizeof(glsl_name), "VC[%i]", i); 04423 entry->vuniformF_locations[i] = GL_EXTCALL(glGetUniformLocationARB(programId, glsl_name)); 04424 } 04425 for (i = 0; i < MAX_CONST_I; ++i) 04426 { 04427 snprintf(glsl_name, sizeof(glsl_name), "VI[%i]", i); 04428 entry->vuniformI_locations[i] = GL_EXTCALL(glGetUniformLocationARB(programId, glsl_name)); 04429 } 04430 entry->puniformF_locations = HeapAlloc(GetProcessHeap(), 0, 04431 sizeof(GLhandleARB) * gl_info->limits.glsl_ps_float_constants); 04432 for (i = 0; i < gl_info->limits.glsl_ps_float_constants; ++i) 04433 { 04434 snprintf(glsl_name, sizeof(glsl_name), "PC[%i]", i); 04435 entry->puniformF_locations[i] = GL_EXTCALL(glGetUniformLocationARB(programId, glsl_name)); 04436 } 04437 
for (i = 0; i < MAX_CONST_I; ++i) 04438 { 04439 snprintf(glsl_name, sizeof(glsl_name), "PI[%i]", i); 04440 entry->puniformI_locations[i] = GL_EXTCALL(glGetUniformLocationARB(programId, glsl_name)); 04441 } 04442 04443 if(pshader) { 04444 char name[32]; 04445 04446 for(i = 0; i < MAX_TEXTURES; i++) { 04447 sprintf(name, "bumpenvmat%u", i); 04448 entry->bumpenvmat_location[i] = GL_EXTCALL(glGetUniformLocationARB(programId, name)); 04449 sprintf(name, "luminancescale%u", i); 04450 entry->luminancescale_location[i] = GL_EXTCALL(glGetUniformLocationARB(programId, name)); 04451 sprintf(name, "luminanceoffset%u", i); 04452 entry->luminanceoffset_location[i] = GL_EXTCALL(glGetUniformLocationARB(programId, name)); 04453 } 04454 04455 if (ps_compile_args.np2_fixup) { 04456 if (entry->np2Fixup_info) { 04457 entry->np2Fixup_location = GL_EXTCALL(glGetUniformLocationARB(programId, "PsamplerNP2Fixup")); 04458 } else { 04459 FIXME("NP2 texcoord fixup needed for this pixelshader, but no fixup uniform found.\n"); 04460 } 04461 } 04462 } 04463 04464 entry->posFixup_location = GL_EXTCALL(glGetUniformLocationARB(programId, "posFixup")); 04465 entry->ycorrection_location = GL_EXTCALL(glGetUniformLocationARB(programId, "ycorrection")); 04466 checkGLcall("Find glsl program uniform locations"); 04467 04468 if (pshader && pshader->reg_maps.shader_version.major >= 3 04469 && pshader->u.ps.declared_in_count > vec4_varyings(3, gl_info)) 04470 { 04471 TRACE("Shader %d needs vertex color clamping disabled\n", programId); 04472 entry->vertex_color_clamp = GL_FALSE; 04473 } else { 04474 entry->vertex_color_clamp = GL_FIXED_ONLY_ARB; 04475 } 04476 04477 /* Set the shader to allow uniform loading on it */ 04478 GL_EXTCALL(glUseProgramObjectARB(programId)); 04479 checkGLcall("glUseProgramObjectARB(programId)"); 04480 04481 /* Load the vertex and pixel samplers now. 
The function that finds the mappings makes sure 04482 * that it stays the same for each vertexshader-pixelshader pair(=linked glsl program). If 04483 * a pshader with fixed function pipeline is used there are no vertex samplers, and if a 04484 * vertex shader with fixed function pixel processing is used we make sure that the card 04485 * supports enough samplers to allow the max number of vertex samplers with all possible 04486 * fixed function fragment processing setups. So once the program is linked these samplers 04487 * won't change. 04488 */ 04489 if (vshader) shader_glsl_load_vsamplers(gl_info, device->texUnitMap, programId); 04490 if (pshader) shader_glsl_load_psamplers(gl_info, device->texUnitMap, programId); 04491 04492 /* If the local constants do not have to be loaded with the environment constants, 04493 * load them now to have them hardcoded in the GLSL program. This saves some CPU cycles 04494 * later 04495 */ 04496 if (pshader && !pshader->load_local_constsF) 04497 hardcode_local_constants(pshader, gl_info, programId, 'P'); 04498 if (vshader && !vshader->load_local_constsF) 04499 hardcode_local_constants(vshader, gl_info, programId, 'V'); 04500 } 04501 04502 /* GL locking is done by the caller */ 04503 static GLhandleARB create_glsl_blt_shader(const struct wined3d_gl_info *gl_info, enum tex_types tex_type, BOOL masked) 04504 { 04505 GLhandleARB program_id; 04506 GLhandleARB vshader_id, pshader_id; 04507 const char *blt_pshader; 04508 04509 static const char *blt_vshader = 04510 "#version 120\n" 04511 "void main(void)\n" 04512 "{\n" 04513 " gl_Position = gl_Vertex;\n" 04514 " gl_FrontColor = vec4(1.0);\n" 04515 " gl_TexCoord[0] = gl_MultiTexCoord0;\n" 04516 "}\n"; 04517 04518 static const char * const blt_pshaders_full[tex_type_count] = 04519 { 04520 /* tex_1d */ 04521 NULL, 04522 /* tex_2d */ 04523 "#version 120\n" 04524 "uniform sampler2D sampler;\n" 04525 "void main(void)\n" 04526 "{\n" 04527 " gl_FragDepth = texture2D(sampler, 
gl_TexCoord[0].xy).x;\n" 04528 "}\n", 04529 /* tex_3d */ 04530 NULL, 04531 /* tex_cube */ 04532 "#version 120\n" 04533 "uniform samplerCube sampler;\n" 04534 "void main(void)\n" 04535 "{\n" 04536 " gl_FragDepth = textureCube(sampler, gl_TexCoord[0].xyz).x;\n" 04537 "}\n", 04538 /* tex_rect */ 04539 "#version 120\n" 04540 "#extension GL_ARB_texture_rectangle : enable\n" 04541 "uniform sampler2DRect sampler;\n" 04542 "void main(void)\n" 04543 "{\n" 04544 " gl_FragDepth = texture2DRect(sampler, gl_TexCoord[0].xy).x;\n" 04545 "}\n", 04546 }; 04547 04548 static const char * const blt_pshaders_masked[tex_type_count] = 04549 { 04550 /* tex_1d */ 04551 NULL, 04552 /* tex_2d */ 04553 "#version 120\n" 04554 "uniform sampler2D sampler;\n" 04555 "uniform vec4 mask;\n" 04556 "void main(void)\n" 04557 "{\n" 04558 " if (all(lessThan(gl_FragCoord.xy, mask.zw))) discard;\n" 04559 " gl_FragDepth = texture2D(sampler, gl_TexCoord[0].xy).x;\n" 04560 "}\n", 04561 /* tex_3d */ 04562 NULL, 04563 /* tex_cube */ 04564 "#version 120\n" 04565 "uniform samplerCube sampler;\n" 04566 "uniform vec4 mask;\n" 04567 "void main(void)\n" 04568 "{\n" 04569 " if (all(lessThan(gl_FragCoord.xy, mask.zw))) discard;\n" 04570 " gl_FragDepth = textureCube(sampler, gl_TexCoord[0].xyz).x;\n" 04571 "}\n", 04572 /* tex_rect */ 04573 "#version 120\n" 04574 "#extension GL_ARB_texture_rectangle : enable\n" 04575 "uniform sampler2DRect sampler;\n" 04576 "uniform vec4 mask;\n" 04577 "void main(void)\n" 04578 "{\n" 04579 " if (all(lessThan(gl_FragCoord.xy, mask.zw))) discard;\n" 04580 " gl_FragDepth = texture2DRect(sampler, gl_TexCoord[0].xy).x;\n" 04581 "}\n", 04582 }; 04583 04584 blt_pshader = masked ? 
blt_pshaders_masked[tex_type] : blt_pshaders_full[tex_type]; 04585 if (!blt_pshader) 04586 { 04587 FIXME("tex_type %#x not supported\n", tex_type); 04588 return 0; 04589 } 04590 04591 vshader_id = GL_EXTCALL(glCreateShaderObjectARB(GL_VERTEX_SHADER_ARB)); 04592 shader_glsl_compile(gl_info, vshader_id, blt_vshader); 04593 04594 pshader_id = GL_EXTCALL(glCreateShaderObjectARB(GL_FRAGMENT_SHADER_ARB)); 04595 shader_glsl_compile(gl_info, pshader_id, blt_pshader); 04596 04597 program_id = GL_EXTCALL(glCreateProgramObjectARB()); 04598 GL_EXTCALL(glAttachObjectARB(program_id, vshader_id)); 04599 GL_EXTCALL(glAttachObjectARB(program_id, pshader_id)); 04600 GL_EXTCALL(glLinkProgramARB(program_id)); 04601 04602 shader_glsl_validate_link(gl_info, program_id); 04603 04604 /* Once linked we can mark the shaders for deletion. They will be deleted once the program 04605 * is destroyed 04606 */ 04607 GL_EXTCALL(glDeleteObjectARB(vshader_id)); 04608 GL_EXTCALL(glDeleteObjectARB(pshader_id)); 04609 return program_id; 04610 } 04611 04612 /* GL locking is done by the caller */ 04613 static void shader_glsl_select(const struct wined3d_context *context, BOOL usePS, BOOL useVS) 04614 { 04615 const struct wined3d_gl_info *gl_info = context->gl_info; 04616 struct wined3d_device *device = context->swapchain->device; 04617 struct shader_glsl_priv *priv = device->shader_priv; 04618 GLhandleARB program_id = 0; 04619 GLenum old_vertex_color_clamp, current_vertex_color_clamp; 04620 04621 old_vertex_color_clamp = priv->glsl_program ? priv->glsl_program->vertex_color_clamp : GL_FIXED_ONLY_ARB; 04622 04623 if (useVS || usePS) set_glsl_shader_program(context, device, usePS, useVS); 04624 else priv->glsl_program = NULL; 04625 04626 current_vertex_color_clamp = priv->glsl_program ? 
priv->glsl_program->vertex_color_clamp : GL_FIXED_ONLY_ARB; 04627 04628 if (old_vertex_color_clamp != current_vertex_color_clamp) 04629 { 04630 if (gl_info->supported[ARB_COLOR_BUFFER_FLOAT]) 04631 { 04632 GL_EXTCALL(glClampColorARB(GL_CLAMP_VERTEX_COLOR_ARB, current_vertex_color_clamp)); 04633 checkGLcall("glClampColorARB"); 04634 } 04635 else 04636 { 04637 FIXME("vertex color clamp needs to be changed, but extension not supported.\n"); 04638 } 04639 } 04640 04641 program_id = priv->glsl_program ? priv->glsl_program->programId : 0; 04642 if (program_id) TRACE("Using GLSL program %u\n", program_id); 04643 GL_EXTCALL(glUseProgramObjectARB(program_id)); 04644 checkGLcall("glUseProgramObjectARB"); 04645 04646 /* In case that NP2 texcoord fixup data is found for the selected program, trigger a reload of the 04647 * constants. This has to be done because it can't be guaranteed that sampler() (from state.c) is 04648 * called between selecting the shader and using it, which results in wrong fixup for some frames. */ 04649 if (priv->glsl_program && priv->glsl_program->np2Fixup_info) 04650 { 04651 shader_glsl_load_np2fixup_constants(priv, gl_info, &device->stateBlock->state); 04652 } 04653 } 04654 04655 /* GL locking is done by the caller */ 04656 static void shader_glsl_select_depth_blt(void *shader_priv, const struct wined3d_gl_info *gl_info, 04657 enum tex_types tex_type, const SIZE *ds_mask_size) 04658 { 04659 BOOL masked = ds_mask_size->cx && ds_mask_size->cy; 04660 struct shader_glsl_priv *priv = shader_priv; 04661 GLhandleARB *blt_program; 04662 GLint loc; 04663 04664 blt_program = masked ? 
&priv->depth_blt_program_masked[tex_type] : &priv->depth_blt_program_full[tex_type]; 04665 if (!*blt_program) 04666 { 04667 *blt_program = create_glsl_blt_shader(gl_info, tex_type, masked); 04668 loc = GL_EXTCALL(glGetUniformLocationARB(*blt_program, "sampler")); 04669 GL_EXTCALL(glUseProgramObjectARB(*blt_program)); 04670 GL_EXTCALL(glUniform1iARB(loc, 0)); 04671 } 04672 else 04673 { 04674 GL_EXTCALL(glUseProgramObjectARB(*blt_program)); 04675 } 04676 04677 if (masked) 04678 { 04679 loc = GL_EXTCALL(glGetUniformLocationARB(*blt_program, "mask")); 04680 GL_EXTCALL(glUniform4fARB(loc, 0.0f, 0.0f, (float)ds_mask_size->cx, (float)ds_mask_size->cy)); 04681 } 04682 } 04683 04684 /* GL locking is done by the caller */ 04685 static void shader_glsl_deselect_depth_blt(void *shader_priv, const struct wined3d_gl_info *gl_info) 04686 { 04687 struct shader_glsl_priv *priv = shader_priv; 04688 GLhandleARB program_id; 04689 04690 program_id = priv->glsl_program ? priv->glsl_program->programId : 0; 04691 if (program_id) TRACE("Using GLSL program %u\n", program_id); 04692 04693 GL_EXTCALL(glUseProgramObjectARB(program_id)); 04694 checkGLcall("glUseProgramObjectARB"); 04695 } 04696 04697 static void shader_glsl_destroy(struct wined3d_shader *shader) 04698 { 04699 struct wined3d_device *device = shader->device; 04700 struct shader_glsl_priv *priv = device->shader_priv; 04701 const struct wined3d_gl_info *gl_info; 04702 const struct list *linked_programs; 04703 struct wined3d_context *context; 04704 04705 char pshader = shader_is_pshader_version(shader->reg_maps.shader_version.type); 04706 04707 if (pshader) 04708 { 04709 struct glsl_pshader_private *shader_data = shader->backend_data; 04710 04711 if (!shader_data || !shader_data->num_gl_shaders) 04712 { 04713 HeapFree(GetProcessHeap(), 0, shader_data); 04714 shader->backend_data = NULL; 04715 return; 04716 } 04717 04718 context = context_acquire(device, NULL); 04719 gl_info = context->gl_info; 04720 04721 if (priv->glsl_program && 
priv->glsl_program->pshader == shader) 04722 { 04723 ENTER_GL(); 04724 shader_glsl_select(context, FALSE, FALSE); 04725 LEAVE_GL(); 04726 } 04727 } 04728 else 04729 { 04730 struct glsl_vshader_private *shader_data = shader->backend_data; 04731 04732 if (!shader_data || !shader_data->num_gl_shaders) 04733 { 04734 HeapFree(GetProcessHeap(), 0, shader_data); 04735 shader->backend_data = NULL; 04736 return; 04737 } 04738 04739 context = context_acquire(device, NULL); 04740 gl_info = context->gl_info; 04741 04742 if (priv->glsl_program && priv->glsl_program->vshader == shader) 04743 { 04744 ENTER_GL(); 04745 shader_glsl_select(context, FALSE, FALSE); 04746 LEAVE_GL(); 04747 } 04748 } 04749 04750 linked_programs = &shader->linked_programs; 04751 04752 TRACE("Deleting linked programs\n"); 04753 if (linked_programs->next) { 04754 struct glsl_shader_prog_link *entry, *entry2; 04755 04756 ENTER_GL(); 04757 if(pshader) { 04758 LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, linked_programs, struct glsl_shader_prog_link, pshader_entry) { 04759 delete_glsl_program_entry(priv, gl_info, entry); 04760 } 04761 } else { 04762 LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, linked_programs, struct glsl_shader_prog_link, vshader_entry) { 04763 delete_glsl_program_entry(priv, gl_info, entry); 04764 } 04765 } 04766 LEAVE_GL(); 04767 } 04768 04769 if (pshader) 04770 { 04771 struct glsl_pshader_private *shader_data = shader->backend_data; 04772 UINT i; 04773 04774 ENTER_GL(); 04775 for(i = 0; i < shader_data->num_gl_shaders; i++) { 04776 TRACE("deleting pshader %u\n", shader_data->gl_shaders[i].prgId); 04777 GL_EXTCALL(glDeleteObjectARB(shader_data->gl_shaders[i].prgId)); 04778 checkGLcall("glDeleteObjectARB"); 04779 } 04780 LEAVE_GL(); 04781 HeapFree(GetProcessHeap(), 0, shader_data->gl_shaders); 04782 } 04783 else 04784 { 04785 struct glsl_vshader_private *shader_data = shader->backend_data; 04786 UINT i; 04787 04788 ENTER_GL(); 04789 for(i = 0; i < shader_data->num_gl_shaders; i++) { 04790 
TRACE("deleting vshader %u\n", shader_data->gl_shaders[i].prgId); 04791 GL_EXTCALL(glDeleteObjectARB(shader_data->gl_shaders[i].prgId)); 04792 checkGLcall("glDeleteObjectARB"); 04793 } 04794 LEAVE_GL(); 04795 HeapFree(GetProcessHeap(), 0, shader_data->gl_shaders); 04796 } 04797 04798 HeapFree(GetProcessHeap(), 0, shader->backend_data); 04799 shader->backend_data = NULL; 04800 04801 context_release(context); 04802 } 04803 04804 static int glsl_program_key_compare(const void *key, const struct wine_rb_entry *entry) 04805 { 04806 const struct glsl_program_key *k = key; 04807 const struct glsl_shader_prog_link *prog = WINE_RB_ENTRY_VALUE(entry, 04808 const struct glsl_shader_prog_link, program_lookup_entry); 04809 int cmp; 04810 04811 if (k->vshader > prog->vshader) return 1; 04812 else if (k->vshader < prog->vshader) return -1; 04813 04814 if (k->pshader > prog->pshader) return 1; 04815 else if (k->pshader < prog->pshader) return -1; 04816 04817 if (k->vshader && (cmp = memcmp(&k->vs_args, &prog->vs_args, sizeof(prog->vs_args)))) return cmp; 04818 if (k->pshader && (cmp = memcmp(&k->ps_args, &prog->ps_args, sizeof(prog->ps_args)))) return cmp; 04819 04820 return 0; 04821 } 04822 04823 static BOOL constant_heap_init(struct constant_heap *heap, unsigned int constant_count) 04824 { 04825 SIZE_T size = (constant_count + 1) * sizeof(*heap->entries) + constant_count * sizeof(*heap->positions); 04826 void *mem = HeapAlloc(GetProcessHeap(), 0, size); 04827 04828 if (!mem) 04829 { 04830 ERR("Failed to allocate memory\n"); 04831 return FALSE; 04832 } 04833 04834 heap->entries = mem; 04835 heap->entries[1].version = 0; 04836 heap->positions = (unsigned int *)(heap->entries + constant_count + 1); 04837 heap->size = 1; 04838 04839 return TRUE; 04840 } 04841 04842 static void constant_heap_free(struct constant_heap *heap) 04843 { 04844 HeapFree(GetProcessHeap(), 0, heap->entries); 04845 } 04846 04847 static const struct wine_rb_functions wined3d_glsl_program_rb_functions = 04848 { 
04849 wined3d_rb_alloc, 04850 wined3d_rb_realloc, 04851 wined3d_rb_free, 04852 glsl_program_key_compare, 04853 }; 04854 04855 static HRESULT shader_glsl_alloc(struct wined3d_device *device) 04856 { 04857 const struct wined3d_gl_info *gl_info = &device->adapter->gl_info; 04858 struct shader_glsl_priv *priv = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(struct shader_glsl_priv)); 04859 SIZE_T stack_size = wined3d_log2i(max(gl_info->limits.glsl_vs_float_constants, 04860 gl_info->limits.glsl_ps_float_constants)) + 1; 04861 04862 if (!shader_buffer_init(&priv->shader_buffer)) 04863 { 04864 ERR("Failed to initialize shader buffer.\n"); 04865 goto fail; 04866 } 04867 04868 priv->stack = HeapAlloc(GetProcessHeap(), 0, stack_size * sizeof(*priv->stack)); 04869 if (!priv->stack) 04870 { 04871 ERR("Failed to allocate memory.\n"); 04872 goto fail; 04873 } 04874 04875 if (!constant_heap_init(&priv->vconst_heap, gl_info->limits.glsl_vs_float_constants)) 04876 { 04877 ERR("Failed to initialize vertex shader constant heap\n"); 04878 goto fail; 04879 } 04880 04881 if (!constant_heap_init(&priv->pconst_heap, gl_info->limits.glsl_ps_float_constants)) 04882 { 04883 ERR("Failed to initialize pixel shader constant heap\n"); 04884 goto fail; 04885 } 04886 04887 if (wine_rb_init(&priv->program_lookup, &wined3d_glsl_program_rb_functions) == -1) 04888 { 04889 ERR("Failed to initialize rbtree.\n"); 04890 goto fail; 04891 } 04892 04893 priv->next_constant_version = 1; 04894 04895 device->shader_priv = priv; 04896 return WINED3D_OK; 04897 04898 fail: 04899 constant_heap_free(&priv->pconst_heap); 04900 constant_heap_free(&priv->vconst_heap); 04901 HeapFree(GetProcessHeap(), 0, priv->stack); 04902 shader_buffer_free(&priv->shader_buffer); 04903 HeapFree(GetProcessHeap(), 0, priv); 04904 return E_OUTOFMEMORY; 04905 } 04906 04907 /* Context activation is done by the caller. 
*/ 04908 static void shader_glsl_free(struct wined3d_device *device) 04909 { 04910 const struct wined3d_gl_info *gl_info = &device->adapter->gl_info; 04911 struct shader_glsl_priv *priv = device->shader_priv; 04912 int i; 04913 04914 ENTER_GL(); 04915 for (i = 0; i < tex_type_count; ++i) 04916 { 04917 if (priv->depth_blt_program_full[i]) 04918 { 04919 GL_EXTCALL(glDeleteObjectARB(priv->depth_blt_program_full[i])); 04920 } 04921 if (priv->depth_blt_program_masked[i]) 04922 { 04923 GL_EXTCALL(glDeleteObjectARB(priv->depth_blt_program_masked[i])); 04924 } 04925 } 04926 LEAVE_GL(); 04927 04928 wine_rb_destroy(&priv->program_lookup, NULL, NULL); 04929 constant_heap_free(&priv->pconst_heap); 04930 constant_heap_free(&priv->vconst_heap); 04931 HeapFree(GetProcessHeap(), 0, priv->stack); 04932 shader_buffer_free(&priv->shader_buffer); 04933 04934 HeapFree(GetProcessHeap(), 0, device->shader_priv); 04935 device->shader_priv = NULL; 04936 } 04937 04938 static void shader_glsl_context_destroyed(void *shader_priv, const struct wined3d_context *context) {} 04939 04940 static void shader_glsl_get_caps(const struct wined3d_gl_info *gl_info, struct shader_caps *caps) 04941 { 04942 if (gl_info->supported[EXT_GPU_SHADER4] && gl_info->supported[ARB_GEOMETRY_SHADER4] 04943 && gl_info->glsl_version >= MAKEDWORD_VERSION(1, 50)) 04944 { 04945 caps->VertexShaderVersion = 4; 04946 caps->PixelShaderVersion = 4; 04947 } 04948 /* ARB_shader_texture_lod or EXT_gpu_shader4 is required for the SM3 04949 * texldd and texldl instructions. 
*/ 04950 else if (gl_info->supported[ARB_SHADER_TEXTURE_LOD] || gl_info->supported[EXT_GPU_SHADER4]) 04951 { 04952 caps->VertexShaderVersion = 3; 04953 caps->PixelShaderVersion = 3; 04954 } 04955 else 04956 { 04957 caps->VertexShaderVersion = 2; 04958 caps->PixelShaderVersion = 2; 04959 } 04960 04961 caps->MaxVertexShaderConst = gl_info->limits.glsl_vs_float_constants; 04962 caps->MaxPixelShaderConst = gl_info->limits.glsl_ps_float_constants; 04963 04964 /* FIXME: The following line is card dependent. -8.0 to 8.0 is the 04965 * Direct3D minimum requirement. 04966 * 04967 * Both GL_ARB_fragment_program and GLSL require a "maximum representable magnitude" 04968 * of colors to be 2^10, and 2^32 for other floats. Should we use 1024 here? 04969 * 04970 * The problem is that the refrast clamps temporary results in the shader to 04971 * [-MaxValue;+MaxValue]. If the card's max value is bigger than the one we advertize here, 04972 * then applications may miss the clamping behavior. On the other hand, if it is smaller, 04973 * the shader will generate incorrect results too. Unfortunately, GL deliberately doesn't 04974 * offer a way to query this. 04975 */ 04976 caps->PixelShader1xMaxValue = 8.0; 04977 04978 caps->VSClipping = TRUE; 04979 04980 TRACE_(d3d_caps)("Hardware vertex shader version %u enabled (GLSL).\n", 04981 caps->VertexShaderVersion); 04982 TRACE_(d3d_caps)("Hardware pixel shader version %u enabled (GLSL).\n", 04983 caps->PixelShaderVersion); 04984 } 04985 04986 static BOOL shader_glsl_color_fixup_supported(struct color_fixup_desc fixup) 04987 { 04988 if (TRACE_ON(d3d_shader) && TRACE_ON(d3d)) 04989 { 04990 TRACE("Checking support for fixup:\n"); 04991 dump_color_fixup_desc(fixup); 04992 } 04993 04994 /* We support everything except YUV conversions. 
*/ 04995 if (!is_complex_fixup(fixup)) 04996 { 04997 TRACE("[OK]\n"); 04998 return TRUE; 04999 } 05000 05001 TRACE("[FAILED]\n"); 05002 return FALSE; 05003 } 05004 05005 static const SHADER_HANDLER shader_glsl_instruction_handler_table[WINED3DSIH_TABLE_SIZE] = 05006 { 05007 /* WINED3DSIH_ABS */ shader_glsl_map2gl, 05008 /* WINED3DSIH_ADD */ shader_glsl_arith, 05009 /* WINED3DSIH_AND */ NULL, 05010 /* WINED3DSIH_BEM */ shader_glsl_bem, 05011 /* WINED3DSIH_BREAK */ shader_glsl_break, 05012 /* WINED3DSIH_BREAKC */ shader_glsl_breakc, 05013 /* WINED3DSIH_BREAKP */ NULL, 05014 /* WINED3DSIH_CALL */ shader_glsl_call, 05015 /* WINED3DSIH_CALLNZ */ shader_glsl_callnz, 05016 /* WINED3DSIH_CMP */ shader_glsl_cmp, 05017 /* WINED3DSIH_CND */ shader_glsl_cnd, 05018 /* WINED3DSIH_CRS */ shader_glsl_cross, 05019 /* WINED3DSIH_CUT */ NULL, 05020 /* WINED3DSIH_DCL */ NULL, 05021 /* WINED3DSIH_DEF */ NULL, 05022 /* WINED3DSIH_DEFB */ NULL, 05023 /* WINED3DSIH_DEFI */ NULL, 05024 /* WINED3DSIH_DIV */ NULL, 05025 /* WINED3DSIH_DP2ADD */ shader_glsl_dp2add, 05026 /* WINED3DSIH_DP3 */ shader_glsl_dot, 05027 /* WINED3DSIH_DP4 */ shader_glsl_dot, 05028 /* WINED3DSIH_DST */ shader_glsl_dst, 05029 /* WINED3DSIH_DSX */ shader_glsl_map2gl, 05030 /* WINED3DSIH_DSY */ shader_glsl_map2gl, 05031 /* WINED3DSIH_ELSE */ shader_glsl_else, 05032 /* WINED3DSIH_EMIT */ NULL, 05033 /* WINED3DSIH_ENDIF */ shader_glsl_end, 05034 /* WINED3DSIH_ENDLOOP */ shader_glsl_end, 05035 /* WINED3DSIH_ENDREP */ shader_glsl_end, 05036 /* WINED3DSIH_EQ */ NULL, 05037 /* WINED3DSIH_EXP */ shader_glsl_map2gl, 05038 /* WINED3DSIH_EXPP */ shader_glsl_expp, 05039 /* WINED3DSIH_FRC */ shader_glsl_map2gl, 05040 /* WINED3DSIH_FTOI */ NULL, 05041 /* WINED3DSIH_GE */ NULL, 05042 /* WINED3DSIH_IADD */ NULL, 05043 /* WINED3DSIH_IEQ */ NULL, 05044 /* WINED3DSIH_IF */ shader_glsl_if, 05045 /* WINED3DSIH_IFC */ shader_glsl_ifc, 05046 /* WINED3DSIH_IGE */ NULL, 05047 /* WINED3DSIH_IMUL */ NULL, 05048 /* WINED3DSIH_ITOF */ NULL, 05049 
/* WINED3DSIH_LABEL */ shader_glsl_label, 05050 /* WINED3DSIH_LD */ NULL, 05051 /* WINED3DSIH_LIT */ shader_glsl_lit, 05052 /* WINED3DSIH_LOG */ shader_glsl_log, 05053 /* WINED3DSIH_LOGP */ shader_glsl_log, 05054 /* WINED3DSIH_LOOP */ shader_glsl_loop, 05055 /* WINED3DSIH_LRP */ shader_glsl_lrp, 05056 /* WINED3DSIH_LT */ NULL, 05057 /* WINED3DSIH_M3x2 */ shader_glsl_mnxn, 05058 /* WINED3DSIH_M3x3 */ shader_glsl_mnxn, 05059 /* WINED3DSIH_M3x4 */ shader_glsl_mnxn, 05060 /* WINED3DSIH_M4x3 */ shader_glsl_mnxn, 05061 /* WINED3DSIH_M4x4 */ shader_glsl_mnxn, 05062 /* WINED3DSIH_MAD */ shader_glsl_mad, 05063 /* WINED3DSIH_MAX */ shader_glsl_map2gl, 05064 /* WINED3DSIH_MIN */ shader_glsl_map2gl, 05065 /* WINED3DSIH_MOV */ shader_glsl_mov, 05066 /* WINED3DSIH_MOVA */ shader_glsl_mov, 05067 /* WINED3DSIH_MOVC */ NULL, 05068 /* WINED3DSIH_MUL */ shader_glsl_arith, 05069 /* WINED3DSIH_NOP */ NULL, 05070 /* WINED3DSIH_NRM */ shader_glsl_nrm, 05071 /* WINED3DSIH_PHASE */ NULL, 05072 /* WINED3DSIH_POW */ shader_glsl_pow, 05073 /* WINED3DSIH_RCP */ shader_glsl_rcp, 05074 /* WINED3DSIH_REP */ shader_glsl_rep, 05075 /* WINED3DSIH_RET */ shader_glsl_ret, 05076 /* WINED3DSIH_ROUND_NI */ NULL, 05077 /* WINED3DSIH_RSQ */ shader_glsl_rsq, 05078 /* WINED3DSIH_SAMPLE */ NULL, 05079 /* WINED3DSIH_SAMPLE_GRAD */ NULL, 05080 /* WINED3DSIH_SAMPLE_LOD */ NULL, 05081 /* WINED3DSIH_SETP */ NULL, 05082 /* WINED3DSIH_SGE */ shader_glsl_compare, 05083 /* WINED3DSIH_SGN */ shader_glsl_sgn, 05084 /* WINED3DSIH_SINCOS */ shader_glsl_sincos, 05085 /* WINED3DSIH_SLT */ shader_glsl_compare, 05086 /* WINED3DSIH_SQRT */ NULL, 05087 /* WINED3DSIH_SUB */ shader_glsl_arith, 05088 /* WINED3DSIH_TEX */ shader_glsl_tex, 05089 /* WINED3DSIH_TEXBEM */ shader_glsl_texbem, 05090 /* WINED3DSIH_TEXBEML */ shader_glsl_texbem, 05091 /* WINED3DSIH_TEXCOORD */ shader_glsl_texcoord, 05092 /* WINED3DSIH_TEXDEPTH */ shader_glsl_texdepth, 05093 /* WINED3DSIH_TEXDP3 */ shader_glsl_texdp3, 05094 /* WINED3DSIH_TEXDP3TEX */ 
shader_glsl_texdp3tex, 05095 /* WINED3DSIH_TEXKILL */ shader_glsl_texkill, 05096 /* WINED3DSIH_TEXLDD */ shader_glsl_texldd, 05097 /* WINED3DSIH_TEXLDL */ shader_glsl_texldl, 05098 /* WINED3DSIH_TEXM3x2DEPTH */ shader_glsl_texm3x2depth, 05099 /* WINED3DSIH_TEXM3x2PAD */ shader_glsl_texm3x2pad, 05100 /* WINED3DSIH_TEXM3x2TEX */ shader_glsl_texm3x2tex, 05101 /* WINED3DSIH_TEXM3x3 */ shader_glsl_texm3x3, 05102 /* WINED3DSIH_TEXM3x3DIFF */ NULL, 05103 /* WINED3DSIH_TEXM3x3PAD */ shader_glsl_texm3x3pad, 05104 /* WINED3DSIH_TEXM3x3SPEC */ shader_glsl_texm3x3spec, 05105 /* WINED3DSIH_TEXM3x3TEX */ shader_glsl_texm3x3tex, 05106 /* WINED3DSIH_TEXM3x3VSPEC */ shader_glsl_texm3x3vspec, 05107 /* WINED3DSIH_TEXREG2AR */ shader_glsl_texreg2ar, 05108 /* WINED3DSIH_TEXREG2GB */ shader_glsl_texreg2gb, 05109 /* WINED3DSIH_TEXREG2RGB */ shader_glsl_texreg2rgb, 05110 /* WINED3DSIH_UDIV */ NULL, 05111 /* WINED3DSIH_USHR */ NULL, 05112 /* WINED3DSIH_UTOF */ NULL, 05113 /* WINED3DSIH_XOR */ NULL, 05114 }; 05115 05116 static void shader_glsl_handle_instruction(const struct wined3d_shader_instruction *ins) { 05117 SHADER_HANDLER hw_fct; 05118 05119 /* Select handler */ 05120 hw_fct = shader_glsl_instruction_handler_table[ins->handler_idx]; 05121 05122 /* Unhandled opcode */ 05123 if (!hw_fct) 05124 { 05125 FIXME("Backend can't handle opcode %#x\n", ins->handler_idx); 05126 return; 05127 } 05128 hw_fct(ins); 05129 05130 shader_glsl_add_instruction_modifiers(ins); 05131 } 05132 05133 const struct wined3d_shader_backend_ops glsl_shader_backend = 05134 { 05135 shader_glsl_handle_instruction, 05136 shader_glsl_select, 05137 shader_glsl_select_depth_blt, 05138 shader_glsl_deselect_depth_blt, 05139 shader_glsl_update_float_vertex_constants, 05140 shader_glsl_update_float_pixel_constants, 05141 shader_glsl_load_constants, 05142 shader_glsl_load_np2fixup_constants, 05143 shader_glsl_destroy, 05144 shader_glsl_alloc, 05145 shader_glsl_free, 05146 shader_glsl_context_destroyed, 05147 
shader_glsl_get_caps, 05148 shader_glsl_color_fixup_supported, 05149 }; Generated on Mon May 28 2012 04:21:52 for ReactOS by
1.7.6.1
|