Home | Info | Community | Development | myReactOS | Contact Us
ReactOS Development > Doxygen > arb_program_shader.c
Go to the documentation of this file.
00001 /* 00002 * Pixel and vertex shaders implementation using ARB_vertex_program 00003 * and ARB_fragment_program GL extensions. 00004 * 00005 * Copyright 2002-2003 Jason Edmeades 00006 * Copyright 2002-2003 Raphael Junqueira 00007 * Copyright 2004 Christian Costa 00008 * Copyright 2005 Oliver Stieber 00009 * Copyright 2006 Ivan Gyurdiev 00010 * Copyright 2006 Jason Green 00011 * Copyright 2006 Henri Verbeet 00012 * Copyright 2007-2008 Stefan Dösinger for CodeWeavers 00013 * Copyright 2009 Henri Verbeet for CodeWeavers 00014 * 00015 * This library is free software; you can redistribute it and/or 00016 * modify it under the terms of the GNU Lesser General Public 00017 * License as published by the Free Software Foundation; either 00018 * version 2.1 of the License, or (at your option) any later version. 00019 * 00020 * This library is distributed in the hope that it will be useful, 00021 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00022 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00023 * Lesser General Public License for more details. 00024 * 00025 * You should have received a copy of the GNU Lesser General Public 00026 * License along with this library; if not, write to the Free Software 00027 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA 00028 */ 00029 00030 #include "config.h" 00031 00032 #include <math.h> 00033 #include <stdio.h> 00034 00035 #include "wined3d_private.h" 00036 00037 WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader); 00038 WINE_DECLARE_DEBUG_CHANNEL(d3d_constants); 00039 WINE_DECLARE_DEBUG_CHANNEL(d3d_caps); 00040 WINE_DECLARE_DEBUG_CHANNEL(d3d); 00041 00042 /* Extract a line. Note that this modifies the source string. 
*/ 00043 static char *get_line(char **ptr) 00044 { 00045 char *p, *q; 00046 00047 p = *ptr; 00048 if (!(q = strstr(p, "\n"))) 00049 { 00050 if (!*p) return NULL; 00051 *ptr += strlen(p); 00052 return p; 00053 } 00054 *q = '\0'; 00055 *ptr = q + 1; 00056 00057 return p; 00058 } 00059 00060 static void shader_arb_dump_program_source(const char *source) 00061 { 00062 ULONG source_size; 00063 char *ptr, *line, *tmp; 00064 00065 source_size = strlen(source) + 1; 00066 tmp = HeapAlloc(GetProcessHeap(), 0, source_size); 00067 if (!tmp) 00068 { 00069 ERR("Failed to allocate %u bytes for shader source.\n", source_size); 00070 return; 00071 } 00072 memcpy(tmp, source, source_size); 00073 00074 ptr = tmp; 00075 while ((line = get_line(&ptr))) FIXME(" %s\n", line); 00076 FIXME("\n"); 00077 00078 HeapFree(GetProcessHeap(), 0, tmp); 00079 } 00080 00081 enum arb_helper_value 00082 { 00083 ARB_ZERO, 00084 ARB_ONE, 00085 ARB_TWO, 00086 ARB_0001, 00087 ARB_EPS, 00088 00089 ARB_VS_REL_OFFSET 00090 }; 00091 00092 static const char *arb_get_helper_value(enum wined3d_shader_type shader, enum arb_helper_value value) 00093 { 00094 if (shader == WINED3D_SHADER_TYPE_GEOMETRY) 00095 { 00096 ERR("Geometry shaders are unsupported\n"); 00097 return "bad"; 00098 } 00099 00100 if (shader == WINED3D_SHADER_TYPE_PIXEL) 00101 { 00102 switch (value) 00103 { 00104 case ARB_ZERO: return "ps_helper_const.x"; 00105 case ARB_ONE: return "ps_helper_const.y"; 00106 case ARB_TWO: return "coefmul.x"; 00107 case ARB_0001: return "ps_helper_const.xxxy"; 00108 case ARB_EPS: return "ps_helper_const.z"; 00109 default: break; 00110 } 00111 } 00112 else 00113 { 00114 switch (value) 00115 { 00116 case ARB_ZERO: return "helper_const.x"; 00117 case ARB_ONE: return "helper_const.y"; 00118 case ARB_TWO: return "helper_const.z"; 00119 case ARB_EPS: return "helper_const.w"; 00120 case ARB_0001: return "helper_const.xxxy"; 00121 case ARB_VS_REL_OFFSET: return "rel_addr_const.y"; 00122 } 00123 } 00124 FIXME("Unmanaged %s 
shader helper constant requested: %u\n", 00125 shader == WINED3D_SHADER_TYPE_PIXEL ? "pixel" : "vertex", value); 00126 switch (value) 00127 { 00128 case ARB_ZERO: return "0.0"; 00129 case ARB_ONE: return "1.0"; 00130 case ARB_TWO: return "2.0"; 00131 case ARB_0001: return "{0.0, 0.0, 0.0, 1.0}"; 00132 case ARB_EPS: return "1e-8"; 00133 default: return "bad"; 00134 } 00135 } 00136 00137 static inline BOOL ffp_clip_emul(const struct wined3d_state *state) 00138 { 00139 return state->lowest_disabled_stage < 7; 00140 } 00141 00142 /* ARB_program_shader private data */ 00143 00144 struct control_frame 00145 { 00146 struct list entry; 00147 enum 00148 { 00149 IF, 00150 IFC, 00151 LOOP, 00152 REP 00153 } type; 00154 BOOL muting; 00155 BOOL outer_loop; 00156 union 00157 { 00158 unsigned int loop; 00159 unsigned int ifc; 00160 } no; 00161 struct wined3d_shader_loop_control loop_control; 00162 BOOL had_else; 00163 }; 00164 00165 struct arb_ps_np2fixup_info 00166 { 00167 struct ps_np2fixup_info super; 00168 /* For ARB we need a offset value: 00169 * With both GLSL and ARB mode the NP2 fixup information (the texture dimensions) are stored in a 00170 * consecutive way (GLSL uses a uniform array). Since ARB doesn't know the notion of a "standalone" 00171 * array we need an offset to the index inside the program local parameter array. 
*/ 00172 UINT offset; 00173 }; 00174 00175 struct arb_ps_compile_args 00176 { 00177 struct ps_compile_args super; 00178 WORD bools; 00179 WORD clip; /* only a boolean, use a WORD for alignment */ 00180 unsigned char loop_ctrl[MAX_CONST_I][3]; 00181 }; 00182 00183 struct stb_const_desc 00184 { 00185 unsigned char texunit; 00186 UINT const_num; 00187 }; 00188 00189 struct arb_ps_compiled_shader 00190 { 00191 struct arb_ps_compile_args args; 00192 struct arb_ps_np2fixup_info np2fixup_info; 00193 struct stb_const_desc bumpenvmatconst[MAX_TEXTURES]; 00194 struct stb_const_desc luminanceconst[MAX_TEXTURES]; 00195 UINT int_consts[MAX_CONST_I]; 00196 GLuint prgId; 00197 UINT ycorrection; 00198 unsigned char numbumpenvmatconsts; 00199 char num_int_consts; 00200 }; 00201 00202 struct arb_vs_compile_args 00203 { 00204 struct vs_compile_args super; 00205 union 00206 { 00207 struct 00208 { 00209 WORD bools; 00210 unsigned char clip_texcoord; 00211 unsigned char clipplane_mask; 00212 } boolclip; 00213 DWORD boolclip_compare; 00214 } clip; 00215 DWORD ps_signature; 00216 union 00217 { 00218 unsigned char samplers[4]; 00219 DWORD samplers_compare; 00220 } vertex; 00221 unsigned char loop_ctrl[MAX_CONST_I][3]; 00222 }; 00223 00224 struct arb_vs_compiled_shader 00225 { 00226 struct arb_vs_compile_args args; 00227 GLuint prgId; 00228 UINT int_consts[MAX_CONST_I]; 00229 char num_int_consts; 00230 char need_color_unclamp; 00231 UINT pos_fixup; 00232 }; 00233 00234 struct recorded_instruction 00235 { 00236 struct wined3d_shader_instruction ins; 00237 struct list entry; 00238 }; 00239 00240 struct shader_arb_ctx_priv 00241 { 00242 char addr_reg[20]; 00243 enum 00244 { 00245 /* plain GL_ARB_vertex_program or GL_ARB_fragment_program */ 00246 ARB, 00247 /* GL_NV_vertex_progam2_option or GL_NV_fragment_program_option */ 00248 NV2, 00249 /* GL_NV_vertex_program3 or GL_NV_fragment_program2 */ 00250 NV3 00251 } target_version; 00252 00253 const struct arb_vs_compile_args *cur_vs_args; 00254 
const struct arb_ps_compile_args *cur_ps_args; 00255 const struct arb_ps_compiled_shader *compiled_fprog; 00256 const struct arb_vs_compiled_shader *compiled_vprog; 00257 struct arb_ps_np2fixup_info *cur_np2fixup_info; 00258 struct list control_frames; 00259 struct list record; 00260 BOOL recording; 00261 BOOL muted; 00262 unsigned int num_loops, loop_depth, num_ifcs; 00263 int aL; 00264 00265 unsigned int vs_clipplanes; 00266 BOOL footer_written; 00267 BOOL in_main_func; 00268 00269 /* For 3.0 vertex shaders */ 00270 const char *vs_output[MAX_REG_OUTPUT]; 00271 /* For 2.x and earlier vertex shaders */ 00272 const char *texcrd_output[8], *color_output[2], *fog_output; 00273 00274 /* 3.0 pshader input for compatibility with fixed function */ 00275 const char *ps_input[MAX_REG_INPUT]; 00276 }; 00277 00278 struct ps_signature 00279 { 00280 struct wined3d_shader_signature_element *sig; 00281 DWORD idx; 00282 struct wine_rb_entry entry; 00283 }; 00284 00285 struct arb_pshader_private { 00286 struct arb_ps_compiled_shader *gl_shaders; 00287 UINT num_gl_shaders, shader_array_size; 00288 DWORD input_signature_idx; 00289 DWORD clipplane_emulation; 00290 BOOL clamp_consts; 00291 }; 00292 00293 struct arb_vshader_private { 00294 struct arb_vs_compiled_shader *gl_shaders; 00295 UINT num_gl_shaders, shader_array_size; 00296 UINT rel_offset; 00297 }; 00298 00299 struct shader_arb_priv 00300 { 00301 GLuint current_vprogram_id; 00302 GLuint current_fprogram_id; 00303 const struct arb_ps_compiled_shader *compiled_fprog; 00304 const struct arb_vs_compiled_shader *compiled_vprog; 00305 GLuint depth_blt_vprogram_id; 00306 GLuint depth_blt_fprogram_id_full[tex_type_count]; 00307 GLuint depth_blt_fprogram_id_masked[tex_type_count]; 00308 BOOL use_arbfp_fixed_func; 00309 struct wine_rb_tree fragment_shaders; 00310 BOOL last_ps_const_clamped; 00311 BOOL last_vs_color_unclamp; 00312 00313 struct wine_rb_tree signature_tree; 00314 DWORD ps_sig_number; 00315 00316 unsigned int 
highest_dirty_ps_const, highest_dirty_vs_const; 00317 char *vshader_const_dirty, *pshader_const_dirty; 00318 const struct wined3d_context *last_context; 00319 }; 00320 00321 /* GL locking for state handlers is done by the caller. */ 00322 static BOOL need_rel_addr_const(const struct arb_vshader_private *shader_data, 00323 const struct wined3d_shader_reg_maps *reg_maps, const struct wined3d_gl_info *gl_info) 00324 { 00325 if (shader_data->rel_offset) return TRUE; 00326 if (!reg_maps->usesmova) return FALSE; 00327 return !gl_info->supported[NV_VERTEX_PROGRAM2_OPTION]; 00328 } 00329 00330 /* Returns TRUE if result.clip from GL_NV_vertex_program2 should be used and FALSE otherwise */ 00331 static inline BOOL use_nv_clip(const struct wined3d_gl_info *gl_info) 00332 { 00333 return gl_info->supported[NV_VERTEX_PROGRAM2_OPTION] 00334 && !(gl_info->quirks & WINED3D_QUIRK_NV_CLIP_BROKEN); 00335 } 00336 00337 static BOOL need_helper_const(const struct arb_vshader_private *shader_data, 00338 const struct wined3d_shader_reg_maps *reg_maps, const struct wined3d_gl_info *gl_info) 00339 { 00340 if (need_rel_addr_const(shader_data, reg_maps, gl_info)) return TRUE; 00341 if (!gl_info->supported[NV_VERTEX_PROGRAM]) return TRUE; /* Need to init colors. */ 00342 if (gl_info->quirks & WINED3D_QUIRK_ARB_VS_OFFSET_LIMIT) return TRUE; /* Load the immval offset. */ 00343 if (gl_info->quirks & WINED3D_QUIRK_SET_TEXCOORD_W) return TRUE; /* Have to init texcoords. 
*/ 00344 if (!use_nv_clip(gl_info)) return TRUE; /* Init the clip texcoord */ 00345 if (reg_maps->usesnrm) return TRUE; /* 0.0 */ 00346 if (reg_maps->usespow) return TRUE; /* EPS, 0.0 and 1.0 */ 00347 return FALSE; 00348 } 00349 00350 static unsigned int reserved_vs_const(const struct arb_vshader_private *shader_data, 00351 const struct wined3d_shader_reg_maps *reg_maps, const struct wined3d_gl_info *gl_info) 00352 { 00353 unsigned int ret = 1; 00354 /* We use one PARAM for the pos fixup, and in some cases one to load 00355 * some immediate values into the shader. */ 00356 if (need_helper_const(shader_data, reg_maps, gl_info)) ++ret; 00357 if (need_rel_addr_const(shader_data, reg_maps, gl_info)) ++ret; 00358 return ret; 00359 } 00360 00361 /* Loads floating point constants into the currently set ARB_vertex/fragment_program. 00362 * When constant_list == NULL, it will load all the constants. 00363 * 00364 * @target_type should be either GL_VERTEX_PROGRAM_ARB (for vertex shaders) 00365 * or GL_FRAGMENT_PROGRAM_ARB (for pixel shaders) 00366 */ 00367 /* GL locking is done by the caller */ 00368 static unsigned int shader_arb_load_constantsF(const struct wined3d_shader *shader, 00369 const struct wined3d_gl_info *gl_info, GLuint target_type, unsigned int max_constants, 00370 const float *constants, char *dirty_consts) 00371 { 00372 struct wined3d_shader_lconst *lconst; 00373 DWORD i, j; 00374 unsigned int ret; 00375 00376 if (TRACE_ON(d3d_constants)) 00377 { 00378 for(i = 0; i < max_constants; i++) { 00379 if(!dirty_consts[i]) continue; 00380 TRACE_(d3d_constants)("Loading constants %i: %f, %f, %f, %f\n", i, 00381 constants[i * 4 + 0], constants[i * 4 + 1], 00382 constants[i * 4 + 2], constants[i * 4 + 3]); 00383 } 00384 } 00385 00386 i = 0; 00387 00388 /* In 1.X pixel shaders constants are implicitly clamped in the range [-1;1] */ 00389 if (target_type == GL_FRAGMENT_PROGRAM_ARB && shader->reg_maps.shader_version.major == 1) 00390 { 00391 float lcl_const[4]; 00392 /* 
ps 1.x supports only 8 constants, clamp only those. When switching between 1.x and higher 00393 * shaders, the first 8 constants are marked dirty for reload 00394 */ 00395 for(; i < min(8, max_constants); i++) { 00396 if(!dirty_consts[i]) continue; 00397 dirty_consts[i] = 0; 00398 00399 j = 4 * i; 00400 if (constants[j + 0] > 1.0f) lcl_const[0] = 1.0f; 00401 else if (constants[j + 0] < -1.0f) lcl_const[0] = -1.0f; 00402 else lcl_const[0] = constants[j + 0]; 00403 00404 if (constants[j + 1] > 1.0f) lcl_const[1] = 1.0f; 00405 else if (constants[j + 1] < -1.0f) lcl_const[1] = -1.0f; 00406 else lcl_const[1] = constants[j + 1]; 00407 00408 if (constants[j + 2] > 1.0f) lcl_const[2] = 1.0f; 00409 else if (constants[j + 2] < -1.0f) lcl_const[2] = -1.0f; 00410 else lcl_const[2] = constants[j + 2]; 00411 00412 if (constants[j + 3] > 1.0f) lcl_const[3] = 1.0f; 00413 else if (constants[j + 3] < -1.0f) lcl_const[3] = -1.0f; 00414 else lcl_const[3] = constants[j + 3]; 00415 00416 GL_EXTCALL(glProgramEnvParameter4fvARB(target_type, i, lcl_const)); 00417 } 00418 00419 /* If further constants are dirty, reload them without clamping. 00420 * 00421 * The alternative is not to touch them, but then we cannot reset the dirty constant count 00422 * to zero. 
That's bad for apps that only use PS 1.x shaders, because in that case the code 00423 * above would always re-check the first 8 constants since max_constant remains at the init 00424 * value 00425 */ 00426 } 00427 00428 if (gl_info->supported[EXT_GPU_PROGRAM_PARAMETERS]) 00429 { 00430 /* TODO: Benchmark if we're better of with finding the dirty constants ourselves, 00431 * or just reloading *all* constants at once 00432 * 00433 GL_EXTCALL(glProgramEnvParameters4fvEXT(target_type, i, max_constants, constants + (i * 4))); 00434 */ 00435 for(; i < max_constants; i++) { 00436 if(!dirty_consts[i]) continue; 00437 00438 /* Find the next block of dirty constants */ 00439 dirty_consts[i] = 0; 00440 j = i; 00441 for(i++; (i < max_constants) && dirty_consts[i]; i++) { 00442 dirty_consts[i] = 0; 00443 } 00444 00445 GL_EXTCALL(glProgramEnvParameters4fvEXT(target_type, j, i - j, constants + (j * 4))); 00446 } 00447 } else { 00448 for(; i < max_constants; i++) { 00449 if(dirty_consts[i]) { 00450 dirty_consts[i] = 0; 00451 GL_EXTCALL(glProgramEnvParameter4fvARB(target_type, i, constants + (i * 4))); 00452 } 00453 } 00454 } 00455 checkGLcall("glProgramEnvParameter4fvARB()"); 00456 00457 /* Load immediate constants */ 00458 if (shader->load_local_constsF) 00459 { 00460 if (TRACE_ON(d3d_shader)) 00461 { 00462 LIST_FOR_EACH_ENTRY(lconst, &shader->constantsF, struct wined3d_shader_lconst, entry) 00463 { 00464 GLfloat* values = (GLfloat*)lconst->value; 00465 TRACE_(d3d_constants)("Loading local constants %i: %f, %f, %f, %f\n", lconst->idx, 00466 values[0], values[1], values[2], values[3]); 00467 } 00468 } 00469 /* Immediate constants are clamped for 1.X shaders at loading times */ 00470 ret = 0; 00471 LIST_FOR_EACH_ENTRY(lconst, &shader->constantsF, struct wined3d_shader_lconst, entry) 00472 { 00473 dirty_consts[lconst->idx] = 1; /* Dirtify so the non-immediate constant overwrites it next time */ 00474 ret = max(ret, lconst->idx + 1); 00475 
GL_EXTCALL(glProgramEnvParameter4fvARB(target_type, lconst->idx, (GLfloat*)lconst->value)); 00476 } 00477 checkGLcall("glProgramEnvParameter4fvARB()"); 00478 return ret; /* The loaded immediate constants need reloading for the next shader */ 00479 } else { 00480 return 0; /* No constants are dirty now */ 00481 } 00482 } 00483 00487 static void shader_arb_load_np2fixup_constants(void *shader_priv, 00488 const struct wined3d_gl_info *gl_info, const struct wined3d_state *state) 00489 { 00490 const struct shader_arb_priv * priv = shader_priv; 00491 00492 /* NP2 texcoord fixup is (currently) only done for pixelshaders. */ 00493 if (!use_ps(state)) return; 00494 00495 if (priv->compiled_fprog && priv->compiled_fprog->np2fixup_info.super.active) { 00496 const struct arb_ps_np2fixup_info* const fixup = &priv->compiled_fprog->np2fixup_info; 00497 UINT i; 00498 WORD active = fixup->super.active; 00499 GLfloat np2fixup_constants[4 * MAX_FRAGMENT_SAMPLERS]; 00500 00501 for (i = 0; active; active >>= 1, ++i) 00502 { 00503 const struct wined3d_texture *tex = state->textures[i]; 00504 const unsigned char idx = fixup->super.idx[i]; 00505 GLfloat *tex_dim = &np2fixup_constants[(idx >> 1) * 4]; 00506 00507 if (!(active & 1)) continue; 00508 00509 if (!tex) { 00510 FIXME("Nonexistent texture is flagged for NP2 texcoord fixup\n"); 00511 continue; 00512 } 00513 00514 if (idx % 2) 00515 { 00516 tex_dim[2] = tex->pow2_matrix[0]; 00517 tex_dim[3] = tex->pow2_matrix[5]; 00518 } 00519 else 00520 { 00521 tex_dim[0] = tex->pow2_matrix[0]; 00522 tex_dim[1] = tex->pow2_matrix[5]; 00523 } 00524 } 00525 00526 for (i = 0; i < fixup->super.num_consts; ++i) { 00527 GL_EXTCALL(glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, 00528 fixup->offset + i, &np2fixup_constants[i * 4])); 00529 } 00530 } 00531 } 00532 00533 /* GL locking is done by the caller. 
*/ 00534 static void shader_arb_ps_local_constants(const struct arb_ps_compiled_shader *gl_shader, 00535 const struct wined3d_context *context, const struct wined3d_state *state, UINT rt_height) 00536 { 00537 const struct wined3d_gl_info *gl_info = context->gl_info; 00538 unsigned char i; 00539 00540 for(i = 0; i < gl_shader->numbumpenvmatconsts; i++) 00541 { 00542 int texunit = gl_shader->bumpenvmatconst[i].texunit; 00543 00544 /* The state manager takes care that this function is always called if the bump env matrix changes */ 00545 const float *data = (const float *)&state->texture_states[texunit][WINED3D_TSS_BUMPENV_MAT00]; 00546 GL_EXTCALL(glProgramLocalParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, 00547 gl_shader->bumpenvmatconst[i].const_num, data)); 00548 00549 if (gl_shader->luminanceconst[i].const_num != WINED3D_CONST_NUM_UNUSED) 00550 { 00551 /* WINED3D_TSS_BUMPENVLSCALE and WINED3D_TSS_BUMPENVLOFFSET are next to each other. 00552 * point gl to the scale, and load 4 floats. x = scale, y = offset, z and w are junk, we 00553 * don't care about them. The pointers are valid for sure because the stateblock is bigger. 00554 * (they're WINED3D_TSS_TEXTURETRANSFORMFLAGS and WINED3D_TSS_ADDRESSW, so most likely 0 or NaN 00555 */ 00556 const float *scale = (const float *)&state->texture_states[texunit][WINED3D_TSS_BUMPENV_LSCALE]; 00557 GL_EXTCALL(glProgramLocalParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, 00558 gl_shader->luminanceconst[i].const_num, scale)); 00559 } 00560 } 00561 checkGLcall("Load bumpmap consts"); 00562 00563 if(gl_shader->ycorrection != WINED3D_CONST_NUM_UNUSED) 00564 { 00565 /* ycorrection.x: Backbuffer height(onscreen) or 0(offscreen). 00566 * ycorrection.y: -1.0(onscreen), 1.0(offscreen) 00567 * ycorrection.z: 1.0 00568 * ycorrection.w: 0.0 00569 */ 00570 float val[4]; 00571 val[0] = context->render_offscreen ? 0.0f : (float) rt_height; 00572 val[1] = context->render_offscreen ? 
1.0f : -1.0f; 00573 val[2] = 1.0f; 00574 val[3] = 0.0f; 00575 GL_EXTCALL(glProgramLocalParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, gl_shader->ycorrection, val)); 00576 checkGLcall("y correction loading"); 00577 } 00578 00579 if (!gl_shader->num_int_consts) return; 00580 00581 for(i = 0; i < MAX_CONST_I; i++) 00582 { 00583 if(gl_shader->int_consts[i] != WINED3D_CONST_NUM_UNUSED) 00584 { 00585 float val[4]; 00586 val[0] = (float)state->ps_consts_i[4 * i]; 00587 val[1] = (float)state->ps_consts_i[4 * i + 1]; 00588 val[2] = (float)state->ps_consts_i[4 * i + 2]; 00589 val[3] = -1.0f; 00590 00591 GL_EXTCALL(glProgramLocalParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, gl_shader->int_consts[i], val)); 00592 } 00593 } 00594 checkGLcall("Load ps int consts"); 00595 } 00596 00597 /* GL locking is done by the caller. */ 00598 static void shader_arb_vs_local_constants(const struct arb_vs_compiled_shader *gl_shader, 00599 const struct wined3d_context *context, const struct wined3d_state *state) 00600 { 00601 const struct wined3d_gl_info *gl_info = context->gl_info; 00602 float position_fixup[4]; 00603 unsigned char i; 00604 00605 /* Upload the position fixup */ 00606 shader_get_position_fixup(context, state, position_fixup); 00607 GL_EXTCALL(glProgramLocalParameter4fvARB(GL_VERTEX_PROGRAM_ARB, gl_shader->pos_fixup, position_fixup)); 00608 00609 if (!gl_shader->num_int_consts) return; 00610 00611 for(i = 0; i < MAX_CONST_I; i++) 00612 { 00613 if(gl_shader->int_consts[i] != WINED3D_CONST_NUM_UNUSED) 00614 { 00615 float val[4]; 00616 val[0] = (float)state->vs_consts_i[4 * i]; 00617 val[1] = (float)state->vs_consts_i[4 * i + 1]; 00618 val[2] = (float)state->vs_consts_i[4 * i + 2]; 00619 val[3] = -1.0f; 00620 00621 GL_EXTCALL(glProgramLocalParameter4fvARB(GL_VERTEX_PROGRAM_ARB, gl_shader->int_consts[i], val)); 00622 } 00623 } 00624 checkGLcall("Load vs int consts"); 00625 } 00626 00633 /* GL locking is done by the caller (state handler) */ 00634 static void shader_arb_load_constants(const 
struct wined3d_context *context, char usePixelShader, char useVertexShader) 00635 { 00636 struct wined3d_device *device = context->swapchain->device; 00637 const struct wined3d_state *state = &device->stateBlock->state; 00638 const struct wined3d_gl_info *gl_info = context->gl_info; 00639 struct shader_arb_priv *priv = device->shader_priv; 00640 00641 if (context != priv->last_context) 00642 { 00643 memset(priv->vshader_const_dirty, 1, 00644 sizeof(*priv->vshader_const_dirty) * device->d3d_vshader_constantF); 00645 priv->highest_dirty_vs_const = device->d3d_vshader_constantF; 00646 00647 memset(priv->pshader_const_dirty, 1, 00648 sizeof(*priv->pshader_const_dirty) * device->d3d_pshader_constantF); 00649 priv->highest_dirty_ps_const = device->d3d_pshader_constantF; 00650 00651 priv->last_context = context; 00652 } 00653 00654 if (useVertexShader) 00655 { 00656 struct wined3d_shader *vshader = state->vertex_shader; 00657 const struct arb_vs_compiled_shader *gl_shader = priv->compiled_vprog; 00658 00659 /* Load DirectX 9 float constants for vertex shader */ 00660 priv->highest_dirty_vs_const = shader_arb_load_constantsF(vshader, gl_info, GL_VERTEX_PROGRAM_ARB, 00661 priv->highest_dirty_vs_const, state->vs_consts_f, priv->vshader_const_dirty); 00662 shader_arb_vs_local_constants(gl_shader, context, state); 00663 } 00664 00665 if (usePixelShader) 00666 { 00667 struct wined3d_shader *pshader = state->pixel_shader; 00668 const struct arb_ps_compiled_shader *gl_shader = priv->compiled_fprog; 00669 UINT rt_height = state->fb->render_targets[0]->resource.height; 00670 00671 /* Load DirectX 9 float constants for pixel shader */ 00672 priv->highest_dirty_ps_const = shader_arb_load_constantsF(pshader, gl_info, GL_FRAGMENT_PROGRAM_ARB, 00673 priv->highest_dirty_ps_const, state->ps_consts_f, priv->pshader_const_dirty); 00674 shader_arb_ps_local_constants(gl_shader, context, state, rt_height); 00675 } 00676 } 00677 00678 static void shader_arb_update_float_vertex_constants(struct 
wined3d_device *device, UINT start, UINT count) 00679 { 00680 struct wined3d_context *context = context_get_current(); 00681 struct shader_arb_priv *priv = device->shader_priv; 00682 00683 /* We don't want shader constant dirtification to be an O(contexts), so just dirtify the active 00684 * context. On a context switch the old context will be fully dirtified */ 00685 if (!context || context->swapchain->device != device) return; 00686 00687 memset(priv->vshader_const_dirty + start, 1, sizeof(*priv->vshader_const_dirty) * count); 00688 priv->highest_dirty_vs_const = max(priv->highest_dirty_vs_const, start + count); 00689 } 00690 00691 static void shader_arb_update_float_pixel_constants(struct wined3d_device *device, UINT start, UINT count) 00692 { 00693 struct wined3d_context *context = context_get_current(); 00694 struct shader_arb_priv *priv = device->shader_priv; 00695 00696 /* We don't want shader constant dirtification to be an O(contexts), so just dirtify the active 00697 * context. 
On a context switch the old context will be fully dirtified */ 00698 if (!context || context->swapchain->device != device) return; 00699 00700 memset(priv->pshader_const_dirty + start, 1, sizeof(*priv->pshader_const_dirty) * count); 00701 priv->highest_dirty_ps_const = max(priv->highest_dirty_ps_const, start + count); 00702 } 00703 00704 static DWORD *local_const_mapping(const struct wined3d_shader *shader) 00705 { 00706 const struct wined3d_shader_lconst *lconst; 00707 DWORD *ret; 00708 DWORD idx = 0; 00709 00710 if (shader->load_local_constsF || list_empty(&shader->constantsF)) 00711 return NULL; 00712 00713 ret = HeapAlloc(GetProcessHeap(), 0, sizeof(DWORD) * shader->limits.constant_float); 00714 if (!ret) 00715 { 00716 ERR("Out of memory\n"); 00717 return NULL; 00718 } 00719 00720 LIST_FOR_EACH_ENTRY(lconst, &shader->constantsF, struct wined3d_shader_lconst, entry) 00721 { 00722 ret[lconst->idx] = idx++; 00723 } 00724 return ret; 00725 } 00726 00727 /* Generate the variable & register declarations for the ARB_vertex_program output target */ 00728 static DWORD shader_generate_arb_declarations(const struct wined3d_shader *shader, 00729 const struct wined3d_shader_reg_maps *reg_maps, struct wined3d_shader_buffer *buffer, 00730 const struct wined3d_gl_info *gl_info, const DWORD *lconst_map, 00731 DWORD *num_clipplanes, const struct shader_arb_ctx_priv *ctx) 00732 { 00733 DWORD i, next_local = 0; 00734 char pshader = shader_is_pshader_version(reg_maps->shader_version.type); 00735 const struct wined3d_shader_lconst *lconst; 00736 unsigned max_constantsF; 00737 DWORD map; 00738 00739 /* In pixel shaders, all private constants are program local, we don't need anything 00740 * from program.env. Thus we can advertise the full set of constants in pixel shaders. 00741 * If we need a private constant the GL implementation will squeeze it in somewhere 00742 * 00743 * With vertex shaders we need the posFixup and on some GL implementations 4 helper 00744 * immediate values. 
The posFixup is loaded using program.env for now, so always 00745 * subtract one from the number of constants. If the shader uses indirect addressing, 00746 * account for the helper const too because we have to declare all available d3d constants 00747 * and don't know which are actually used. 00748 */ 00749 if (pshader) 00750 { 00751 max_constantsF = gl_info->limits.arb_ps_native_constants; 00752 /* 24 is the minimum MAX_PROGRAM_ENV_PARAMETERS_ARB value. */ 00753 if (max_constantsF < 24) 00754 max_constantsF = gl_info->limits.arb_ps_float_constants; 00755 } 00756 else 00757 { 00758 const struct arb_vshader_private *shader_data = shader->backend_data; 00759 max_constantsF = gl_info->limits.arb_vs_native_constants; 00760 /* 96 is the minimum MAX_PROGRAM_ENV_PARAMETERS_ARB value. 00761 * Also prevents max_constantsF from becoming less than 0 and 00762 * wrapping . */ 00763 if (max_constantsF < 96) 00764 max_constantsF = gl_info->limits.arb_vs_float_constants; 00765 00766 if (reg_maps->usesrelconstF) 00767 { 00768 DWORD highest_constf = 0, clip_limit; 00769 00770 max_constantsF -= reserved_vs_const(shader_data, reg_maps, gl_info); 00771 max_constantsF -= count_bits(reg_maps->integer_constants); 00772 00773 for (i = 0; i < shader->limits.constant_float; ++i) 00774 { 00775 DWORD idx = i >> 5; 00776 DWORD shift = i & 0x1f; 00777 if(reg_maps->constf[idx] & (1 << shift)) highest_constf = i; 00778 } 00779 00780 if(use_nv_clip(gl_info) && ctx->target_version >= NV2) 00781 { 00782 if(ctx->cur_vs_args->super.clip_enabled) 00783 clip_limit = gl_info->limits.clipplanes; 00784 else 00785 clip_limit = 0; 00786 } 00787 else 00788 { 00789 unsigned int mask = ctx->cur_vs_args->clip.boolclip.clipplane_mask; 00790 clip_limit = min(count_bits(mask), 4); 00791 } 00792 *num_clipplanes = min(clip_limit, max_constantsF - highest_constf - 1); 00793 max_constantsF -= *num_clipplanes; 00794 if(*num_clipplanes < clip_limit) 00795 { 00796 WARN("Only %u clipplanes out of %u enabled\n", 
*num_clipplanes, gl_info->limits.clipplanes); 00797 } 00798 } 00799 else 00800 { 00801 if (ctx->target_version >= NV2) *num_clipplanes = gl_info->limits.clipplanes; 00802 else *num_clipplanes = min(gl_info->limits.clipplanes, 4); 00803 } 00804 } 00805 00806 for (i = 0, map = reg_maps->temporary; map; map >>= 1, ++i) 00807 { 00808 if (map & 1) shader_addline(buffer, "TEMP R%u;\n", i); 00809 } 00810 00811 for (i = 0, map = reg_maps->address; map; map >>= 1, ++i) 00812 { 00813 if (map & 1) shader_addline(buffer, "ADDRESS A%u;\n", i); 00814 } 00815 00816 if (pshader && reg_maps->shader_version.major == 1 && reg_maps->shader_version.minor <= 3) 00817 { 00818 for (i = 0, map = reg_maps->texcoord; map; map >>= 1, ++i) 00819 { 00820 if (map & 1) shader_addline(buffer, "TEMP T%u;\n", i); 00821 } 00822 } 00823 00824 /* Load local constants using the program-local space, 00825 * this avoids reloading them each time the shader is used 00826 */ 00827 if (lconst_map) 00828 { 00829 LIST_FOR_EACH_ENTRY(lconst, &shader->constantsF, struct wined3d_shader_lconst, entry) 00830 { 00831 shader_addline(buffer, "PARAM C%u = program.local[%u];\n", lconst->idx, 00832 lconst_map[lconst->idx]); 00833 next_local = max(next_local, lconst_map[lconst->idx] + 1); 00834 } 00835 } 00836 00837 /* After subtracting privately used constants from the hardware limit(they are loaded as 00838 * local constants), make sure the shader doesn't violate the env constant limit 00839 */ 00840 if(pshader) 00841 { 00842 max_constantsF = min(max_constantsF, gl_info->limits.arb_ps_float_constants); 00843 } 00844 else 00845 { 00846 max_constantsF = min(max_constantsF, gl_info->limits.arb_vs_float_constants); 00847 } 00848 00849 /* Avoid declaring more constants than needed */ 00850 max_constantsF = min(max_constantsF, shader->limits.constant_float); 00851 00852 /* we use the array-based constants array if the local constants are marked for loading, 00853 * because then we use indirect addressing, or when the local 
constant list is empty, 00854 * because then we don't know if we're using indirect addressing or not. If we're hardcoding 00855 * local constants do not declare the loaded constants as an array because ARB compilers usually 00856 * do not optimize unused constants away 00857 */ 00858 if (reg_maps->usesrelconstF) 00859 { 00860 /* Need to PARAM the environment parameters (constants) so we can use relative addressing */ 00861 shader_addline(buffer, "PARAM C[%d] = { program.env[0..%d] };\n", 00862 max_constantsF, max_constantsF - 1); 00863 } else { 00864 for(i = 0; i < max_constantsF; i++) { 00865 DWORD idx, mask; 00866 idx = i >> 5; 00867 mask = 1 << (i & 0x1f); 00868 if (!shader_constant_is_local(shader, i) && (reg_maps->constf[idx] & mask)) 00869 { 00870 shader_addline(buffer, "PARAM C%d = program.env[%d];\n",i, i); 00871 } 00872 } 00873 } 00874 00875 return next_local; 00876 } 00877 00878 static const char * const shift_tab[] = { 00879 "dummy", /* 0 (none) */ 00880 "coefmul.x", /* 1 (x2) */ 00881 "coefmul.y", /* 2 (x4) */ 00882 "coefmul.z", /* 3 (x8) */ 00883 "coefmul.w", /* 4 (x16) */ 00884 "dummy", /* 5 (x32) */ 00885 "dummy", /* 6 (x64) */ 00886 "dummy", /* 7 (x128) */ 00887 "dummy", /* 8 (d256) */ 00888 "dummy", /* 9 (d128) */ 00889 "dummy", /* 10 (d64) */ 00890 "dummy", /* 11 (d32) */ 00891 "coefdiv.w", /* 12 (d16) */ 00892 "coefdiv.z", /* 13 (d8) */ 00893 "coefdiv.y", /* 14 (d4) */ 00894 "coefdiv.x" /* 15 (d2) */ 00895 }; 00896 00897 static void shader_arb_get_write_mask(const struct wined3d_shader_instruction *ins, 00898 const struct wined3d_shader_dst_param *dst, char *write_mask) 00899 { 00900 char *ptr = write_mask; 00901 00902 if (dst->write_mask != WINED3DSP_WRITEMASK_ALL) 00903 { 00904 *ptr++ = '.'; 00905 if (dst->write_mask & WINED3DSP_WRITEMASK_0) *ptr++ = 'x'; 00906 if (dst->write_mask & WINED3DSP_WRITEMASK_1) *ptr++ = 'y'; 00907 if (dst->write_mask & WINED3DSP_WRITEMASK_2) *ptr++ = 'z'; 00908 if (dst->write_mask & WINED3DSP_WRITEMASK_3) *ptr++ = 
'w'; 00909 } 00910 00911 *ptr = '\0'; 00912 } 00913 00914 static void shader_arb_get_swizzle(const struct wined3d_shader_src_param *param, BOOL fixup, char *swizzle_str) 00915 { 00916 /* For registers of type WINED3DDECLTYPE_D3DCOLOR, data is stored as "bgra", 00917 * but addressed as "rgba". To fix this we need to swap the register's x 00918 * and z components. */ 00919 const char *swizzle_chars = fixup ? "zyxw" : "xyzw"; 00920 char *ptr = swizzle_str; 00921 00922 /* swizzle bits fields: wwzzyyxx */ 00923 DWORD swizzle = param->swizzle; 00924 DWORD swizzle_x = swizzle & 0x03; 00925 DWORD swizzle_y = (swizzle >> 2) & 0x03; 00926 DWORD swizzle_z = (swizzle >> 4) & 0x03; 00927 DWORD swizzle_w = (swizzle >> 6) & 0x03; 00928 00929 /* If the swizzle is the default swizzle (ie, "xyzw"), we don't need to 00930 * generate a swizzle string. Unless we need to our own swizzling. */ 00931 if (swizzle != WINED3DSP_NOSWIZZLE || fixup) 00932 { 00933 *ptr++ = '.'; 00934 if (swizzle_x == swizzle_y && swizzle_x == swizzle_z && swizzle_x == swizzle_w) { 00935 *ptr++ = swizzle_chars[swizzle_x]; 00936 } else { 00937 *ptr++ = swizzle_chars[swizzle_x]; 00938 *ptr++ = swizzle_chars[swizzle_y]; 00939 *ptr++ = swizzle_chars[swizzle_z]; 00940 *ptr++ = swizzle_chars[swizzle_w]; 00941 } 00942 } 00943 00944 *ptr = '\0'; 00945 } 00946 00947 static void shader_arb_request_a0(const struct wined3d_shader_instruction *ins, const char *src) 00948 { 00949 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 00950 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 00951 00952 if (!strcmp(priv->addr_reg, src)) return; 00953 00954 strcpy(priv->addr_reg, src); 00955 shader_addline(buffer, "ARL A0.x, %s;\n", src); 00956 } 00957 00958 static void shader_arb_get_src_param(const struct wined3d_shader_instruction *ins, 00959 const struct wined3d_shader_src_param *src, unsigned int tmpreg, char *outregstr); 00960 00961 static void shader_arb_get_register_name(const struct wined3d_shader_instruction 
*ins, 00962 const struct wined3d_shader_register *reg, char *register_name, BOOL *is_color) 00963 { 00964 /* oPos, oFog and oPts in D3D */ 00965 static const char * const rastout_reg_names[] = {"TMP_OUT", "result.fogcoord", "result.pointsize"}; 00966 const struct wined3d_shader *shader = ins->ctx->shader; 00967 const struct wined3d_shader_reg_maps *reg_maps = ins->ctx->reg_maps; 00968 BOOL pshader = shader_is_pshader_version(reg_maps->shader_version.type); 00969 struct shader_arb_ctx_priv *ctx = ins->ctx->backend_data; 00970 00971 *is_color = FALSE; 00972 00973 switch (reg->type) 00974 { 00975 case WINED3DSPR_TEMP: 00976 sprintf(register_name, "R%u", reg->idx); 00977 break; 00978 00979 case WINED3DSPR_INPUT: 00980 if (pshader) 00981 { 00982 if (reg_maps->shader_version.major < 3) 00983 { 00984 if (!reg->idx) strcpy(register_name, "fragment.color.primary"); 00985 else strcpy(register_name, "fragment.color.secondary"); 00986 } 00987 else 00988 { 00989 if(reg->rel_addr) 00990 { 00991 char rel_reg[50]; 00992 shader_arb_get_src_param(ins, reg->rel_addr, 0, rel_reg); 00993 00994 if (!strcmp(rel_reg, "**aL_emul**")) 00995 { 00996 DWORD idx = ctx->aL + reg->idx; 00997 if(idx < MAX_REG_INPUT) 00998 { 00999 strcpy(register_name, ctx->ps_input[idx]); 01000 } 01001 else 01002 { 01003 ERR("Pixel shader input register out of bounds: %u\n", idx); 01004 sprintf(register_name, "out_of_bounds_%u", idx); 01005 } 01006 } 01007 else if (reg_maps->input_registers & 0x0300) 01008 { 01009 /* There are two ways basically: 01010 * 01011 * 1) Use the unrolling code that is used for loop emulation and unroll the loop. 01012 * That means trouble if the loop also contains a breakc or if the control values 01013 * aren't local constants. 01014 * 2) Generate an if block that checks if aL.y < 8, == 8 or == 9 and selects the 01015 * source dynamically. The trouble is that we cannot simply read aL.y because it 01016 * is an ADDRESS register. 
We could however push it, load .zw with a value and use 01017 * ADAC to load the condition code register and pop it again afterwards 01018 */ 01019 FIXME("Relative input register addressing with more than 8 registers\n"); 01020 01021 /* This is better than nothing for now */ 01022 sprintf(register_name, "fragment.texcoord[%s + %u]", rel_reg, reg->idx); 01023 } 01024 else if(ctx->cur_ps_args->super.vp_mode != vertexshader) 01025 { 01026 /* This is problematic because we'd have to consult the ctx->ps_input strings 01027 * for where to find the varying. Some may be "0.0", others can be texcoords or 01028 * colors. This needs either a pipeline replacement to make the vertex shader feed 01029 * proper varyings, or loop unrolling 01030 * 01031 * For now use the texcoords and hope for the best 01032 */ 01033 FIXME("Non-vertex shader varying input with indirect addressing\n"); 01034 sprintf(register_name, "fragment.texcoord[%s + %u]", rel_reg, reg->idx); 01035 } 01036 else 01037 { 01038 /* D3D supports indirect addressing only with aL in loop registers. 
The loop instruction 01039 * pulls GL_NV_fragment_program2 in 01040 */ 01041 sprintf(register_name, "fragment.texcoord[%s + %u]", rel_reg, reg->idx); 01042 } 01043 } 01044 else 01045 { 01046 if(reg->idx < MAX_REG_INPUT) 01047 { 01048 strcpy(register_name, ctx->ps_input[reg->idx]); 01049 } 01050 else 01051 { 01052 ERR("Pixel shader input register out of bounds: %u\n", reg->idx); 01053 sprintf(register_name, "out_of_bounds_%u", reg->idx); 01054 } 01055 } 01056 } 01057 } 01058 else 01059 { 01060 if (ctx->cur_vs_args->super.swizzle_map & (1 << reg->idx)) *is_color = TRUE; 01061 sprintf(register_name, "vertex.attrib[%u]", reg->idx); 01062 } 01063 break; 01064 01065 case WINED3DSPR_CONST: 01066 if (!pshader && reg->rel_addr) 01067 { 01068 const struct arb_vshader_private *shader_data = shader->backend_data; 01069 UINT rel_offset = shader_data->rel_offset; 01070 BOOL aL = FALSE; 01071 char rel_reg[50]; 01072 if (reg_maps->shader_version.major < 2) 01073 { 01074 sprintf(rel_reg, "A0.x"); 01075 } else { 01076 shader_arb_get_src_param(ins, reg->rel_addr, 0, rel_reg); 01077 if(ctx->target_version == ARB) { 01078 if (!strcmp(rel_reg, "**aL_emul**")) 01079 { 01080 aL = TRUE; 01081 } else { 01082 shader_arb_request_a0(ins, rel_reg); 01083 sprintf(rel_reg, "A0.x"); 01084 } 01085 } 01086 } 01087 if(aL) 01088 sprintf(register_name, "C[%u]", ctx->aL + reg->idx); 01089 else if (reg->idx >= rel_offset) 01090 sprintf(register_name, "C[%s + %u]", rel_reg, reg->idx - rel_offset); 01091 else 01092 sprintf(register_name, "C[%s - %u]", rel_reg, rel_offset - reg->idx); 01093 } 01094 else 01095 { 01096 if (reg_maps->usesrelconstF) 01097 sprintf(register_name, "C[%u]", reg->idx); 01098 else 01099 sprintf(register_name, "C%u", reg->idx); 01100 } 01101 break; 01102 01103 case WINED3DSPR_TEXTURE: /* case WINED3DSPR_ADDR: */ 01104 if (pshader) 01105 { 01106 if (reg_maps->shader_version.major == 1 01107 && reg_maps->shader_version.minor <= 3) 01108 { 01109 /* In ps <= 1.3, Tx is a temporary 
register as destination to all instructions, 01110 * and as source to most instructions. For some instructions it is the texcoord 01111 * input. Those instructions know about the special use 01112 */ 01113 sprintf(register_name, "T%u", reg->idx); 01114 } else { 01115 /* in ps 1.4 and 2.x Tx is always a (read-only) varying */ 01116 sprintf(register_name, "fragment.texcoord[%u]", reg->idx); 01117 } 01118 } 01119 else 01120 { 01121 if (reg_maps->shader_version.major == 1 || ctx->target_version >= NV2) 01122 { 01123 sprintf(register_name, "A%u", reg->idx); 01124 } 01125 else 01126 { 01127 sprintf(register_name, "A%u_SHADOW", reg->idx); 01128 } 01129 } 01130 break; 01131 01132 case WINED3DSPR_COLOROUT: 01133 if (ctx->cur_ps_args->super.srgb_correction && !reg->idx) 01134 { 01135 strcpy(register_name, "TMP_COLOR"); 01136 } 01137 else 01138 { 01139 if(ctx->cur_ps_args->super.srgb_correction) FIXME("sRGB correction on higher render targets\n"); 01140 if (reg_maps->rt_mask > 1) 01141 { 01142 sprintf(register_name, "result.color[%u]", reg->idx); 01143 } 01144 else 01145 { 01146 strcpy(register_name, "result.color"); 01147 } 01148 } 01149 break; 01150 01151 case WINED3DSPR_RASTOUT: 01152 if(reg->idx == 1) sprintf(register_name, "%s", ctx->fog_output); 01153 else sprintf(register_name, "%s", rastout_reg_names[reg->idx]); 01154 break; 01155 01156 case WINED3DSPR_DEPTHOUT: 01157 strcpy(register_name, "result.depth"); 01158 break; 01159 01160 case WINED3DSPR_ATTROUT: 01161 /* case WINED3DSPR_OUTPUT: */ 01162 if (pshader) sprintf(register_name, "oD[%u]", reg->idx); 01163 else strcpy(register_name, ctx->color_output[reg->idx]); 01164 break; 01165 01166 case WINED3DSPR_TEXCRDOUT: 01167 if (pshader) 01168 { 01169 sprintf(register_name, "oT[%u]", reg->idx); 01170 } 01171 else 01172 { 01173 if (reg_maps->shader_version.major < 3) 01174 { 01175 strcpy(register_name, ctx->texcrd_output[reg->idx]); 01176 } 01177 else 01178 { 01179 strcpy(register_name, ctx->vs_output[reg->idx]); 01180 } 
01181 } 01182 break; 01183 01184 case WINED3DSPR_LOOP: 01185 if(ctx->target_version >= NV2) 01186 { 01187 /* Pshader has an implicitly declared loop index counter A0.x that cannot be renamed */ 01188 if(pshader) sprintf(register_name, "A0.x"); 01189 else sprintf(register_name, "aL.y"); 01190 } 01191 else 01192 { 01193 /* Unfortunately this code cannot return the value of ctx->aL here. An immediate value 01194 * would be valid, but if aL is used for indexing(its only use), there's likely an offset, 01195 * thus the result would be something like C[15 + 30], which is not valid in the ARB program 01196 * grammar. So return a marker for the emulated aL and intercept it in constant and varying 01197 * indexing 01198 */ 01199 sprintf(register_name, "**aL_emul**"); 01200 } 01201 01202 break; 01203 01204 case WINED3DSPR_CONSTINT: 01205 sprintf(register_name, "I%u", reg->idx); 01206 break; 01207 01208 case WINED3DSPR_MISCTYPE: 01209 if (!reg->idx) 01210 { 01211 sprintf(register_name, "vpos"); 01212 } 01213 else if(reg->idx == 1) 01214 { 01215 sprintf(register_name, "fragment.facing.x"); 01216 } 01217 else 01218 { 01219 FIXME("Unknown MISCTYPE register index %u\n", reg->idx); 01220 } 01221 break; 01222 01223 default: 01224 FIXME("Unhandled register type %#x[%u]\n", reg->type, reg->idx); 01225 sprintf(register_name, "unrecognized_register[%u]", reg->idx); 01226 break; 01227 } 01228 } 01229 01230 static void shader_arb_get_dst_param(const struct wined3d_shader_instruction *ins, 01231 const struct wined3d_shader_dst_param *wined3d_dst, char *str) 01232 { 01233 char register_name[255]; 01234 char write_mask[6]; 01235 BOOL is_color; 01236 01237 shader_arb_get_register_name(ins, &wined3d_dst->reg, register_name, &is_color); 01238 strcpy(str, register_name); 01239 01240 shader_arb_get_write_mask(ins, wined3d_dst, write_mask); 01241 strcat(str, write_mask); 01242 } 01243 01244 static const char *shader_arb_get_fixup_swizzle(enum fixup_channel_source channel_source) 01245 { 01246 
switch(channel_source) 01247 { 01248 case CHANNEL_SOURCE_ZERO: return "0"; 01249 case CHANNEL_SOURCE_ONE: return "1"; 01250 case CHANNEL_SOURCE_X: return "x"; 01251 case CHANNEL_SOURCE_Y: return "y"; 01252 case CHANNEL_SOURCE_Z: return "z"; 01253 case CHANNEL_SOURCE_W: return "w"; 01254 default: 01255 FIXME("Unhandled channel source %#x\n", channel_source); 01256 return "undefined"; 01257 } 01258 } 01259 01260 static void gen_color_correction(struct wined3d_shader_buffer *buffer, const char *reg, 01261 DWORD dst_mask, const char *one, const char *two, struct color_fixup_desc fixup) 01262 { 01263 DWORD mask; 01264 01265 if (is_complex_fixup(fixup)) 01266 { 01267 enum complex_fixup complex_fixup = get_complex_fixup(fixup); 01268 FIXME("Complex fixup (%#x) not supported\n", complex_fixup); 01269 return; 01270 } 01271 01272 mask = 0; 01273 if (fixup.x_source != CHANNEL_SOURCE_X) mask |= WINED3DSP_WRITEMASK_0; 01274 if (fixup.y_source != CHANNEL_SOURCE_Y) mask |= WINED3DSP_WRITEMASK_1; 01275 if (fixup.z_source != CHANNEL_SOURCE_Z) mask |= WINED3DSP_WRITEMASK_2; 01276 if (fixup.w_source != CHANNEL_SOURCE_W) mask |= WINED3DSP_WRITEMASK_3; 01277 mask &= dst_mask; 01278 01279 if (mask) 01280 { 01281 shader_addline(buffer, "SWZ %s, %s, %s, %s, %s, %s;\n", reg, reg, 01282 shader_arb_get_fixup_swizzle(fixup.x_source), shader_arb_get_fixup_swizzle(fixup.y_source), 01283 shader_arb_get_fixup_swizzle(fixup.z_source), shader_arb_get_fixup_swizzle(fixup.w_source)); 01284 } 01285 01286 mask = 0; 01287 if (fixup.x_sign_fixup) mask |= WINED3DSP_WRITEMASK_0; 01288 if (fixup.y_sign_fixup) mask |= WINED3DSP_WRITEMASK_1; 01289 if (fixup.z_sign_fixup) mask |= WINED3DSP_WRITEMASK_2; 01290 if (fixup.w_sign_fixup) mask |= WINED3DSP_WRITEMASK_3; 01291 mask &= dst_mask; 01292 01293 if (mask) 01294 { 01295 char reg_mask[6]; 01296 char *ptr = reg_mask; 01297 01298 if (mask != WINED3DSP_WRITEMASK_ALL) 01299 { 01300 *ptr++ = '.'; 01301 if (mask & WINED3DSP_WRITEMASK_0) *ptr++ = 'x'; 01302 if (mask 
& WINED3DSP_WRITEMASK_1) *ptr++ = 'y'; 01303 if (mask & WINED3DSP_WRITEMASK_2) *ptr++ = 'z'; 01304 if (mask & WINED3DSP_WRITEMASK_3) *ptr++ = 'w'; 01305 } 01306 *ptr = '\0'; 01307 01308 shader_addline(buffer, "MAD %s%s, %s, %s, -%s;\n", reg, reg_mask, reg, two, one); 01309 } 01310 } 01311 01312 static const char *shader_arb_get_modifier(const struct wined3d_shader_instruction *ins) 01313 { 01314 DWORD mod; 01315 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 01316 if (!ins->dst_count) return ""; 01317 01318 mod = ins->dst[0].modifiers; 01319 01320 /* Silently ignore PARTIALPRECISION if its not supported */ 01321 if(priv->target_version == ARB) mod &= ~WINED3DSPDM_PARTIALPRECISION; 01322 01323 if(mod & WINED3DSPDM_MSAMPCENTROID) 01324 { 01325 FIXME("Unhandled modifier WINED3DSPDM_MSAMPCENTROID\n"); 01326 mod &= ~WINED3DSPDM_MSAMPCENTROID; 01327 } 01328 01329 switch(mod) 01330 { 01331 case WINED3DSPDM_SATURATE | WINED3DSPDM_PARTIALPRECISION: 01332 return "H_SAT"; 01333 01334 case WINED3DSPDM_SATURATE: 01335 return "_SAT"; 01336 01337 case WINED3DSPDM_PARTIALPRECISION: 01338 return "H"; 01339 01340 case 0: 01341 return ""; 01342 01343 default: 01344 FIXME("Unknown modifiers 0x%08x\n", mod); 01345 return ""; 01346 } 01347 } 01348 01349 #define TEX_PROJ 0x1 01350 #define TEX_BIAS 0x2 01351 #define TEX_LOD 0x4 01352 #define TEX_DERIV 0x10 01353 01354 static void shader_hw_sample(const struct wined3d_shader_instruction *ins, DWORD sampler_idx, 01355 const char *dst_str, const char *coord_reg, WORD flags, const char *dsx, const char *dsy) 01356 { 01357 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 01358 DWORD sampler_type = ins->ctx->reg_maps->sampler_type[sampler_idx]; 01359 const struct wined3d_shader *shader = ins->ctx->shader; 01360 const struct wined3d_texture *texture; 01361 const char *tex_type; 01362 BOOL np2_fixup = FALSE; 01363 struct wined3d_device *device = shader->device; 01364 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 
01365 const char *mod; 01366 BOOL pshader = shader_is_pshader_version(ins->ctx->reg_maps->shader_version.type); 01367 01368 /* D3D vertex shader sampler IDs are vertex samplers(0-3), not global d3d samplers */ 01369 if(!pshader) sampler_idx += MAX_FRAGMENT_SAMPLERS; 01370 01371 switch(sampler_type) { 01372 case WINED3DSTT_1D: 01373 tex_type = "1D"; 01374 break; 01375 01376 case WINED3DSTT_2D: 01377 texture = device->stateBlock->state.textures[sampler_idx]; 01378 if (texture && texture->target == GL_TEXTURE_RECTANGLE_ARB) 01379 { 01380 tex_type = "RECT"; 01381 } else { 01382 tex_type = "2D"; 01383 } 01384 if (shader_is_pshader_version(ins->ctx->reg_maps->shader_version.type)) 01385 { 01386 if (priv->cur_np2fixup_info->super.active & (1 << sampler_idx)) 01387 { 01388 if (flags) FIXME("Only ordinary sampling from NP2 textures is supported.\n"); 01389 else np2_fixup = TRUE; 01390 } 01391 } 01392 break; 01393 01394 case WINED3DSTT_VOLUME: 01395 tex_type = "3D"; 01396 break; 01397 01398 case WINED3DSTT_CUBE: 01399 tex_type = "CUBE"; 01400 break; 01401 01402 default: 01403 ERR("Unexpected texture type %d\n", sampler_type); 01404 tex_type = ""; 01405 } 01406 01407 /* TEX, TXL, TXD and TXP do not support the "H" modifier, 01408 * so don't use shader_arb_get_modifier 01409 */ 01410 if(ins->dst[0].modifiers & WINED3DSPDM_SATURATE) mod = "_SAT"; 01411 else mod = ""; 01412 01413 /* Fragment samplers always have indentity mapping */ 01414 if(sampler_idx >= MAX_FRAGMENT_SAMPLERS) 01415 { 01416 sampler_idx = priv->cur_vs_args->vertex.samplers[sampler_idx - MAX_FRAGMENT_SAMPLERS]; 01417 } 01418 01419 if (flags & TEX_DERIV) 01420 { 01421 if(flags & TEX_PROJ) FIXME("Projected texture sampling with custom derivatives\n"); 01422 if(flags & TEX_BIAS) FIXME("Biased texture sampling with custom derivatives\n"); 01423 shader_addline(buffer, "TXD%s %s, %s, %s, %s, texture[%u], %s;\n", mod, dst_str, coord_reg, 01424 dsx, dsy,sampler_idx, tex_type); 01425 } 01426 else if(flags & TEX_LOD) 
01427 { 01428 if(flags & TEX_PROJ) FIXME("Projected texture sampling with explicit lod\n"); 01429 if(flags & TEX_BIAS) FIXME("Biased texture sampling with explicit lod\n"); 01430 shader_addline(buffer, "TXL%s %s, %s, texture[%u], %s;\n", mod, dst_str, coord_reg, 01431 sampler_idx, tex_type); 01432 } 01433 else if (flags & TEX_BIAS) 01434 { 01435 /* Shouldn't be possible, but let's check for it */ 01436 if(flags & TEX_PROJ) FIXME("Biased and Projected texture sampling\n"); 01437 /* TXB takes the 4th component of the source vector automatically, as d3d. Nothing more to do */ 01438 shader_addline(buffer, "TXB%s %s, %s, texture[%u], %s;\n", mod, dst_str, coord_reg, sampler_idx, tex_type); 01439 } 01440 else if (flags & TEX_PROJ) 01441 { 01442 shader_addline(buffer, "TXP%s %s, %s, texture[%u], %s;\n", mod, dst_str, coord_reg, sampler_idx, tex_type); 01443 } 01444 else 01445 { 01446 if (np2_fixup) 01447 { 01448 const unsigned char idx = priv->cur_np2fixup_info->super.idx[sampler_idx]; 01449 shader_addline(buffer, "MUL TA, np2fixup[%u].%s, %s;\n", idx >> 1, 01450 (idx % 2) ? 
"zwxy" : "xyzw", coord_reg); 01451 01452 shader_addline(buffer, "TEX%s %s, TA, texture[%u], %s;\n", mod, dst_str, sampler_idx, tex_type); 01453 } 01454 else 01455 shader_addline(buffer, "TEX%s %s, %s, texture[%u], %s;\n", mod, dst_str, coord_reg, sampler_idx, tex_type); 01456 } 01457 01458 if (pshader) 01459 { 01460 gen_color_correction(buffer, dst_str, ins->dst[0].write_mask, 01461 arb_get_helper_value(WINED3D_SHADER_TYPE_PIXEL, ARB_ONE), 01462 arb_get_helper_value(WINED3D_SHADER_TYPE_PIXEL, ARB_TWO), 01463 priv->cur_ps_args->super.color_fixup[sampler_idx]); 01464 } 01465 } 01466 01467 static void shader_arb_get_src_param(const struct wined3d_shader_instruction *ins, 01468 const struct wined3d_shader_src_param *src, unsigned int tmpreg, char *outregstr) 01469 { 01470 /* Generate a line that does the input modifier computation and return the input register to use */ 01471 BOOL is_color = FALSE; 01472 char regstr[256]; 01473 char swzstr[20]; 01474 int insert_line; 01475 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 01476 struct shader_arb_ctx_priv *ctx = ins->ctx->backend_data; 01477 const char *one = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_ONE); 01478 const char *two = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_TWO); 01479 01480 /* Assume a new line will be added */ 01481 insert_line = 1; 01482 01483 /* Get register name */ 01484 shader_arb_get_register_name(ins, &src->reg, regstr, &is_color); 01485 shader_arb_get_swizzle(src, is_color, swzstr); 01486 01487 switch (src->modifiers) 01488 { 01489 case WINED3DSPSM_NONE: 01490 sprintf(outregstr, "%s%s", regstr, swzstr); 01491 insert_line = 0; 01492 break; 01493 case WINED3DSPSM_NEG: 01494 sprintf(outregstr, "-%s%s", regstr, swzstr); 01495 insert_line = 0; 01496 break; 01497 case WINED3DSPSM_BIAS: 01498 shader_addline(buffer, "ADD T%c, %s, -coefdiv.x;\n", 'A' + tmpreg, regstr); 01499 break; 01500 case WINED3DSPSM_BIASNEG: 01501 shader_addline(buffer, "ADD T%c, 
-%s, coefdiv.x;\n", 'A' + tmpreg, regstr); 01502 break; 01503 case WINED3DSPSM_SIGN: 01504 shader_addline(buffer, "MAD T%c, %s, %s, -%s;\n", 'A' + tmpreg, regstr, two, one); 01505 break; 01506 case WINED3DSPSM_SIGNNEG: 01507 shader_addline(buffer, "MAD T%c, %s, -%s, %s;\n", 'A' + tmpreg, regstr, two, one); 01508 break; 01509 case WINED3DSPSM_COMP: 01510 shader_addline(buffer, "SUB T%c, %s, %s;\n", 'A' + tmpreg, one, regstr); 01511 break; 01512 case WINED3DSPSM_X2: 01513 shader_addline(buffer, "ADD T%c, %s, %s;\n", 'A' + tmpreg, regstr, regstr); 01514 break; 01515 case WINED3DSPSM_X2NEG: 01516 shader_addline(buffer, "ADD T%c, -%s, -%s;\n", 'A' + tmpreg, regstr, regstr); 01517 break; 01518 case WINED3DSPSM_DZ: 01519 shader_addline(buffer, "RCP T%c, %s.z;\n", 'A' + tmpreg, regstr); 01520 shader_addline(buffer, "MUL T%c, %s, T%c;\n", 'A' + tmpreg, regstr, 'A' + tmpreg); 01521 break; 01522 case WINED3DSPSM_DW: 01523 shader_addline(buffer, "RCP T%c, %s.w;\n", 'A' + tmpreg, regstr); 01524 shader_addline(buffer, "MUL T%c, %s, T%c;\n", 'A' + tmpreg, regstr, 'A' + tmpreg); 01525 break; 01526 case WINED3DSPSM_ABS: 01527 if(ctx->target_version >= NV2) { 01528 sprintf(outregstr, "|%s%s|", regstr, swzstr); 01529 insert_line = 0; 01530 } else { 01531 shader_addline(buffer, "ABS T%c, %s;\n", 'A' + tmpreg, regstr); 01532 } 01533 break; 01534 case WINED3DSPSM_ABSNEG: 01535 if(ctx->target_version >= NV2) { 01536 sprintf(outregstr, "-|%s%s|", regstr, swzstr); 01537 } else { 01538 shader_addline(buffer, "ABS T%c, %s;\n", 'A' + tmpreg, regstr); 01539 sprintf(outregstr, "-T%c%s", 'A' + tmpreg, swzstr); 01540 } 01541 insert_line = 0; 01542 break; 01543 default: 01544 sprintf(outregstr, "%s%s", regstr, swzstr); 01545 insert_line = 0; 01546 } 01547 01548 /* Return modified or original register, with swizzle */ 01549 if (insert_line) 01550 sprintf(outregstr, "T%c%s", 'A' + tmpreg, swzstr); 01551 } 01552 01553 static void pshader_hw_bem(const struct wined3d_shader_instruction *ins) 01554 { 
01555 const struct wined3d_shader_dst_param *dst = &ins->dst[0]; 01556 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 01557 char dst_name[50]; 01558 char src_name[2][50]; 01559 DWORD sampler_code = dst->reg.idx; 01560 01561 shader_arb_get_dst_param(ins, dst, dst_name); 01562 01563 /* Sampling the perturbation map in Tsrc was done already, including the signedness correction if needed 01564 * 01565 * Keep in mind that src_name[1] can be "TB" and src_name[0] can be "TA" because modifiers like _x2 are valid 01566 * with bem. So delay loading the first parameter until after the perturbation calculation which needs two 01567 * temps is done. 01568 */ 01569 shader_arb_get_src_param(ins, &ins->src[1], 1, src_name[1]); 01570 shader_addline(buffer, "SWZ TA, bumpenvmat%d, x, z, 0, 0;\n", sampler_code); 01571 shader_addline(buffer, "DP3 TC.r, TA, %s;\n", src_name[1]); 01572 shader_addline(buffer, "SWZ TA, bumpenvmat%d, y, w, 0, 0;\n", sampler_code); 01573 shader_addline(buffer, "DP3 TC.g, TA, %s;\n", src_name[1]); 01574 01575 shader_arb_get_src_param(ins, &ins->src[0], 0, src_name[0]); 01576 shader_addline(buffer, "ADD %s, %s, TC;\n", dst_name, src_name[0]); 01577 } 01578 01579 static DWORD negate_modifiers(DWORD mod, char *extra_char) 01580 { 01581 *extra_char = ' '; 01582 switch(mod) 01583 { 01584 case WINED3DSPSM_NONE: return WINED3DSPSM_NEG; 01585 case WINED3DSPSM_NEG: return WINED3DSPSM_NONE; 01586 case WINED3DSPSM_BIAS: return WINED3DSPSM_BIASNEG; 01587 case WINED3DSPSM_BIASNEG: return WINED3DSPSM_BIAS; 01588 case WINED3DSPSM_SIGN: return WINED3DSPSM_SIGNNEG; 01589 case WINED3DSPSM_SIGNNEG: return WINED3DSPSM_SIGN; 01590 case WINED3DSPSM_COMP: *extra_char = '-'; return WINED3DSPSM_COMP; 01591 case WINED3DSPSM_X2: return WINED3DSPSM_X2NEG; 01592 case WINED3DSPSM_X2NEG: return WINED3DSPSM_X2; 01593 case WINED3DSPSM_DZ: *extra_char = '-'; return WINED3DSPSM_DZ; 01594 case WINED3DSPSM_DW: *extra_char = '-'; return WINED3DSPSM_DW; 01595 case WINED3DSPSM_ABS: return 
WINED3DSPSM_ABSNEG; 01596 case WINED3DSPSM_ABSNEG: return WINED3DSPSM_ABS; 01597 } 01598 FIXME("Unknown modifier %u\n", mod); 01599 return mod; 01600 } 01601 01602 static void pshader_hw_cnd(const struct wined3d_shader_instruction *ins) 01603 { 01604 const struct wined3d_shader_dst_param *dst = &ins->dst[0]; 01605 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 01606 char dst_name[50]; 01607 char src_name[3][50]; 01608 DWORD shader_version = WINED3D_SHADER_VERSION(ins->ctx->reg_maps->shader_version.major, 01609 ins->ctx->reg_maps->shader_version.minor); 01610 01611 shader_arb_get_dst_param(ins, dst, dst_name); 01612 shader_arb_get_src_param(ins, &ins->src[1], 1, src_name[1]); 01613 01614 /* The coissue flag changes the semantic of the cnd instruction in <= 1.3 shaders */ 01615 if (shader_version <= WINED3D_SHADER_VERSION(1, 3) && ins->coissue) 01616 { 01617 shader_addline(buffer, "MOV%s %s, %s;\n", shader_arb_get_modifier(ins), dst_name, src_name[1]); 01618 } 01619 else 01620 { 01621 struct wined3d_shader_src_param src0_copy = ins->src[0]; 01622 char extra_neg; 01623 01624 /* src0 may have a negate srcmod set, so we can't blindly add "-" to the name */ 01625 src0_copy.modifiers = negate_modifiers(src0_copy.modifiers, &extra_neg); 01626 01627 shader_arb_get_src_param(ins, &src0_copy, 0, src_name[0]); 01628 shader_arb_get_src_param(ins, &ins->src[2], 2, src_name[2]); 01629 shader_addline(buffer, "ADD TA, %c%s, coefdiv.x;\n", extra_neg, src_name[0]); 01630 shader_addline(buffer, "CMP%s %s, TA, %s, %s;\n", shader_arb_get_modifier(ins), 01631 dst_name, src_name[1], src_name[2]); 01632 } 01633 } 01634 01635 static void pshader_hw_cmp(const struct wined3d_shader_instruction *ins) 01636 { 01637 const struct wined3d_shader_dst_param *dst = &ins->dst[0]; 01638 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 01639 char dst_name[50]; 01640 char src_name[3][50]; 01641 01642 shader_arb_get_dst_param(ins, dst, dst_name); 01643 01644 /* Generate input register 
names (with modifiers) */ 01645 shader_arb_get_src_param(ins, &ins->src[0], 0, src_name[0]); 01646 shader_arb_get_src_param(ins, &ins->src[1], 1, src_name[1]); 01647 shader_arb_get_src_param(ins, &ins->src[2], 2, src_name[2]); 01648 01649 shader_addline(buffer, "CMP%s %s, %s, %s, %s;\n", shader_arb_get_modifier(ins), 01650 dst_name, src_name[0], src_name[2], src_name[1]); 01651 } 01652 01655 static void pshader_hw_dp2add(const struct wined3d_shader_instruction *ins) 01656 { 01657 const struct wined3d_shader_dst_param *dst = &ins->dst[0]; 01658 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 01659 char dst_name[50]; 01660 char src_name[3][50]; 01661 struct shader_arb_ctx_priv *ctx = ins->ctx->backend_data; 01662 01663 shader_arb_get_dst_param(ins, dst, dst_name); 01664 shader_arb_get_src_param(ins, &ins->src[0], 0, src_name[0]); 01665 shader_arb_get_src_param(ins, &ins->src[2], 2, src_name[2]); 01666 01667 if(ctx->target_version >= NV3) 01668 { 01669 /* GL_NV_fragment_program2 has a 1:1 matching instruction */ 01670 shader_arb_get_src_param(ins, &ins->src[1], 1, src_name[1]); 01671 shader_addline(buffer, "DP2A%s %s, %s, %s, %s;\n", shader_arb_get_modifier(ins), 01672 dst_name, src_name[0], src_name[1], src_name[2]); 01673 } 01674 else if(ctx->target_version >= NV2) 01675 { 01676 /* dst.x = src2.?, src0.x, src1.x + src0.y * src1.y 01677 * dst.y = src2.?, src0.x, src1.z + src0.y * src1.w 01678 * dst.z = src2.?, src0.x, src1.x + src0.y * src1.y 01679 * dst.z = src2.?, src0.x, src1.z + src0.y * src1.w 01680 * 01681 * Make sure that src1.zw = src1.xy, then we get a classic dp2add 01682 * 01683 * .xyxy and other swizzles that we could get with this are not valid in 01684 * plain ARBfp, but luckily the NV extension grammar lifts this limitation. 
01685 */ 01686 struct wined3d_shader_src_param tmp_param = ins->src[1]; 01687 DWORD swizzle = tmp_param.swizzle & 0xf; /* Selects .xy */ 01688 tmp_param.swizzle = swizzle | (swizzle << 4); /* Creates .xyxy */ 01689 01690 shader_arb_get_src_param(ins, &tmp_param, 1, src_name[1]); 01691 01692 shader_addline(buffer, "X2D%s %s, %s, %s, %s;\n", shader_arb_get_modifier(ins), 01693 dst_name, src_name[2], src_name[0], src_name[1]); 01694 } 01695 else 01696 { 01697 shader_arb_get_src_param(ins, &ins->src[1], 1, src_name[1]); 01698 /* Emulate a DP2 with a DP3 and 0.0. Don't use the dest as temp register, it could be src[1] or src[2] 01699 * src_name[0] can be TA, but TA is a private temp for modifiers, so it is save to overwrite 01700 */ 01701 shader_addline(buffer, "MOV TA, %s;\n", src_name[0]); 01702 shader_addline(buffer, "MOV TA.z, 0.0;\n"); 01703 shader_addline(buffer, "DP3 TA, TA, %s;\n", src_name[1]); 01704 shader_addline(buffer, "ADD%s %s, TA, %s;\n", shader_arb_get_modifier(ins), dst_name, src_name[2]); 01705 } 01706 } 01707 01708 /* Map the opcode 1-to-1 to the GL code */ 01709 static void shader_hw_map2gl(const struct wined3d_shader_instruction *ins) 01710 { 01711 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 01712 const char *instruction; 01713 char arguments[256], dst_str[50]; 01714 unsigned int i; 01715 const struct wined3d_shader_dst_param *dst = &ins->dst[0]; 01716 01717 switch (ins->handler_idx) 01718 { 01719 case WINED3DSIH_ABS: instruction = "ABS"; break; 01720 case WINED3DSIH_ADD: instruction = "ADD"; break; 01721 case WINED3DSIH_CRS: instruction = "XPD"; break; 01722 case WINED3DSIH_DP3: instruction = "DP3"; break; 01723 case WINED3DSIH_DP4: instruction = "DP4"; break; 01724 case WINED3DSIH_DST: instruction = "DST"; break; 01725 case WINED3DSIH_FRC: instruction = "FRC"; break; 01726 case WINED3DSIH_LIT: instruction = "LIT"; break; 01727 case WINED3DSIH_LRP: instruction = "LRP"; break; 01728 case WINED3DSIH_MAD: instruction = "MAD"; break; 
01729 case WINED3DSIH_MAX: instruction = "MAX"; break; 01730 case WINED3DSIH_MIN: instruction = "MIN"; break; 01731 case WINED3DSIH_MOV: instruction = "MOV"; break; 01732 case WINED3DSIH_MUL: instruction = "MUL"; break; 01733 case WINED3DSIH_SGE: instruction = "SGE"; break; 01734 case WINED3DSIH_SLT: instruction = "SLT"; break; 01735 case WINED3DSIH_SUB: instruction = "SUB"; break; 01736 case WINED3DSIH_MOVA:instruction = "ARR"; break; 01737 case WINED3DSIH_DSX: instruction = "DDX"; break; 01738 default: instruction = ""; 01739 FIXME("Unhandled opcode %#x\n", ins->handler_idx); 01740 break; 01741 } 01742 01743 /* Note that shader_arb_add_dst_param() adds spaces. */ 01744 arguments[0] = '\0'; 01745 shader_arb_get_dst_param(ins, dst, dst_str); 01746 for (i = 0; i < ins->src_count; ++i) 01747 { 01748 char operand[100]; 01749 strcat(arguments, ", "); 01750 shader_arb_get_src_param(ins, &ins->src[i], i, operand); 01751 strcat(arguments, operand); 01752 } 01753 shader_addline(buffer, "%s%s %s%s;\n", instruction, shader_arb_get_modifier(ins), dst_str, arguments); 01754 } 01755 01756 static void shader_hw_nop(const struct wined3d_shader_instruction *ins) 01757 { 01758 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 01759 shader_addline(buffer, "NOP;\n"); 01760 } 01761 01762 static void shader_hw_mov(const struct wined3d_shader_instruction *ins) 01763 { 01764 const struct wined3d_shader *shader = ins->ctx->shader; 01765 const struct wined3d_shader_reg_maps *reg_maps = ins->ctx->reg_maps; 01766 BOOL pshader = shader_is_pshader_version(reg_maps->shader_version.type); 01767 struct shader_arb_ctx_priv *ctx = ins->ctx->backend_data; 01768 const char *zero = arb_get_helper_value(reg_maps->shader_version.type, ARB_ZERO); 01769 const char *one = arb_get_helper_value(reg_maps->shader_version.type, ARB_ONE); 01770 const char *two = arb_get_helper_value(reg_maps->shader_version.type, ARB_TWO); 01771 01772 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 01773 char 
src0_param[256]; 01774 01775 if (ins->handler_idx == WINED3DSIH_MOVA) 01776 { 01777 const struct arb_vshader_private *shader_data = shader->backend_data; 01778 char write_mask[6]; 01779 const char *offset = arb_get_helper_value(WINED3D_SHADER_TYPE_VERTEX, ARB_VS_REL_OFFSET); 01780 01781 if(ctx->target_version >= NV2) { 01782 shader_hw_map2gl(ins); 01783 return; 01784 } 01785 shader_arb_get_src_param(ins, &ins->src[0], 0, src0_param); 01786 shader_arb_get_write_mask(ins, &ins->dst[0], write_mask); 01787 01788 /* This implements the mova formula used in GLSL. The first two instructions 01789 * prepare the sign() part. Note that it is fine to have my_sign(0.0) = 1.0 01790 * in this case: 01791 * mova A0.x, 0.0 01792 * 01793 * A0.x = arl(floor(abs(0.0) + 0.5) * 1.0) = floor(0.5) = 0.0 since arl does a floor 01794 * 01795 * The ARL is performed when A0 is used - the requested component is read from A0_SHADOW into 01796 * A0.x. We can use the overwritten component of A0_shadow as temporary storage for the sign. 
01797 */ 01798 shader_addline(buffer, "SGE A0_SHADOW%s, %s, %s;\n", write_mask, src0_param, zero); 01799 shader_addline(buffer, "MAD A0_SHADOW%s, A0_SHADOW, %s, -%s;\n", write_mask, two, one); 01800 01801 shader_addline(buffer, "ABS TA%s, %s;\n", write_mask, src0_param); 01802 shader_addline(buffer, "ADD TA%s, TA, rel_addr_const.x;\n", write_mask); 01803 shader_addline(buffer, "FLR TA%s, TA;\n", write_mask); 01804 if (shader_data->rel_offset) 01805 { 01806 shader_addline(buffer, "ADD TA%s, TA, %s;\n", write_mask, offset); 01807 } 01808 shader_addline(buffer, "MUL A0_SHADOW%s, TA, A0_SHADOW;\n", write_mask); 01809 01810 ((struct shader_arb_ctx_priv *)ins->ctx->backend_data)->addr_reg[0] = '\0'; 01811 } 01812 else if (reg_maps->shader_version.major == 1 01813 && !shader_is_pshader_version(reg_maps->shader_version.type) 01814 && ins->dst[0].reg.type == WINED3DSPR_ADDR) 01815 { 01816 const struct arb_vshader_private *shader_data = shader->backend_data; 01817 src0_param[0] = '\0'; 01818 01819 if (shader_data->rel_offset) 01820 { 01821 const char *offset = arb_get_helper_value(WINED3D_SHADER_TYPE_VERTEX, ARB_VS_REL_OFFSET); 01822 shader_arb_get_src_param(ins, &ins->src[0], 0, src0_param); 01823 shader_addline(buffer, "ADD TA.x, %s, %s;\n", src0_param, offset); 01824 shader_addline(buffer, "ARL A0.x, TA.x;\n"); 01825 } 01826 else 01827 { 01828 /* Apple's ARB_vertex_program implementation does not accept an ARL source argument 01829 * with more than one component. Thus replicate the first source argument over all 01830 * 4 components. 
For example, .xyzw -> .x (or better: .xxxx), .zwxy -> .z, etc) */ 01831 struct wined3d_shader_src_param tmp_src = ins->src[0]; 01832 tmp_src.swizzle = (tmp_src.swizzle & 0x3) * 0x55; 01833 shader_arb_get_src_param(ins, &tmp_src, 0, src0_param); 01834 shader_addline(buffer, "ARL A0.x, %s;\n", src0_param); 01835 } 01836 } 01837 else if (ins->dst[0].reg.type == WINED3DSPR_COLOROUT && !ins->dst[0].reg.idx && pshader) 01838 { 01839 if (ctx->cur_ps_args->super.srgb_correction && shader->u.ps.color0_mov) 01840 { 01841 shader_addline(buffer, "#mov handled in srgb write code\n"); 01842 return; 01843 } 01844 shader_hw_map2gl(ins); 01845 } 01846 else 01847 { 01848 shader_hw_map2gl(ins); 01849 } 01850 } 01851 01852 static void pshader_hw_texkill(const struct wined3d_shader_instruction *ins) 01853 { 01854 const struct wined3d_shader_dst_param *dst = &ins->dst[0]; 01855 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 01856 char reg_dest[40]; 01857 01858 /* No swizzles are allowed in d3d's texkill. PS 1.x ignores the 4th component as documented, 01859 * but >= 2.0 honors it (undocumented, but tested by the d3d9 testsuite) 01860 */ 01861 shader_arb_get_dst_param(ins, dst, reg_dest); 01862 01863 if (ins->ctx->reg_maps->shader_version.major >= 2) 01864 { 01865 const char *kilsrc = "TA"; 01866 BOOL is_color; 01867 01868 shader_arb_get_register_name(ins, &dst->reg, reg_dest, &is_color); 01869 if(dst->write_mask == WINED3DSP_WRITEMASK_ALL) 01870 { 01871 kilsrc = reg_dest; 01872 } 01873 else 01874 { 01875 /* Sigh. KIL doesn't support swizzles/writemasks. KIL passes a writemask, but ".xy" for example 01876 * is not valid as a swizzle in ARB (needs ".xyyy"). 
Use SWZ to load the register properly, and set 01877 * masked out components to 0(won't kill) 01878 */ 01879 char x = '0', y = '0', z = '0', w = '0'; 01880 if(dst->write_mask & WINED3DSP_WRITEMASK_0) x = 'x'; 01881 if(dst->write_mask & WINED3DSP_WRITEMASK_1) y = 'y'; 01882 if(dst->write_mask & WINED3DSP_WRITEMASK_2) z = 'z'; 01883 if(dst->write_mask & WINED3DSP_WRITEMASK_3) w = 'w'; 01884 shader_addline(buffer, "SWZ TA, %s, %c, %c, %c, %c;\n", reg_dest, x, y, z, w); 01885 } 01886 shader_addline(buffer, "KIL %s;\n", kilsrc); 01887 } else { 01888 /* ARB fp doesn't like swizzles on the parameter of the KIL instruction. To mask the 4th component, 01889 * copy the register into our general purpose TMP variable, overwrite .w and pass TMP to KIL 01890 * 01891 * ps_1_3 shaders use the texcoord incarnation of the Tx register. ps_1_4 shaders can use the same, 01892 * or pass in any temporary register(in shader phase 2) 01893 */ 01894 if(ins->ctx->reg_maps->shader_version.minor <= 3) { 01895 sprintf(reg_dest, "fragment.texcoord[%u]", dst->reg.idx); 01896 } else { 01897 shader_arb_get_dst_param(ins, dst, reg_dest); 01898 } 01899 shader_addline(buffer, "SWZ TA, %s, x, y, z, 1;\n", reg_dest); 01900 shader_addline(buffer, "KIL TA;\n"); 01901 } 01902 } 01903 01904 static void pshader_hw_tex(const struct wined3d_shader_instruction *ins) 01905 { 01906 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 01907 const struct wined3d_shader_dst_param *dst = &ins->dst[0]; 01908 DWORD shader_version = WINED3D_SHADER_VERSION(ins->ctx->reg_maps->shader_version.major, 01909 ins->ctx->reg_maps->shader_version.minor); 01910 struct wined3d_shader_src_param src; 01911 01912 char reg_dest[40]; 01913 char reg_coord[40]; 01914 DWORD reg_sampler_code; 01915 WORD myflags = 0; 01916 01917 /* All versions have a destination register */ 01918 shader_arb_get_dst_param(ins, dst, reg_dest); 01919 01920 /* 1.0-1.4: Use destination register number as texture code. 
01921 2.0+: Use provided sampler number as texure code. */ 01922 if (shader_version < WINED3D_SHADER_VERSION(2,0)) 01923 reg_sampler_code = dst->reg.idx; 01924 else 01925 reg_sampler_code = ins->src[1].reg.idx; 01926 01927 /* 1.0-1.3: Use the texcoord varying. 01928 1.4+: Use provided coordinate source register. */ 01929 if (shader_version < WINED3D_SHADER_VERSION(1,4)) 01930 sprintf(reg_coord, "fragment.texcoord[%u]", reg_sampler_code); 01931 else { 01932 /* TEX is the only instruction that can handle DW and DZ natively */ 01933 src = ins->src[0]; 01934 if(src.modifiers == WINED3DSPSM_DW) src.modifiers = WINED3DSPSM_NONE; 01935 if(src.modifiers == WINED3DSPSM_DZ) src.modifiers = WINED3DSPSM_NONE; 01936 shader_arb_get_src_param(ins, &src, 0, reg_coord); 01937 } 01938 01939 /* projection flag: 01940 * 1.1, 1.2, 1.3: Use WINED3D_TSS_TEXTURETRANSFORMFLAGS 01941 * 1.4: Use WINED3DSPSM_DZ or WINED3DSPSM_DW on src[0] 01942 * 2.0+: Use WINED3DSI_TEXLD_PROJECT on the opcode 01943 */ 01944 if (shader_version < WINED3D_SHADER_VERSION(1,4)) 01945 { 01946 DWORD flags = 0; 01947 if (reg_sampler_code < MAX_TEXTURES) 01948 flags = priv->cur_ps_args->super.tex_transform >> reg_sampler_code * WINED3D_PSARGS_TEXTRANSFORM_SHIFT; 01949 if (flags & WINED3D_PSARGS_PROJECTED) 01950 myflags |= TEX_PROJ; 01951 } 01952 else if (shader_version < WINED3D_SHADER_VERSION(2,0)) 01953 { 01954 enum wined3d_shader_src_modifier src_mod = ins->src[0].modifiers; 01955 if (src_mod == WINED3DSPSM_DZ) 01956 { 01957 /* TXP cannot handle DZ natively, so move the z coordinate to .w. 
reg_coord is a read-only 01958 * varying register, so we need a temp reg 01959 */ 01960 shader_addline(ins->ctx->buffer, "SWZ TA, %s, x, y, z, z;\n", reg_coord); 01961 strcpy(reg_coord, "TA"); 01962 myflags |= TEX_PROJ; 01963 } else if(src_mod == WINED3DSPSM_DW) { 01964 myflags |= TEX_PROJ; 01965 } 01966 } else { 01967 if (ins->flags & WINED3DSI_TEXLD_PROJECT) myflags |= TEX_PROJ; 01968 if (ins->flags & WINED3DSI_TEXLD_BIAS) myflags |= TEX_BIAS; 01969 } 01970 shader_hw_sample(ins, reg_sampler_code, reg_dest, reg_coord, myflags, NULL, NULL); 01971 } 01972 01973 static void pshader_hw_texcoord(const struct wined3d_shader_instruction *ins) 01974 { 01975 const struct wined3d_shader_dst_param *dst = &ins->dst[0]; 01976 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 01977 DWORD shader_version = WINED3D_SHADER_VERSION(ins->ctx->reg_maps->shader_version.major, 01978 ins->ctx->reg_maps->shader_version.minor); 01979 char dst_str[50]; 01980 01981 if (shader_version < WINED3D_SHADER_VERSION(1,4)) 01982 { 01983 DWORD reg = dst->reg.idx; 01984 01985 shader_arb_get_dst_param(ins, &ins->dst[0], dst_str); 01986 shader_addline(buffer, "MOV_SAT %s, fragment.texcoord[%u];\n", dst_str, reg); 01987 } else { 01988 char reg_src[40]; 01989 01990 shader_arb_get_src_param(ins, &ins->src[0], 0, reg_src); 01991 shader_arb_get_dst_param(ins, &ins->dst[0], dst_str); 01992 shader_addline(buffer, "MOV %s, %s;\n", dst_str, reg_src); 01993 } 01994 } 01995 01996 static void pshader_hw_texreg2ar(const struct wined3d_shader_instruction *ins) 01997 { 01998 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 01999 DWORD flags = 0; 02000 02001 DWORD reg1 = ins->dst[0].reg.idx; 02002 char dst_str[50]; 02003 char src_str[50]; 02004 02005 /* Note that texreg2ar treats Tx as a temporary register, not as a varying */ 02006 shader_arb_get_dst_param(ins, &ins->dst[0], dst_str); 02007 shader_arb_get_src_param(ins, &ins->src[0], 0, src_str); 02008 /* Move .x first in case src_str is "TA" */ 02009 
shader_addline(buffer, "MOV TA.y, %s.x;\n", src_str); 02010 shader_addline(buffer, "MOV TA.x, %s.w;\n", src_str); 02011 if (reg1 < MAX_TEXTURES) 02012 { 02013 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 02014 flags = priv->cur_ps_args->super.tex_transform >> reg1 * WINED3D_PSARGS_TEXTRANSFORM_SHIFT; 02015 } 02016 shader_hw_sample(ins, reg1, dst_str, "TA", flags & WINED3D_PSARGS_PROJECTED ? TEX_PROJ : 0, NULL, NULL); 02017 } 02018 02019 static void pshader_hw_texreg2gb(const struct wined3d_shader_instruction *ins) 02020 { 02021 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 02022 02023 DWORD reg1 = ins->dst[0].reg.idx; 02024 char dst_str[50]; 02025 char src_str[50]; 02026 02027 /* Note that texreg2gb treats Tx as a temporary register, not as a varying */ 02028 shader_arb_get_dst_param(ins, &ins->dst[0], dst_str); 02029 shader_arb_get_src_param(ins, &ins->src[0], 0, src_str); 02030 shader_addline(buffer, "MOV TA.x, %s.y;\n", src_str); 02031 shader_addline(buffer, "MOV TA.y, %s.z;\n", src_str); 02032 shader_hw_sample(ins, reg1, dst_str, "TA", 0, NULL, NULL); 02033 } 02034 02035 static void pshader_hw_texreg2rgb(const struct wined3d_shader_instruction *ins) 02036 { 02037 DWORD reg1 = ins->dst[0].reg.idx; 02038 char dst_str[50]; 02039 char src_str[50]; 02040 02041 /* Note that texreg2rg treats Tx as a temporary register, not as a varying */ 02042 shader_arb_get_dst_param(ins, &ins->dst[0], dst_str); 02043 shader_arb_get_src_param(ins, &ins->src[0], 0, src_str); 02044 shader_hw_sample(ins, reg1, dst_str, src_str, 0, NULL, NULL); 02045 } 02046 02047 static void pshader_hw_texbem(const struct wined3d_shader_instruction *ins) 02048 { 02049 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 02050 const struct wined3d_shader_dst_param *dst = &ins->dst[0]; 02051 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 02052 char reg_coord[40], dst_reg[50], src_reg[50]; 02053 DWORD reg_dest_code; 02054 02055 /* All versions have a destination 
register. The Tx where the texture coordinates come 02056 * from is the varying incarnation of the texture register 02057 */ 02058 reg_dest_code = dst->reg.idx; 02059 shader_arb_get_dst_param(ins, &ins->dst[0], dst_reg); 02060 shader_arb_get_src_param(ins, &ins->src[0], 0, src_reg); 02061 sprintf(reg_coord, "fragment.texcoord[%u]", reg_dest_code); 02062 02063 /* Sampling the perturbation map in Tsrc was done already, including the signedness correction if needed 02064 * The Tx in which the perturbation map is stored is the tempreg incarnation of the texture register 02065 * 02066 * GL_NV_fragment_program_option could handle this in one instruction via X2D: 02067 * X2D TA.xy, fragment.texcoord, T%u, bumpenvmat%u.xzyw 02068 * 02069 * However, the NV extensions are never enabled for <= 2.0 shaders because of the performance penalty that 02070 * comes with it, and texbem is an 1.x only instruction. No 1.x instruction forces us to enable the NV 02071 * extension. 02072 */ 02073 shader_addline(buffer, "SWZ TB, bumpenvmat%d, x, z, 0, 0;\n", reg_dest_code); 02074 shader_addline(buffer, "DP3 TA.x, TB, %s;\n", src_reg); 02075 shader_addline(buffer, "SWZ TB, bumpenvmat%d, y, w, 0, 0;\n", reg_dest_code); 02076 shader_addline(buffer, "DP3 TA.y, TB, %s;\n", src_reg); 02077 02078 /* with projective textures, texbem only divides the static texture coord, not the displacement, 02079 * so we can't let the GL handle this. 
02080 */ 02081 if ((priv->cur_ps_args->super.tex_transform >> reg_dest_code * WINED3D_PSARGS_TEXTRANSFORM_SHIFT) 02082 & WINED3D_PSARGS_PROJECTED) 02083 { 02084 shader_addline(buffer, "RCP TB.w, %s.w;\n", reg_coord); 02085 shader_addline(buffer, "MUL TB.xy, %s, TB.w;\n", reg_coord); 02086 shader_addline(buffer, "ADD TA.xy, TA, TB;\n"); 02087 } else { 02088 shader_addline(buffer, "ADD TA.xy, TA, %s;\n", reg_coord); 02089 } 02090 02091 shader_hw_sample(ins, reg_dest_code, dst_reg, "TA", 0, NULL, NULL); 02092 02093 if (ins->handler_idx == WINED3DSIH_TEXBEML) 02094 { 02095 /* No src swizzles are allowed, so this is ok */ 02096 shader_addline(buffer, "MAD TA, %s.z, luminance%d.x, luminance%d.y;\n", 02097 src_reg, reg_dest_code, reg_dest_code); 02098 shader_addline(buffer, "MUL %s, %s, TA;\n", dst_reg, dst_reg); 02099 } 02100 } 02101 02102 static void pshader_hw_texm3x2pad(const struct wined3d_shader_instruction *ins) 02103 { 02104 DWORD reg = ins->dst[0].reg.idx; 02105 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 02106 char src0_name[50], dst_name[50]; 02107 BOOL is_color; 02108 struct wined3d_shader_register tmp_reg = ins->dst[0].reg; 02109 02110 shader_arb_get_src_param(ins, &ins->src[0], 0, src0_name); 02111 /* The next instruction will be a texm3x2tex or texm3x2depth that writes to the uninitialized 02112 * T<reg+1> register. 
Use this register to store the calculated vector 02113 */ 02114 tmp_reg.idx = reg + 1; 02115 shader_arb_get_register_name(ins, &tmp_reg, dst_name, &is_color); 02116 shader_addline(buffer, "DP3 %s.x, fragment.texcoord[%u], %s;\n", dst_name, reg, src0_name); 02117 } 02118 02119 static void pshader_hw_texm3x2tex(const struct wined3d_shader_instruction *ins) 02120 { 02121 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 02122 DWORD flags; 02123 DWORD reg = ins->dst[0].reg.idx; 02124 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 02125 char dst_str[50]; 02126 char src0_name[50]; 02127 char dst_reg[50]; 02128 BOOL is_color; 02129 02130 /* We know that we're writing to the uninitialized T<reg> register, so use it for temporary storage */ 02131 shader_arb_get_register_name(ins, &ins->dst[0].reg, dst_reg, &is_color); 02132 02133 shader_arb_get_dst_param(ins, &ins->dst[0], dst_str); 02134 shader_arb_get_src_param(ins, &ins->src[0], 0, src0_name); 02135 shader_addline(buffer, "DP3 %s.y, fragment.texcoord[%u], %s;\n", dst_reg, reg, src0_name); 02136 flags = reg < MAX_TEXTURES ? priv->cur_ps_args->super.tex_transform >> reg * WINED3D_PSARGS_TEXTRANSFORM_SHIFT : 0; 02137 shader_hw_sample(ins, reg, dst_str, dst_reg, flags & WINED3D_PSARGS_PROJECTED ? TEX_PROJ : 0, NULL, NULL); 02138 } 02139 02140 static void pshader_hw_texm3x3pad(const struct wined3d_shader_instruction *ins) 02141 { 02142 struct wined3d_shader_tex_mx *tex_mx = ins->ctx->tex_mx; 02143 DWORD reg = ins->dst[0].reg.idx; 02144 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 02145 char src0_name[50], dst_name[50]; 02146 struct wined3d_shader_register tmp_reg = ins->dst[0].reg; 02147 BOOL is_color; 02148 02149 /* There are always 2 texm3x3pad instructions followed by one texm3x3[tex,vspec, ...] instruction, with 02150 * incrementing ins->dst[0].register_idx numbers. 
So the pad instruction already knows the final destination 02151 * register, and this register is uninitialized(otherwise the assembler complains that it is 'redeclared') 02152 */ 02153 tmp_reg.idx = reg + 2 - tex_mx->current_row; 02154 shader_arb_get_register_name(ins, &tmp_reg, dst_name, &is_color); 02155 02156 shader_arb_get_src_param(ins, &ins->src[0], 0, src0_name); 02157 shader_addline(buffer, "DP3 %s.%c, fragment.texcoord[%u], %s;\n", 02158 dst_name, 'x' + tex_mx->current_row, reg, src0_name); 02159 tex_mx->texcoord_w[tex_mx->current_row++] = reg; 02160 } 02161 02162 static void pshader_hw_texm3x3tex(const struct wined3d_shader_instruction *ins) 02163 { 02164 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 02165 struct wined3d_shader_tex_mx *tex_mx = ins->ctx->tex_mx; 02166 DWORD flags; 02167 DWORD reg = ins->dst[0].reg.idx; 02168 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 02169 char dst_str[50]; 02170 char src0_name[50], dst_name[50]; 02171 BOOL is_color; 02172 02173 shader_arb_get_register_name(ins, &ins->dst[0].reg, dst_name, &is_color); 02174 shader_arb_get_src_param(ins, &ins->src[0], 0, src0_name); 02175 shader_addline(buffer, "DP3 %s.z, fragment.texcoord[%u], %s;\n", dst_name, reg, src0_name); 02176 02177 /* Sample the texture using the calculated coordinates */ 02178 shader_arb_get_dst_param(ins, &ins->dst[0], dst_str); 02179 flags = reg < MAX_TEXTURES ? priv->cur_ps_args->super.tex_transform >> reg * WINED3D_PSARGS_TEXTRANSFORM_SHIFT : 0; 02180 shader_hw_sample(ins, reg, dst_str, dst_name, flags & WINED3D_PSARGS_PROJECTED ? 
TEX_PROJ : 0, NULL, NULL); 02181 tex_mx->current_row = 0; 02182 } 02183 02184 static void pshader_hw_texm3x3vspec(const struct wined3d_shader_instruction *ins) 02185 { 02186 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 02187 struct wined3d_shader_tex_mx *tex_mx = ins->ctx->tex_mx; 02188 DWORD flags; 02189 DWORD reg = ins->dst[0].reg.idx; 02190 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 02191 char dst_str[50]; 02192 char src0_name[50]; 02193 char dst_reg[50]; 02194 BOOL is_color; 02195 02196 /* Get the dst reg without writemask strings. We know this register is uninitialized, so we can use all 02197 * components for temporary data storage 02198 */ 02199 shader_arb_get_register_name(ins, &ins->dst[0].reg, dst_reg, &is_color); 02200 shader_arb_get_src_param(ins, &ins->src[0], 0, src0_name); 02201 shader_addline(buffer, "DP3 %s.z, fragment.texcoord[%u], %s;\n", dst_reg, reg, src0_name); 02202 02203 /* Construct the eye-ray vector from w coordinates */ 02204 shader_addline(buffer, "MOV TB.x, fragment.texcoord[%u].w;\n", tex_mx->texcoord_w[0]); 02205 shader_addline(buffer, "MOV TB.y, fragment.texcoord[%u].w;\n", tex_mx->texcoord_w[1]); 02206 shader_addline(buffer, "MOV TB.z, fragment.texcoord[%u].w;\n", reg); 02207 02208 /* Calculate reflection vector 02209 */ 02210 shader_addline(buffer, "DP3 %s.w, %s, TB;\n", dst_reg, dst_reg); 02211 /* The .w is ignored when sampling, so I can use TB.w to calculate dot(N, N) */ 02212 shader_addline(buffer, "DP3 TB.w, %s, %s;\n", dst_reg, dst_reg); 02213 shader_addline(buffer, "RCP TB.w, TB.w;\n"); 02214 shader_addline(buffer, "MUL %s.w, %s.w, TB.w;\n", dst_reg, dst_reg); 02215 shader_addline(buffer, "MUL %s, %s.w, %s;\n", dst_reg, dst_reg, dst_reg); 02216 shader_addline(buffer, "MAD %s, coefmul.x, %s, -TB;\n", dst_reg, dst_reg); 02217 02218 /* Sample the texture using the calculated coordinates */ 02219 shader_arb_get_dst_param(ins, &ins->dst[0], dst_str); 02220 flags = reg < MAX_TEXTURES ? 
priv->cur_ps_args->super.tex_transform >> reg * WINED3D_PSARGS_TEXTRANSFORM_SHIFT : 0; 02221 shader_hw_sample(ins, reg, dst_str, dst_reg, flags & WINED3D_PSARGS_PROJECTED ? TEX_PROJ : 0, NULL, NULL); 02222 tex_mx->current_row = 0; 02223 } 02224 02225 static void pshader_hw_texm3x3spec(const struct wined3d_shader_instruction *ins) 02226 { 02227 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 02228 struct wined3d_shader_tex_mx *tex_mx = ins->ctx->tex_mx; 02229 DWORD flags; 02230 DWORD reg = ins->dst[0].reg.idx; 02231 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 02232 char dst_str[50]; 02233 char src0_name[50]; 02234 char src1_name[50]; 02235 char dst_reg[50]; 02236 BOOL is_color; 02237 02238 shader_arb_get_src_param(ins, &ins->src[0], 0, src0_name); 02239 shader_arb_get_src_param(ins, &ins->src[0], 1, src1_name); 02240 shader_arb_get_register_name(ins, &ins->dst[0].reg, dst_reg, &is_color); 02241 /* Note: dst_reg.xy is input here, generated by two texm3x3pad instructions */ 02242 shader_addline(buffer, "DP3 %s.z, fragment.texcoord[%u], %s;\n", dst_reg, reg, src0_name); 02243 02244 /* Calculate reflection vector. 02245 * 02246 * dot(N, E) 02247 * dst_reg.xyz = 2 * --------- * N - E 02248 * dot(N, N) 02249 * 02250 * Which normalizes the normal vector 02251 */ 02252 shader_addline(buffer, "DP3 %s.w, %s, %s;\n", dst_reg, dst_reg, src1_name); 02253 shader_addline(buffer, "DP3 TC.w, %s, %s;\n", dst_reg, dst_reg); 02254 shader_addline(buffer, "RCP TC.w, TC.w;\n"); 02255 shader_addline(buffer, "MUL %s.w, %s.w, TC.w;\n", dst_reg, dst_reg); 02256 shader_addline(buffer, "MUL %s, %s.w, %s;\n", dst_reg, dst_reg, dst_reg); 02257 shader_addline(buffer, "MAD %s, coefmul.x, %s, -%s;\n", dst_reg, dst_reg, src1_name); 02258 02259 /* Sample the texture using the calculated coordinates */ 02260 shader_arb_get_dst_param(ins, &ins->dst[0], dst_str); 02261 flags = reg < MAX_TEXTURES ? 
priv->cur_ps_args->super.tex_transform >> reg * WINED3D_PSARGS_TEXTRANSFORM_SHIFT : 0; 02262 shader_hw_sample(ins, reg, dst_str, dst_reg, flags & WINED3D_PSARGS_PROJECTED ? TEX_PROJ : 0, NULL, NULL); 02263 tex_mx->current_row = 0; 02264 } 02265 02266 static void pshader_hw_texdepth(const struct wined3d_shader_instruction *ins) 02267 { 02268 const struct wined3d_shader_dst_param *dst = &ins->dst[0]; 02269 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 02270 char dst_name[50]; 02271 const char *zero = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_ZERO); 02272 const char *one = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_ONE); 02273 02274 /* texdepth has an implicit destination, the fragment depth value. It's only parameter, 02275 * which is essentially an input, is the destination register because it is the first 02276 * parameter. According to the msdn, this must be register r5, but let's keep it more flexible 02277 * here(writemasks/swizzles are not valid on texdepth) 02278 */ 02279 shader_arb_get_dst_param(ins, dst, dst_name); 02280 02281 /* According to the msdn, the source register(must be r5) is unusable after 02282 * the texdepth instruction, so we're free to modify it 02283 */ 02284 shader_addline(buffer, "MIN %s.y, %s.y, %s;\n", dst_name, dst_name, one); 02285 02286 /* How to deal with the special case dst_name.g == 0? if r != 0, then 02287 * the r * (1 / 0) will give infinity, which is clamped to 1.0, the correct 02288 * result. But if r = 0.0, then 0 * inf = 0, which is incorrect. 
02289 */ 02290 shader_addline(buffer, "RCP %s.y, %s.y;\n", dst_name, dst_name); 02291 shader_addline(buffer, "MUL TA.x, %s.x, %s.y;\n", dst_name, dst_name); 02292 shader_addline(buffer, "MIN TA.x, TA.x, %s;\n", one); 02293 shader_addline(buffer, "MAX result.depth, TA.x, %s;\n", zero); 02294 } 02295 02299 static void pshader_hw_texdp3tex(const struct wined3d_shader_instruction *ins) 02300 { 02301 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 02302 DWORD sampler_idx = ins->dst[0].reg.idx; 02303 char src0[50]; 02304 char dst_str[50]; 02305 02306 shader_arb_get_src_param(ins, &ins->src[0], 0, src0); 02307 shader_addline(buffer, "MOV TB, 0.0;\n"); 02308 shader_addline(buffer, "DP3 TB.x, fragment.texcoord[%u], %s;\n", sampler_idx, src0); 02309 02310 shader_arb_get_dst_param(ins, &ins->dst[0], dst_str); 02311 shader_hw_sample(ins, sampler_idx, dst_str, "TB", 0 /* Only one coord, can't be projected */, NULL, NULL); 02312 } 02313 02316 static void pshader_hw_texdp3(const struct wined3d_shader_instruction *ins) 02317 { 02318 const struct wined3d_shader_dst_param *dst = &ins->dst[0]; 02319 char src0[50]; 02320 char dst_str[50]; 02321 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 02322 02323 /* Handle output register */ 02324 shader_arb_get_dst_param(ins, dst, dst_str); 02325 shader_arb_get_src_param(ins, &ins->src[0], 0, src0); 02326 shader_addline(buffer, "DP3 %s, fragment.texcoord[%u], %s;\n", dst_str, dst->reg.idx, src0); 02327 } 02328 02331 static void pshader_hw_texm3x3(const struct wined3d_shader_instruction *ins) 02332 { 02333 const struct wined3d_shader_dst_param *dst = &ins->dst[0]; 02334 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 02335 char dst_str[50], dst_name[50]; 02336 char src0[50]; 02337 BOOL is_color; 02338 02339 shader_arb_get_dst_param(ins, dst, dst_str); 02340 shader_arb_get_src_param(ins, &ins->src[0], 0, src0); 02341 shader_arb_get_register_name(ins, &ins->dst[0].reg, dst_name, &is_color); 02342 shader_addline(buffer, 
"DP3 %s.z, fragment.texcoord[%u], %s;\n", dst_name, dst->reg.idx, src0); 02343 shader_addline(buffer, "MOV %s, %s;\n", dst_str, dst_name); 02344 } 02345 02351 static void pshader_hw_texm3x2depth(const struct wined3d_shader_instruction *ins) 02352 { 02353 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 02354 const struct wined3d_shader_dst_param *dst = &ins->dst[0]; 02355 char src0[50], dst_name[50]; 02356 BOOL is_color; 02357 const char *zero = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_ZERO); 02358 const char *one = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_ONE); 02359 02360 shader_arb_get_src_param(ins, &ins->src[0], 0, src0); 02361 shader_arb_get_register_name(ins, &ins->dst[0].reg, dst_name, &is_color); 02362 shader_addline(buffer, "DP3 %s.y, fragment.texcoord[%u], %s;\n", dst_name, dst->reg.idx, src0); 02363 02364 /* How to deal with the special case dst_name.g == 0? if r != 0, then 02365 * the r * (1 / 0) will give infinity, which is clamped to 1.0, the correct 02366 * result. But if r = 0.0, then 0 * inf = 0, which is incorrect. 
02367 */ 02368 shader_addline(buffer, "RCP %s.y, %s.y;\n", dst_name, dst_name); 02369 shader_addline(buffer, "MUL %s.x, %s.x, %s.y;\n", dst_name, dst_name, dst_name); 02370 shader_addline(buffer, "MIN %s.x, %s.x, %s;\n", dst_name, dst_name, one); 02371 shader_addline(buffer, "MAX result.depth, %s.x, %s;\n", dst_name, zero); 02372 } 02373 02376 static void shader_hw_mnxn(const struct wined3d_shader_instruction *ins) 02377 { 02378 int i; 02379 int nComponents = 0; 02380 struct wined3d_shader_dst_param tmp_dst = {{0}}; 02381 struct wined3d_shader_src_param tmp_src[2] = {{{0}}}; 02382 struct wined3d_shader_instruction tmp_ins; 02383 02384 memset(&tmp_ins, 0, sizeof(tmp_ins)); 02385 02386 /* Set constants for the temporary argument */ 02387 tmp_ins.ctx = ins->ctx; 02388 tmp_ins.dst_count = 1; 02389 tmp_ins.dst = &tmp_dst; 02390 tmp_ins.src_count = 2; 02391 tmp_ins.src = tmp_src; 02392 02393 switch(ins->handler_idx) 02394 { 02395 case WINED3DSIH_M4x4: 02396 nComponents = 4; 02397 tmp_ins.handler_idx = WINED3DSIH_DP4; 02398 break; 02399 case WINED3DSIH_M4x3: 02400 nComponents = 3; 02401 tmp_ins.handler_idx = WINED3DSIH_DP4; 02402 break; 02403 case WINED3DSIH_M3x4: 02404 nComponents = 4; 02405 tmp_ins.handler_idx = WINED3DSIH_DP3; 02406 break; 02407 case WINED3DSIH_M3x3: 02408 nComponents = 3; 02409 tmp_ins.handler_idx = WINED3DSIH_DP3; 02410 break; 02411 case WINED3DSIH_M3x2: 02412 nComponents = 2; 02413 tmp_ins.handler_idx = WINED3DSIH_DP3; 02414 break; 02415 default: 02416 FIXME("Unhandled opcode %#x\n", ins->handler_idx); 02417 break; 02418 } 02419 02420 tmp_dst = ins->dst[0]; 02421 tmp_src[0] = ins->src[0]; 02422 tmp_src[1] = ins->src[1]; 02423 for (i = 0; i < nComponents; i++) { 02424 tmp_dst.write_mask = WINED3DSP_WRITEMASK_0 << i; 02425 shader_hw_map2gl(&tmp_ins); 02426 ++tmp_src[1].reg.idx; 02427 } 02428 } 02429 02430 static void shader_hw_rcp(const struct wined3d_shader_instruction *ins) 02431 { 02432 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 02433 
02434 char dst[50]; 02435 char src[50]; 02436 02437 shader_arb_get_dst_param(ins, &ins->dst[0], dst); /* Destination */ 02438 shader_arb_get_src_param(ins, &ins->src[0], 0, src); 02439 if (ins->src[0].swizzle == WINED3DSP_NOSWIZZLE) 02440 { 02441 /* Dx sdk says .x is used if no swizzle is given, but our test shows that 02442 * .w is used 02443 */ 02444 strcat(src, ".w"); 02445 } 02446 02447 shader_addline(buffer, "RCP%s %s, %s;\n", shader_arb_get_modifier(ins), dst, src); 02448 } 02449 02450 static void shader_hw_scalar_op(const struct wined3d_shader_instruction *ins) 02451 { 02452 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 02453 const char *instruction; 02454 02455 char dst[50]; 02456 char src[50]; 02457 02458 switch(ins->handler_idx) 02459 { 02460 case WINED3DSIH_RSQ: instruction = "RSQ"; break; 02461 case WINED3DSIH_RCP: instruction = "RCP"; break; 02462 case WINED3DSIH_EXP: instruction = "EX2"; break; 02463 case WINED3DSIH_EXPP: instruction = "EXP"; break; 02464 default: instruction = ""; 02465 FIXME("Unhandled opcode %#x\n", ins->handler_idx); 02466 break; 02467 } 02468 02469 shader_arb_get_dst_param(ins, &ins->dst[0], dst); /* Destination */ 02470 shader_arb_get_src_param(ins, &ins->src[0], 0, src); 02471 if (ins->src[0].swizzle == WINED3DSP_NOSWIZZLE) 02472 { 02473 /* Dx sdk says .x is used if no swizzle is given, but our test shows that 02474 * .w is used 02475 */ 02476 strcat(src, ".w"); 02477 } 02478 02479 shader_addline(buffer, "%s%s %s, %s;\n", instruction, shader_arb_get_modifier(ins), dst, src); 02480 } 02481 02482 static void shader_hw_nrm(const struct wined3d_shader_instruction *ins) 02483 { 02484 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 02485 char dst_name[50]; 02486 char src_name[50]; 02487 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 02488 BOOL pshader = shader_is_pshader_version(ins->ctx->reg_maps->shader_version.type); 02489 const char *zero = 
arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_ZERO); 02490 02491 shader_arb_get_dst_param(ins, &ins->dst[0], dst_name); 02492 shader_arb_get_src_param(ins, &ins->src[0], 1 /* Use TB */, src_name); 02493 02494 /* In D3D, NRM of a vector with length zero returns zero. Catch this situation, as 02495 * otherwise NRM or RSQ would return NaN */ 02496 if(pshader && priv->target_version >= NV3) 02497 { 02498 /* GL_NV_fragment_program2's NRM needs protection against length zero vectors too 02499 * 02500 * TODO: Find out if DP3+NRM+MOV is really faster than DP3+RSQ+MUL 02501 */ 02502 shader_addline(buffer, "DP3C TA, %s, %s;\n", src_name, src_name); 02503 shader_addline(buffer, "NRM%s %s, %s;\n", shader_arb_get_modifier(ins), dst_name, src_name); 02504 shader_addline(buffer, "MOV %s (EQ), %s;\n", dst_name, zero); 02505 } 02506 else if(priv->target_version >= NV2) 02507 { 02508 shader_addline(buffer, "DP3C TA.x, %s, %s;\n", src_name, src_name); 02509 shader_addline(buffer, "RSQ TA.x (NE), TA.x;\n"); 02510 shader_addline(buffer, "MUL%s %s, %s, TA.x;\n", shader_arb_get_modifier(ins), dst_name, 02511 src_name); 02512 } 02513 else 02514 { 02515 const char *one = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_ONE); 02516 02517 shader_addline(buffer, "DP3 TA.x, %s, %s;\n", src_name, src_name); 02518 /* Pass any non-zero value to RSQ if the input vector has a length of zero. The 02519 * RSQ result doesn't matter, as long as multiplying it by 0 returns 0. 
02520 */ 02521 shader_addline(buffer, "SGE TA.y, -TA.x, %s;\n", zero); 02522 shader_addline(buffer, "MAD TA.x, %s, TA.y, TA.x;\n", one); 02523 02524 shader_addline(buffer, "RSQ TA.x, TA.x;\n"); 02525 /* dst.w = src[0].w * 1 / (src.x^2 + src.y^2 + src.z^2)^(1/2) according to msdn*/ 02526 shader_addline(buffer, "MUL%s %s, %s, TA.x;\n", shader_arb_get_modifier(ins), dst_name, 02527 src_name); 02528 } 02529 } 02530 02531 static void shader_hw_lrp(const struct wined3d_shader_instruction *ins) 02532 { 02533 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 02534 char dst_name[50]; 02535 char src_name[3][50]; 02536 02537 /* ARB_fragment_program has a convenient LRP instruction */ 02538 if(shader_is_pshader_version(ins->ctx->reg_maps->shader_version.type)) { 02539 shader_hw_map2gl(ins); 02540 return; 02541 } 02542 02543 shader_arb_get_dst_param(ins, &ins->dst[0], dst_name); 02544 shader_arb_get_src_param(ins, &ins->src[0], 0, src_name[0]); 02545 shader_arb_get_src_param(ins, &ins->src[1], 1, src_name[1]); 02546 shader_arb_get_src_param(ins, &ins->src[2], 2, src_name[2]); 02547 02548 shader_addline(buffer, "SUB TA, %s, %s;\n", src_name[1], src_name[2]); 02549 shader_addline(buffer, "MAD%s %s, %s, TA, %s;\n", shader_arb_get_modifier(ins), 02550 dst_name, src_name[0], src_name[2]); 02551 } 02552 02553 static void shader_hw_sincos(const struct wined3d_shader_instruction *ins) 02554 { 02555 /* This instruction exists in ARB, but the d3d instruction takes two extra parameters which 02556 * must contain fixed constants. 
So we need a separate function to filter those constants and 02557 * can't use map2gl 02558 */ 02559 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 02560 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 02561 const struct wined3d_shader_dst_param *dst = &ins->dst[0]; 02562 char dst_name[50]; 02563 char src_name0[50], src_name1[50], src_name2[50]; 02564 BOOL is_color; 02565 02566 shader_arb_get_src_param(ins, &ins->src[0], 0, src_name0); 02567 if(shader_is_pshader_version(ins->ctx->reg_maps->shader_version.type)) { 02568 shader_arb_get_dst_param(ins, &ins->dst[0], dst_name); 02569 /* No modifiers are supported on SCS */ 02570 shader_addline(buffer, "SCS %s, %s;\n", dst_name, src_name0); 02571 02572 if(ins->dst[0].modifiers & WINED3DSPDM_SATURATE) 02573 { 02574 shader_arb_get_register_name(ins, &dst->reg, src_name0, &is_color); 02575 shader_addline(buffer, "MOV_SAT %s, %s;\n", dst_name, src_name0); 02576 } 02577 } else if(priv->target_version >= NV2) { 02578 shader_arb_get_register_name(ins, &dst->reg, dst_name, &is_color); 02579 02580 /* Sincos writemask must be .x, .y or .xy */ 02581 if(dst->write_mask & WINED3DSP_WRITEMASK_0) 02582 shader_addline(buffer, "COS%s %s.x, %s;\n", shader_arb_get_modifier(ins), dst_name, src_name0); 02583 if(dst->write_mask & WINED3DSP_WRITEMASK_1) 02584 shader_addline(buffer, "SIN%s %s.y, %s;\n", shader_arb_get_modifier(ins), dst_name, src_name0); 02585 } else { 02586 /* Approximate sine and cosine with a taylor series, as per math textbook. The application passes 8 02587 * helper constants(D3DSINCOSCONST1 and D3DSINCOSCONST2) in src1 and src2. 02588 * 02589 * sin(x) = x - x^3/3! + x^5/5! - x^7/7! + ... 02590 * cos(x) = 1 - x^2/2! + x^4/4! - x^6/6! + ... 
02591 * 02592 * The constants we get are: 02593 * 02594 * +1 +1, -1 -1 +1 +1 -1 -1 02595 * ---- , ---- , ---- , ----- , ----- , ----- , ------ 02596 * 1!*2 2!*4 3!*8 4!*16 5!*32 6!*64 7!*128 02597 * 02598 * If used with x^2, x^3, x^4 etc they calculate sin(x/2) and cos(x/2): 02599 * 02600 * (x/2)^2 = x^2 / 4 02601 * (x/2)^3 = x^3 / 8 02602 * (x/2)^4 = x^4 / 16 02603 * (x/2)^5 = x^5 / 32 02604 * etc 02605 * 02606 * To get the final result: 02607 * sin(x) = 2 * sin(x/2) * cos(x/2) 02608 * cos(x) = cos(x/2)^2 - sin(x/2)^2 02609 * (from sin(x+y) and cos(x+y) rules) 02610 * 02611 * As per MSDN, dst.z is undefined after the operation, and so is 02612 * dst.x and dst.y if they're masked out by the writemask. Ie 02613 * sincos dst.y, src1, c0, c1 02614 * returns the sine in dst.y. dst.x and dst.z are undefined, dst.w is not touched. The assembler 02615 * vsa.exe also stops with an error if the dest register is the same register as the source 02616 * register. This means we can use dest.xyz as temporary storage. The assembler vsa.exe output also 02617 * indicates that sincos consumes 8 instruction slots in vs_2_0(and, strangely, in vs_3_0). 
02618 */ 02619 shader_arb_get_src_param(ins, &ins->src[1], 1, src_name1); 02620 shader_arb_get_src_param(ins, &ins->src[2], 2, src_name2); 02621 shader_arb_get_register_name(ins, &dst->reg, dst_name, &is_color); 02622 02623 shader_addline(buffer, "MUL %s.x, %s, %s;\n", dst_name, src_name0, src_name0); /* x ^ 2 */ 02624 shader_addline(buffer, "MUL TA.y, %s.x, %s;\n", dst_name, src_name0); /* x ^ 3 */ 02625 shader_addline(buffer, "MUL %s.y, TA.y, %s;\n", dst_name, src_name0); /* x ^ 4 */ 02626 shader_addline(buffer, "MUL TA.z, %s.y, %s;\n", dst_name, src_name0); /* x ^ 5 */ 02627 shader_addline(buffer, "MUL %s.z, TA.z, %s;\n", dst_name, src_name0); /* x ^ 6 */ 02628 shader_addline(buffer, "MUL TA.w, %s.z, %s;\n", dst_name, src_name0); /* x ^ 7 */ 02629 02630 /* sin(x/2) 02631 * 02632 * Unfortunately we don't get the constants in a DP4-capable form. Is there a way to 02633 * properly merge that with MULs in the code above? 02634 * The swizzles .yz and xw however fit into the .yzxw swizzle added to ps_2_0. Maybe 02635 * we can merge the sine and cosine MAD rows to calculate them together. 
02636 */ 02637 shader_addline(buffer, "MUL TA.x, %s, %s.w;\n", src_name0, src_name2); /* x^1, +1/(1!*2) */ 02638 shader_addline(buffer, "MAD TA.x, TA.y, %s.x, TA.x;\n", src_name2); /* -1/(3!*8) */ 02639 shader_addline(buffer, "MAD TA.x, TA.z, %s.w, TA.x;\n", src_name1); /* +1/(5!*32) */ 02640 shader_addline(buffer, "MAD TA.x, TA.w, %s.x, TA.x;\n", src_name1); /* -1/(7!*128) */ 02641 02642 /* cos(x/2) */ 02643 shader_addline(buffer, "MAD TA.y, %s.x, %s.y, %s.z;\n", dst_name, src_name2, src_name2); /* -1/(2!*4), +1.0 */ 02644 shader_addline(buffer, "MAD TA.y, %s.y, %s.z, TA.y;\n", dst_name, src_name1); /* +1/(4!*16) */ 02645 shader_addline(buffer, "MAD TA.y, %s.z, %s.y, TA.y;\n", dst_name, src_name1); /* -1/(6!*64) */ 02646 02647 if(dst->write_mask & WINED3DSP_WRITEMASK_0) { 02648 /* cos x */ 02649 shader_addline(buffer, "MUL TA.z, TA.y, TA.y;\n"); 02650 shader_addline(buffer, "MAD %s.x, -TA.x, TA.x, TA.z;\n", dst_name); 02651 } 02652 if(dst->write_mask & WINED3DSP_WRITEMASK_1) { 02653 /* sin x */ 02654 shader_addline(buffer, "MUL %s.y, TA.x, TA.y;\n", dst_name); 02655 shader_addline(buffer, "ADD %s.y, %s.y, %s.y;\n", dst_name, dst_name, dst_name); 02656 } 02657 } 02658 } 02659 02660 static void shader_hw_sgn(const struct wined3d_shader_instruction *ins) 02661 { 02662 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 02663 char dst_name[50]; 02664 char src_name[50]; 02665 struct shader_arb_ctx_priv *ctx = ins->ctx->backend_data; 02666 02667 shader_arb_get_dst_param(ins, &ins->dst[0], dst_name); 02668 shader_arb_get_src_param(ins, &ins->src[0], 0, src_name); 02669 02670 /* SGN is only valid in vertex shaders */ 02671 if(ctx->target_version >= NV2) { 02672 shader_addline(buffer, "SSG%s %s, %s;\n", shader_arb_get_modifier(ins), dst_name, src_name); 02673 return; 02674 } 02675 02676 /* If SRC > 0.0, -SRC < SRC = TRUE, otherwise false. 02677 * if SRC < 0.0, SRC < -SRC = TRUE. 
If neither is true, src = 0.0 02678 */ 02679 if(ins->dst[0].modifiers & WINED3DSPDM_SATURATE) { 02680 shader_addline(buffer, "SLT %s, -%s, %s;\n", dst_name, src_name, src_name); 02681 } else { 02682 /* src contains TA? Write to the dest first. This won't overwrite our destination. 02683 * Then use TA, and calculate the final result 02684 * 02685 * Not reading from TA? Store the first result in TA to avoid overwriting the 02686 * destination if src reg = dst reg 02687 */ 02688 if(strstr(src_name, "TA")) 02689 { 02690 shader_addline(buffer, "SLT %s, %s, -%s;\n", dst_name, src_name, src_name); 02691 shader_addline(buffer, "SLT TA, -%s, %s;\n", src_name, src_name); 02692 shader_addline(buffer, "ADD %s, %s, -TA;\n", dst_name, dst_name); 02693 } 02694 else 02695 { 02696 shader_addline(buffer, "SLT TA, -%s, %s;\n", src_name, src_name); 02697 shader_addline(buffer, "SLT %s, %s, -%s;\n", dst_name, src_name, src_name); 02698 shader_addline(buffer, "ADD %s, TA, -%s;\n", dst_name, dst_name); 02699 } 02700 } 02701 } 02702 02703 static void shader_hw_dsy(const struct wined3d_shader_instruction *ins) 02704 { 02705 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 02706 char src[50]; 02707 char dst[50]; 02708 char dst_name[50]; 02709 BOOL is_color; 02710 02711 shader_arb_get_dst_param(ins, &ins->dst[0], dst); 02712 shader_arb_get_src_param(ins, &ins->src[0], 0, src); 02713 shader_arb_get_register_name(ins, &ins->dst[0].reg, dst_name, &is_color); 02714 02715 shader_addline(buffer, "DDY %s, %s;\n", dst, src); 02716 shader_addline(buffer, "MUL%s %s, %s, ycorrection.y;\n", shader_arb_get_modifier(ins), dst, dst_name); 02717 } 02718 02719 static DWORD abs_modifier(DWORD mod, BOOL *need_abs) 02720 { 02721 *need_abs = FALSE; 02722 02723 switch(mod) 02724 { 02725 case WINED3DSPSM_NONE: return WINED3DSPSM_ABS; 02726 case WINED3DSPSM_NEG: return WINED3DSPSM_ABS; 02727 case WINED3DSPSM_BIAS: *need_abs = TRUE; return WINED3DSPSM_BIAS; 02728 case WINED3DSPSM_BIASNEG: *need_abs = TRUE; 
return WINED3DSPSM_BIASNEG; 02729 case WINED3DSPSM_SIGN: *need_abs = TRUE; return WINED3DSPSM_SIGN; 02730 case WINED3DSPSM_SIGNNEG: *need_abs = TRUE; return WINED3DSPSM_SIGNNEG; 02731 case WINED3DSPSM_COMP: *need_abs = TRUE; return WINED3DSPSM_COMP; 02732 case WINED3DSPSM_X2: *need_abs = TRUE; return WINED3DSPSM_X2; 02733 case WINED3DSPSM_X2NEG: *need_abs = TRUE; return WINED3DSPSM_X2NEG; 02734 case WINED3DSPSM_DZ: *need_abs = TRUE; return WINED3DSPSM_DZ; 02735 case WINED3DSPSM_DW: *need_abs = TRUE; return WINED3DSPSM_DW; 02736 case WINED3DSPSM_ABS: return WINED3DSPSM_ABS; 02737 case WINED3DSPSM_ABSNEG: return WINED3DSPSM_ABS; 02738 } 02739 FIXME("Unknown modifier %u\n", mod); 02740 return mod; 02741 } 02742 02743 static void shader_hw_log(const struct wined3d_shader_instruction *ins) 02744 { 02745 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 02746 char src0[50], dst[50]; 02747 struct wined3d_shader_src_param src0_copy = ins->src[0]; 02748 BOOL need_abs = FALSE; 02749 const char *instr; 02750 02751 switch(ins->handler_idx) 02752 { 02753 case WINED3DSIH_LOG: instr = "LG2"; break; 02754 case WINED3DSIH_LOGP: instr = "LOG"; break; 02755 default: 02756 ERR("Unexpected instruction %d\n", ins->handler_idx); 02757 return; 02758 } 02759 02760 /* LOG and LOGP operate on the absolute value of the input */ 02761 src0_copy.modifiers = abs_modifier(src0_copy.modifiers, &need_abs); 02762 02763 shader_arb_get_dst_param(ins, &ins->dst[0], dst); 02764 shader_arb_get_src_param(ins, &src0_copy, 0, src0); 02765 02766 if(need_abs) 02767 { 02768 shader_addline(buffer, "ABS TA, %s;\n", src0); 02769 shader_addline(buffer, "%s%s %s, TA;\n", instr, shader_arb_get_modifier(ins), dst); 02770 } 02771 else 02772 { 02773 shader_addline(buffer, "%s%s %s, %s;\n", instr, shader_arb_get_modifier(ins), dst, src0); 02774 } 02775 } 02776 02777 static void shader_hw_pow(const struct wined3d_shader_instruction *ins) 02778 { 02779 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 02780 
char src0[50], src1[50], dst[50]; 02781 struct wined3d_shader_src_param src0_copy = ins->src[0]; 02782 BOOL need_abs = FALSE; 02783 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 02784 const char *one = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_ONE); 02785 02786 /* POW operates on the absolute value of the input */ 02787 src0_copy.modifiers = abs_modifier(src0_copy.modifiers, &need_abs); 02788 02789 shader_arb_get_dst_param(ins, &ins->dst[0], dst); 02790 shader_arb_get_src_param(ins, &src0_copy, 0, src0); 02791 shader_arb_get_src_param(ins, &ins->src[1], 1, src1); 02792 02793 if (need_abs) 02794 shader_addline(buffer, "ABS TA.x, %s;\n", src0); 02795 else 02796 shader_addline(buffer, "MOV TA.x, %s;\n", src0); 02797 02798 if (priv->target_version >= NV2) 02799 { 02800 shader_addline(buffer, "MOVC TA.y, %s;\n", src1); 02801 shader_addline(buffer, "POW%s %s, TA.x, TA.y;\n", shader_arb_get_modifier(ins), dst); 02802 shader_addline(buffer, "MOV %s (EQ.y), %s;\n", dst, one); 02803 } 02804 else 02805 { 02806 const char *zero = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_ZERO); 02807 const char *flt_eps = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_EPS); 02808 02809 shader_addline(buffer, "ABS TA.y, %s;\n", src1); 02810 shader_addline(buffer, "SGE TA.y, -TA.y, %s;\n", zero); 02811 /* Possibly add flt_eps to avoid getting float special values */ 02812 shader_addline(buffer, "MAD TA.z, TA.y, %s, %s;\n", flt_eps, src1); 02813 shader_addline(buffer, "POW%s TA.x, TA.x, TA.z;\n", shader_arb_get_modifier(ins)); 02814 shader_addline(buffer, "MAD TA.x, -TA.x, TA.y, TA.x;\n"); 02815 shader_addline(buffer, "MAD %s, TA.y, %s, TA.x;\n", dst, one); 02816 } 02817 } 02818 02819 static void shader_hw_loop(const struct wined3d_shader_instruction *ins) 02820 { 02821 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 02822 char src_name[50]; 02823 BOOL vshader = 
shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type); 02824 02825 /* src0 is aL */ 02826 shader_arb_get_src_param(ins, &ins->src[1], 0, src_name); 02827 02828 if(vshader) 02829 { 02830 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 02831 struct list *e = list_head(&priv->control_frames); 02832 struct control_frame *control_frame = LIST_ENTRY(e, struct control_frame, entry); 02833 02834 if(priv->loop_depth > 1) shader_addline(buffer, "PUSHA aL;\n"); 02835 /* The constant loader makes sure to load -1 into iX.w */ 02836 shader_addline(buffer, "ARLC aL, %s.xywz;\n", src_name); 02837 shader_addline(buffer, "BRA loop_%u_end (LE.x);\n", control_frame->no.loop); 02838 shader_addline(buffer, "loop_%u_start:\n", control_frame->no.loop); 02839 } 02840 else 02841 { 02842 shader_addline(buffer, "LOOP %s;\n", src_name); 02843 } 02844 } 02845 02846 static void shader_hw_rep(const struct wined3d_shader_instruction *ins) 02847 { 02848 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 02849 char src_name[50]; 02850 BOOL vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type); 02851 02852 shader_arb_get_src_param(ins, &ins->src[0], 0, src_name); 02853 02854 /* The constant loader makes sure to load -1 into iX.w */ 02855 if(vshader) 02856 { 02857 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 02858 struct list *e = list_head(&priv->control_frames); 02859 struct control_frame *control_frame = LIST_ENTRY(e, struct control_frame, entry); 02860 02861 if(priv->loop_depth > 1) shader_addline(buffer, "PUSHA aL;\n"); 02862 02863 shader_addline(buffer, "ARLC aL, %s.xywz;\n", src_name); 02864 shader_addline(buffer, "BRA loop_%u_end (LE.x);\n", control_frame->no.loop); 02865 shader_addline(buffer, "loop_%u_start:\n", control_frame->no.loop); 02866 } 02867 else 02868 { 02869 shader_addline(buffer, "REP %s;\n", src_name); 02870 } 02871 } 02872 02873 static void shader_hw_endloop(const struct wined3d_shader_instruction *ins) 02874 { 
02875 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 02876 BOOL vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type); 02877 02878 if(vshader) 02879 { 02880 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 02881 struct list *e = list_head(&priv->control_frames); 02882 struct control_frame *control_frame = LIST_ENTRY(e, struct control_frame, entry); 02883 02884 shader_addline(buffer, "ARAC aL.xy, aL;\n"); 02885 shader_addline(buffer, "BRA loop_%u_start (GT.x);\n", control_frame->no.loop); 02886 shader_addline(buffer, "loop_%u_end:\n", control_frame->no.loop); 02887 02888 if(priv->loop_depth > 1) shader_addline(buffer, "POPA aL;\n"); 02889 } 02890 else 02891 { 02892 shader_addline(buffer, "ENDLOOP;\n"); 02893 } 02894 } 02895 02896 static void shader_hw_endrep(const struct wined3d_shader_instruction *ins) 02897 { 02898 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 02899 BOOL vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type); 02900 02901 if(vshader) 02902 { 02903 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 02904 struct list *e = list_head(&priv->control_frames); 02905 struct control_frame *control_frame = LIST_ENTRY(e, struct control_frame, entry); 02906 02907 shader_addline(buffer, "ARAC aL.xy, aL;\n"); 02908 shader_addline(buffer, "BRA loop_%u_start (GT.x);\n", control_frame->no.loop); 02909 shader_addline(buffer, "loop_%u_end:\n", control_frame->no.loop); 02910 02911 if(priv->loop_depth > 1) shader_addline(buffer, "POPA aL;\n"); 02912 } 02913 else 02914 { 02915 shader_addline(buffer, "ENDREP;\n"); 02916 } 02917 } 02918 02919 static const struct control_frame *find_last_loop(const struct shader_arb_ctx_priv *priv) 02920 { 02921 struct control_frame *control_frame; 02922 02923 LIST_FOR_EACH_ENTRY(control_frame, &priv->control_frames, struct control_frame, entry) 02924 { 02925 if(control_frame->type == LOOP || control_frame->type == REP) return control_frame; 02926 } 02927 
ERR("Could not find loop for break\n"); 02928 return NULL; 02929 } 02930 02931 static void shader_hw_break(const struct wined3d_shader_instruction *ins) 02932 { 02933 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 02934 const struct control_frame *control_frame = find_last_loop(ins->ctx->backend_data); 02935 BOOL vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type); 02936 02937 if(vshader) 02938 { 02939 shader_addline(buffer, "BRA loop_%u_end;\n", control_frame->no.loop); 02940 } 02941 else 02942 { 02943 shader_addline(buffer, "BRK;\n"); 02944 } 02945 } 02946 02947 static const char *get_compare(enum wined3d_shader_rel_op op) 02948 { 02949 switch (op) 02950 { 02951 case WINED3D_SHADER_REL_OP_GT: return "GT"; 02952 case WINED3D_SHADER_REL_OP_EQ: return "EQ"; 02953 case WINED3D_SHADER_REL_OP_GE: return "GE"; 02954 case WINED3D_SHADER_REL_OP_LT: return "LT"; 02955 case WINED3D_SHADER_REL_OP_NE: return "NE"; 02956 case WINED3D_SHADER_REL_OP_LE: return "LE"; 02957 default: 02958 FIXME("Unrecognized operator %#x.\n", op); 02959 return "(\?\?)"; 02960 } 02961 } 02962 02963 static enum wined3d_shader_rel_op invert_compare(enum wined3d_shader_rel_op op) 02964 { 02965 switch (op) 02966 { 02967 case WINED3D_SHADER_REL_OP_GT: return WINED3D_SHADER_REL_OP_LE; 02968 case WINED3D_SHADER_REL_OP_EQ: return WINED3D_SHADER_REL_OP_NE; 02969 case WINED3D_SHADER_REL_OP_GE: return WINED3D_SHADER_REL_OP_LT; 02970 case WINED3D_SHADER_REL_OP_LT: return WINED3D_SHADER_REL_OP_GE; 02971 case WINED3D_SHADER_REL_OP_NE: return WINED3D_SHADER_REL_OP_EQ; 02972 case WINED3D_SHADER_REL_OP_LE: return WINED3D_SHADER_REL_OP_GT; 02973 default: 02974 FIXME("Unrecognized operator %#x.\n", op); 02975 return -1; 02976 } 02977 } 02978 02979 static void shader_hw_breakc(const struct wined3d_shader_instruction *ins) 02980 { 02981 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 02982 BOOL vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type); 
02983 const struct control_frame *control_frame = find_last_loop(ins->ctx->backend_data); 02984 char src_name0[50]; 02985 char src_name1[50]; 02986 const char *comp = get_compare(ins->flags); 02987 02988 shader_arb_get_src_param(ins, &ins->src[0], 0, src_name0); 02989 shader_arb_get_src_param(ins, &ins->src[1], 1, src_name1); 02990 02991 if(vshader) 02992 { 02993 /* SUBC CC, src0, src1" works only in pixel shaders, so use TA to throw 02994 * away the subtraction result 02995 */ 02996 shader_addline(buffer, "SUBC TA, %s, %s;\n", src_name0, src_name1); 02997 shader_addline(buffer, "BRA loop_%u_end (%s.x);\n", control_frame->no.loop, comp); 02998 } 02999 else 03000 { 03001 shader_addline(buffer, "SUBC TA, %s, %s;\n", src_name0, src_name1); 03002 shader_addline(buffer, "BRK (%s.x);\n", comp); 03003 } 03004 } 03005 03006 static void shader_hw_ifc(const struct wined3d_shader_instruction *ins) 03007 { 03008 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 03009 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 03010 struct list *e = list_head(&priv->control_frames); 03011 struct control_frame *control_frame = LIST_ENTRY(e, struct control_frame, entry); 03012 const char *comp; 03013 char src_name0[50]; 03014 char src_name1[50]; 03015 BOOL vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type); 03016 03017 shader_arb_get_src_param(ins, &ins->src[0], 0, src_name0); 03018 shader_arb_get_src_param(ins, &ins->src[1], 1, src_name1); 03019 03020 if(vshader) 03021 { 03022 /* Invert the flag. 
We jump to the else label if the condition is NOT true */ 03023 comp = get_compare(invert_compare(ins->flags)); 03024 shader_addline(buffer, "SUBC TA, %s, %s;\n", src_name0, src_name1); 03025 shader_addline(buffer, "BRA ifc_%u_else (%s.x);\n", control_frame->no.ifc, comp); 03026 } 03027 else 03028 { 03029 comp = get_compare(ins->flags); 03030 shader_addline(buffer, "SUBC TA, %s, %s;\n", src_name0, src_name1); 03031 shader_addline(buffer, "IF %s.x;\n", comp); 03032 } 03033 } 03034 03035 static void shader_hw_else(const struct wined3d_shader_instruction *ins) 03036 { 03037 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 03038 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 03039 struct list *e = list_head(&priv->control_frames); 03040 struct control_frame *control_frame = LIST_ENTRY(e, struct control_frame, entry); 03041 BOOL vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type); 03042 03043 if(vshader) 03044 { 03045 shader_addline(buffer, "BRA ifc_%u_endif;\n", control_frame->no.ifc); 03046 shader_addline(buffer, "ifc_%u_else:\n", control_frame->no.ifc); 03047 control_frame->had_else = TRUE; 03048 } 03049 else 03050 { 03051 shader_addline(buffer, "ELSE;\n"); 03052 } 03053 } 03054 03055 static void shader_hw_endif(const struct wined3d_shader_instruction *ins) 03056 { 03057 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 03058 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 03059 struct list *e = list_head(&priv->control_frames); 03060 struct control_frame *control_frame = LIST_ENTRY(e, struct control_frame, entry); 03061 BOOL vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type); 03062 03063 if(vshader) 03064 { 03065 if(control_frame->had_else) 03066 { 03067 shader_addline(buffer, "ifc_%u_endif:\n", control_frame->no.ifc); 03068 } 03069 else 03070 { 03071 shader_addline(buffer, "#No else branch. 
else is endif\n"); 03072 shader_addline(buffer, "ifc_%u_else:\n", control_frame->no.ifc); 03073 } 03074 } 03075 else 03076 { 03077 shader_addline(buffer, "ENDIF;\n"); 03078 } 03079 } 03080 03081 static void shader_hw_texldd(const struct wined3d_shader_instruction *ins) 03082 { 03083 DWORD sampler_idx = ins->src[1].reg.idx; 03084 char reg_dest[40]; 03085 char reg_src[3][40]; 03086 WORD flags = TEX_DERIV; 03087 03088 shader_arb_get_dst_param(ins, &ins->dst[0], reg_dest); 03089 shader_arb_get_src_param(ins, &ins->src[0], 0, reg_src[0]); 03090 shader_arb_get_src_param(ins, &ins->src[2], 1, reg_src[1]); 03091 shader_arb_get_src_param(ins, &ins->src[3], 2, reg_src[2]); 03092 03093 if (ins->flags & WINED3DSI_TEXLD_PROJECT) flags |= TEX_PROJ; 03094 if (ins->flags & WINED3DSI_TEXLD_BIAS) flags |= TEX_BIAS; 03095 03096 shader_hw_sample(ins, sampler_idx, reg_dest, reg_src[0], flags, reg_src[1], reg_src[2]); 03097 } 03098 03099 static void shader_hw_texldl(const struct wined3d_shader_instruction *ins) 03100 { 03101 DWORD sampler_idx = ins->src[1].reg.idx; 03102 char reg_dest[40]; 03103 char reg_coord[40]; 03104 WORD flags = TEX_LOD; 03105 03106 shader_arb_get_dst_param(ins, &ins->dst[0], reg_dest); 03107 shader_arb_get_src_param(ins, &ins->src[0], 0, reg_coord); 03108 03109 if (ins->flags & WINED3DSI_TEXLD_PROJECT) flags |= TEX_PROJ; 03110 if (ins->flags & WINED3DSI_TEXLD_BIAS) flags |= TEX_BIAS; 03111 03112 shader_hw_sample(ins, sampler_idx, reg_dest, reg_coord, flags, NULL, NULL); 03113 } 03114 03115 static void shader_hw_label(const struct wined3d_shader_instruction *ins) 03116 { 03117 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 03118 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 03119 03120 priv->in_main_func = FALSE; 03121 /* Call instructions activate the NV extensions, not labels and rets. 
If there is an uncalled 03122 * subroutine, don't generate a label that will make GL complain 03123 */ 03124 if(priv->target_version == ARB) return; 03125 03126 shader_addline(buffer, "l%u:\n", ins->src[0].reg.idx); 03127 } 03128 03129 static void vshader_add_footer(struct shader_arb_ctx_priv *priv_ctx, 03130 const struct arb_vshader_private *shader_data, const struct arb_vs_compile_args *args, 03131 const struct wined3d_shader_reg_maps *reg_maps, const struct wined3d_gl_info *gl_info, 03132 struct wined3d_shader_buffer *buffer) 03133 { 03134 unsigned int i; 03135 03136 /* The D3DRS_FOGTABLEMODE render state defines if the shader-generated fog coord is used 03137 * or if the fragment depth is used. If the fragment depth is used(FOGTABLEMODE != NONE), 03138 * the fog frag coord is thrown away. If the fog frag coord is used, but not written by 03139 * the shader, it is set to 0.0(fully fogged, since start = 1.0, end = 0.0) 03140 */ 03141 if (args->super.fog_src == VS_FOG_Z) 03142 shader_addline(buffer, "MOV result.fogcoord, TMP_OUT.z;\n"); 03143 else if (!reg_maps->fog) 03144 /* posFixup.x is always 1.0, so we can safely use it */ 03145 shader_addline(buffer, "ADD result.fogcoord, posFixup.x, -posFixup.x;\n"); 03146 03147 /* Clipplanes are always stored without y inversion */ 03148 if (use_nv_clip(gl_info) && priv_ctx->target_version >= NV2) 03149 { 03150 if (args->super.clip_enabled) 03151 { 03152 for (i = 0; i < priv_ctx->vs_clipplanes; i++) 03153 { 03154 shader_addline(buffer, "DP4 result.clip[%u].x, TMP_OUT, state.clip[%u].plane;\n", i, i); 03155 } 03156 } 03157 } 03158 else if (args->clip.boolclip.clip_texcoord) 03159 { 03160 unsigned int cur_clip = 0; 03161 char component[4] = {'x', 'y', 'z', 'w'}; 03162 const char *zero = arb_get_helper_value(WINED3D_SHADER_TYPE_VERTEX, ARB_ZERO); 03163 03164 for (i = 0; i < gl_info->limits.clipplanes; ++i) 03165 { 03166 if (args->clip.boolclip.clipplane_mask & (1 << i)) 03167 { 03168 shader_addline(buffer, "DP4 TA.%c, 
TMP_OUT, state.clip[%u].plane;\n", 03169 component[cur_clip++], i); 03170 } 03171 } 03172 switch (cur_clip) 03173 { 03174 case 0: 03175 shader_addline(buffer, "MOV TA, %s;\n", zero); 03176 break; 03177 case 1: 03178 shader_addline(buffer, "MOV TA.yzw, %s;\n", zero); 03179 break; 03180 case 2: 03181 shader_addline(buffer, "MOV TA.zw, %s;\n", zero); 03182 break; 03183 case 3: 03184 shader_addline(buffer, "MOV TA.w, %s;\n", zero); 03185 break; 03186 } 03187 shader_addline(buffer, "MOV result.texcoord[%u], TA;\n", 03188 args->clip.boolclip.clip_texcoord - 1); 03189 } 03190 03191 /* Write the final position. 03192 * 03193 * OpenGL coordinates specify the center of the pixel while d3d coords specify 03194 * the corner. The offsets are stored in z and w in posFixup. posFixup.y contains 03195 * 1.0 or -1.0 to turn the rendering upside down for offscreen rendering. PosFixup.x 03196 * contains 1.0 to allow a mad, but arb vs swizzles are too restricted for that. 03197 */ 03198 shader_addline(buffer, "MUL TA, posFixup, TMP_OUT.w;\n"); 03199 shader_addline(buffer, "ADD TMP_OUT.x, TMP_OUT.x, TA.z;\n"); 03200 shader_addline(buffer, "MAD TMP_OUT.y, TMP_OUT.y, posFixup.y, TA.w;\n"); 03201 03202 /* Z coord [0;1]->[-1;1] mapping, see comment in transform_projection in state.c 03203 * and the glsl equivalent 03204 */ 03205 if (need_helper_const(shader_data, reg_maps, gl_info)) 03206 { 03207 const char *two = arb_get_helper_value(WINED3D_SHADER_TYPE_VERTEX, ARB_TWO); 03208 shader_addline(buffer, "MAD TMP_OUT.z, TMP_OUT.z, %s, -TMP_OUT.w;\n", two); 03209 } 03210 else 03211 { 03212 shader_addline(buffer, "ADD TMP_OUT.z, TMP_OUT.z, TMP_OUT.z;\n"); 03213 shader_addline(buffer, "ADD TMP_OUT.z, TMP_OUT.z, -TMP_OUT.w;\n"); 03214 } 03215 03216 shader_addline(buffer, "MOV result.position, TMP_OUT;\n"); 03217 03218 priv_ctx->footer_written = TRUE; 03219 } 03220 03221 static void shader_hw_ret(const struct wined3d_shader_instruction *ins) 03222 { 03223 struct wined3d_shader_buffer *buffer = 
ins->ctx->buffer; 03224 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 03225 const struct wined3d_shader *shader = ins->ctx->shader; 03226 BOOL vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type); 03227 03228 if(priv->target_version == ARB) return; 03229 03230 if(vshader) 03231 { 03232 if (priv->in_main_func) vshader_add_footer(priv, shader->backend_data, 03233 priv->cur_vs_args, ins->ctx->reg_maps, ins->ctx->gl_info, buffer); 03234 } 03235 03236 shader_addline(buffer, "RET;\n"); 03237 } 03238 03239 static void shader_hw_call(const struct wined3d_shader_instruction *ins) 03240 { 03241 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 03242 shader_addline(buffer, "CAL l%u;\n", ins->src[0].reg.idx); 03243 } 03244 03245 /* GL locking is done by the caller */ 03246 static GLuint create_arb_blt_vertex_program(const struct wined3d_gl_info *gl_info) 03247 { 03248 GLuint program_id = 0; 03249 GLint pos; 03250 03251 const char *blt_vprogram = 03252 "!!ARBvp1.0\n" 03253 "PARAM c[1] = { { 1, 0.5 } };\n" 03254 "MOV result.position, vertex.position;\n" 03255 "MOV result.color, c[0].x;\n" 03256 "MOV result.texcoord[0], vertex.texcoord[0];\n" 03257 "END\n"; 03258 03259 GL_EXTCALL(glGenProgramsARB(1, &program_id)); 03260 GL_EXTCALL(glBindProgramARB(GL_VERTEX_PROGRAM_ARB, program_id)); 03261 GL_EXTCALL(glProgramStringARB(GL_VERTEX_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, 03262 strlen(blt_vprogram), blt_vprogram)); 03263 checkGLcall("glProgramStringARB()"); 03264 03265 glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &pos); 03266 if (pos != -1) 03267 { 03268 FIXME("Vertex program error at position %d: %s\n\n", pos, 03269 debugstr_a((const char *)glGetString(GL_PROGRAM_ERROR_STRING_ARB))); 03270 shader_arb_dump_program_source(blt_vprogram); 03271 } 03272 else 03273 { 03274 GLint native; 03275 03276 GL_EXTCALL(glGetProgramivARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_UNDER_NATIVE_LIMITS_ARB, &native)); 03277 checkGLcall("glGetProgramivARB()"); 
03278 if (!native) WARN("Program exceeds native resource limits.\n"); 03279 } 03280 03281 return program_id; 03282 } 03283 03284 /* GL locking is done by the caller */ 03285 static GLuint create_arb_blt_fragment_program(const struct wined3d_gl_info *gl_info, 03286 enum tex_types tex_type, BOOL masked) 03287 { 03288 GLuint program_id = 0; 03289 const char *fprogram; 03290 GLint pos; 03291 03292 static const char * const blt_fprograms_full[tex_type_count] = 03293 { 03294 /* tex_1d */ 03295 NULL, 03296 /* tex_2d */ 03297 "!!ARBfp1.0\n" 03298 "TEMP R0;\n" 03299 "TEX R0.x, fragment.texcoord[0], texture[0], 2D;\n" 03300 "MOV result.depth.z, R0.x;\n" 03301 "END\n", 03302 /* tex_3d */ 03303 NULL, 03304 /* tex_cube */ 03305 "!!ARBfp1.0\n" 03306 "TEMP R0;\n" 03307 "TEX R0.x, fragment.texcoord[0], texture[0], CUBE;\n" 03308 "MOV result.depth.z, R0.x;\n" 03309 "END\n", 03310 /* tex_rect */ 03311 "!!ARBfp1.0\n" 03312 "TEMP R0;\n" 03313 "TEX R0.x, fragment.texcoord[0], texture[0], RECT;\n" 03314 "MOV result.depth.z, R0.x;\n" 03315 "END\n", 03316 }; 03317 03318 static const char * const blt_fprograms_masked[tex_type_count] = 03319 { 03320 /* tex_1d */ 03321 NULL, 03322 /* tex_2d */ 03323 "!!ARBfp1.0\n" 03324 "PARAM mask = program.local[0];\n" 03325 "TEMP R0;\n" 03326 "SLT R0.xy, fragment.position, mask.zwzw;\n" 03327 "MUL R0.x, R0.x, R0.y;\n" 03328 "KIL -R0.x;\n" 03329 "TEX R0.x, fragment.texcoord[0], texture[0], 2D;\n" 03330 "MOV result.depth.z, R0.x;\n" 03331 "END\n", 03332 /* tex_3d */ 03333 NULL, 03334 /* tex_cube */ 03335 "!!ARBfp1.0\n" 03336 "PARAM mask = program.local[0];\n" 03337 "TEMP R0;\n" 03338 "SLT R0.xy, fragment.position, mask.zwzw;\n" 03339 "MUL R0.x, R0.x, R0.y;\n" 03340 "KIL -R0.x;\n" 03341 "TEX R0.x, fragment.texcoord[0], texture[0], CUBE;\n" 03342 "MOV result.depth.z, R0.x;\n" 03343 "END\n", 03344 /* tex_rect */ 03345 "!!ARBfp1.0\n" 03346 "PARAM mask = program.local[0];\n" 03347 "TEMP R0;\n" 03348 "SLT R0.xy, fragment.position, mask.zwzw;\n" 03349 "MUL R0.x, 
R0.x, R0.y;\n" 03350 "KIL -R0.x;\n" 03351 "TEX R0.x, fragment.texcoord[0], texture[0], RECT;\n" 03352 "MOV result.depth.z, R0.x;\n" 03353 "END\n", 03354 }; 03355 03356 fprogram = masked ? blt_fprograms_masked[tex_type] : blt_fprograms_full[tex_type]; 03357 if (!fprogram) 03358 { 03359 FIXME("tex_type %#x not supported, falling back to tex_2d\n", tex_type); 03360 tex_type = tex_2d; 03361 fprogram = masked ? blt_fprograms_masked[tex_type] : blt_fprograms_full[tex_type]; 03362 } 03363 03364 GL_EXTCALL(glGenProgramsARB(1, &program_id)); 03365 GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, program_id)); 03366 GL_EXTCALL(glProgramStringARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, strlen(fprogram), fprogram)); 03367 checkGLcall("glProgramStringARB()"); 03368 03369 glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &pos); 03370 if (pos != -1) 03371 { 03372 FIXME("Fragment program error at position %d: %s\n\n", pos, 03373 debugstr_a((const char *)glGetString(GL_PROGRAM_ERROR_STRING_ARB))); 03374 shader_arb_dump_program_source(fprogram); 03375 } 03376 else 03377 { 03378 GLint native; 03379 03380 GL_EXTCALL(glGetProgramivARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_UNDER_NATIVE_LIMITS_ARB, &native)); 03381 checkGLcall("glGetProgramivARB()"); 03382 if (!native) WARN("Program exceeds native resource limits.\n"); 03383 } 03384 03385 return program_id; 03386 } 03387 03388 static void arbfp_add_sRGB_correction(struct wined3d_shader_buffer *buffer, const char *fragcolor, 03389 const char *tmp1, const char *tmp2, const char *tmp3, const char *tmp4, BOOL condcode) 03390 { 03391 /* Perform sRGB write correction. See GLX_EXT_framebuffer_sRGB */ 03392 03393 if(condcode) 03394 { 03395 /* Sigh. 
MOVC CC doesn't work, so use one of the temps as dummy dest */ 03396 shader_addline(buffer, "SUBC %s, %s.x, srgb_consts1.y;\n", tmp1, fragcolor); 03397 /* Calculate the > 0.0031308 case */ 03398 shader_addline(buffer, "POW %s.x (GE), %s.x, srgb_consts1.z;\n", fragcolor, fragcolor); 03399 shader_addline(buffer, "POW %s.y (GE), %s.y, srgb_consts1.z;\n", fragcolor, fragcolor); 03400 shader_addline(buffer, "POW %s.z (GE), %s.z, srgb_consts1.z;\n", fragcolor, fragcolor); 03401 shader_addline(buffer, "MUL %s.xyz (GE), %s, srgb_consts1.w;\n", fragcolor, fragcolor); 03402 shader_addline(buffer, "SUB %s.xyz (GE), %s, srgb_consts2.x;\n", fragcolor, fragcolor); 03403 /* Calculate the < case */ 03404 shader_addline(buffer, "MUL %s.xyz (LT), srgb_consts1.x, %s;\n", fragcolor, fragcolor); 03405 } 03406 else 03407 { 03408 /* Calculate the > 0.0031308 case */ 03409 shader_addline(buffer, "POW %s.x, %s.x, srgb_consts1.z;\n", tmp1, fragcolor); 03410 shader_addline(buffer, "POW %s.y, %s.y, srgb_consts1.z;\n", tmp1, fragcolor); 03411 shader_addline(buffer, "POW %s.z, %s.z, srgb_consts1.z;\n", tmp1, fragcolor); 03412 shader_addline(buffer, "MUL %s, %s, srgb_consts1.w;\n", tmp1, tmp1); 03413 shader_addline(buffer, "SUB %s, %s, srgb_consts2.x;\n", tmp1, tmp1); 03414 /* Calculate the < case */ 03415 shader_addline(buffer, "MUL %s, srgb_consts1.x, %s;\n", tmp2, fragcolor); 03416 /* Get 1.0 / 0.0 masks for > 0.0031308 and < 0.0031308 */ 03417 shader_addline(buffer, "SLT %s, srgb_consts1.y, %s;\n", tmp3, fragcolor); 03418 shader_addline(buffer, "SGE %s, srgb_consts1.y, %s;\n", tmp4, fragcolor); 03419 /* Store the components > 0.0031308 in the destination */ 03420 shader_addline(buffer, "MUL %s.xyz, %s, %s;\n", fragcolor, tmp1, tmp3); 03421 /* Add the components that are < 0.0031308 */ 03422 shader_addline(buffer, "MAD %s.xyz, %s, %s, %s;\n", fragcolor, tmp2, tmp4, fragcolor); 03423 /* Move everything into result.color at once. 
Nvidia hardware cannot handle partial 03424 * result.color writes(.rgb first, then .a), or handle overwriting already written 03425 * components. The assembler uses a temporary register in this case, which is usually 03426 * not allocated from one of our registers that were used earlier. 03427 */ 03428 } 03429 /* [0.0;1.0] clamping. Not needed, this is done implicitly */ 03430 } 03431 03432 static const DWORD *find_loop_control_values(const struct wined3d_shader *shader, DWORD idx) 03433 { 03434 const struct wined3d_shader_lconst *constant; 03435 03436 LIST_FOR_EACH_ENTRY(constant, &shader->constantsI, struct wined3d_shader_lconst, entry) 03437 { 03438 if (constant->idx == idx) 03439 { 03440 return constant->value; 03441 } 03442 } 03443 return NULL; 03444 } 03445 03446 static void init_ps_input(const struct wined3d_shader *shader, 03447 const struct arb_ps_compile_args *args, struct shader_arb_ctx_priv *priv) 03448 { 03449 static const char * const texcoords[8] = 03450 { 03451 "fragment.texcoord[0]", "fragment.texcoord[1]", "fragment.texcoord[2]", "fragment.texcoord[3]", 03452 "fragment.texcoord[4]", "fragment.texcoord[5]", "fragment.texcoord[6]", "fragment.texcoord[7]" 03453 }; 03454 unsigned int i; 03455 const struct wined3d_shader_signature_element *sig = shader->input_signature; 03456 const char *semantic_name; 03457 DWORD semantic_idx; 03458 03459 switch(args->super.vp_mode) 03460 { 03461 case pretransformed: 03462 case fixedfunction: 03463 /* The pixelshader has to collect the varyings on its own. In any case properly load 03464 * color0 and color1. In the case of pretransformed vertices also load texcoords. Set 03465 * other attribs to 0.0. 03466 * 03467 * For fixedfunction this behavior is correct, according to the tests. 
For pretransformed 03468 * we'd either need a replacement shader that can load other attribs like BINORMAL, or 03469 * load the texcoord attrib pointers to match the pixel shader signature 03470 */ 03471 for(i = 0; i < MAX_REG_INPUT; i++) 03472 { 03473 semantic_name = sig[i].semantic_name; 03474 semantic_idx = sig[i].semantic_idx; 03475 if (!semantic_name) continue; 03476 03477 if(shader_match_semantic(semantic_name, WINED3DDECLUSAGE_COLOR)) 03478 { 03479 if (!semantic_idx) priv->ps_input[i] = "fragment.color.primary"; 03480 else if(semantic_idx == 1) priv->ps_input[i] = "fragment.color.secondary"; 03481 else priv->ps_input[i] = "0.0"; 03482 } 03483 else if(args->super.vp_mode == fixedfunction) 03484 { 03485 priv->ps_input[i] = "0.0"; 03486 } 03487 else if(shader_match_semantic(semantic_name, WINED3DDECLUSAGE_TEXCOORD)) 03488 { 03489 if(semantic_idx < 8) priv->ps_input[i] = texcoords[semantic_idx]; 03490 else priv->ps_input[i] = "0.0"; 03491 } 03492 else if(shader_match_semantic(semantic_name, WINED3DDECLUSAGE_FOG)) 03493 { 03494 if (!semantic_idx) priv->ps_input[i] = "fragment.fogcoord"; 03495 else priv->ps_input[i] = "0.0"; 03496 } 03497 else 03498 { 03499 priv->ps_input[i] = "0.0"; 03500 } 03501 03502 TRACE("v%u, semantic %s%u is %s\n", i, semantic_name, semantic_idx, priv->ps_input[i]); 03503 } 03504 break; 03505 03506 case vertexshader: 03507 /* That one is easy. 
The vertex shaders provide v0-v7 in fragment.texcoord and v8 and v9 in 03508 * fragment.color 03509 */ 03510 for(i = 0; i < 8; i++) 03511 { 03512 priv->ps_input[i] = texcoords[i]; 03513 } 03514 priv->ps_input[8] = "fragment.color.primary"; 03515 priv->ps_input[9] = "fragment.color.secondary"; 03516 break; 03517 } 03518 } 03519 03520 /* GL locking is done by the caller */ 03521 static GLuint shader_arb_generate_pshader(const struct wined3d_shader *shader, 03522 const struct wined3d_gl_info *gl_info, struct wined3d_shader_buffer *buffer, 03523 const struct arb_ps_compile_args *args, struct arb_ps_compiled_shader *compiled) 03524 { 03525 const struct wined3d_shader_reg_maps *reg_maps = &shader->reg_maps; 03526 const struct wined3d_shader_lconst *lconst; 03527 const DWORD *function = shader->function; 03528 GLuint retval; 03529 char fragcolor[16]; 03530 DWORD *lconst_map = local_const_mapping(shader), next_local; 03531 struct shader_arb_ctx_priv priv_ctx; 03532 BOOL dcl_td = FALSE; 03533 BOOL want_nv_prog = FALSE; 03534 struct arb_pshader_private *shader_priv = shader->backend_data; 03535 GLint errPos; 03536 DWORD map; 03537 03538 char srgbtmp[4][4]; 03539 unsigned int i, found = 0; 03540 03541 for (i = 0, map = reg_maps->temporary; map; map >>= 1, ++i) 03542 { 03543 if (!(map & 1) 03544 || (shader->u.ps.color0_mov && i == shader->u.ps.color0_reg) 03545 || (reg_maps->shader_version.major < 2 && !i)) 03546 continue; 03547 03548 sprintf(srgbtmp[found], "R%u", i); 03549 ++found; 03550 if (found == 4) break; 03551 } 03552 03553 switch(found) { 03554 case 0: 03555 sprintf(srgbtmp[0], "TA"); 03556 sprintf(srgbtmp[1], "TB"); 03557 sprintf(srgbtmp[2], "TC"); 03558 sprintf(srgbtmp[3], "TD"); 03559 dcl_td = TRUE; 03560 break; 03561 case 1: 03562 sprintf(srgbtmp[1], "TA"); 03563 sprintf(srgbtmp[2], "TB"); 03564 sprintf(srgbtmp[3], "TC"); 03565 break; 03566 case 2: 03567 sprintf(srgbtmp[2], "TA"); 03568 sprintf(srgbtmp[3], "TB"); 03569 break; 03570 case 3: 03571 
sprintf(srgbtmp[3], "TA"); 03572 break; 03573 case 4: 03574 break; 03575 } 03576 03577 /* Create the hw ARB shader */ 03578 memset(&priv_ctx, 0, sizeof(priv_ctx)); 03579 priv_ctx.cur_ps_args = args; 03580 priv_ctx.compiled_fprog = compiled; 03581 priv_ctx.cur_np2fixup_info = &compiled->np2fixup_info; 03582 init_ps_input(shader, args, &priv_ctx); 03583 list_init(&priv_ctx.control_frames); 03584 03585 /* Avoid enabling NV_fragment_program* if we do not need it. 03586 * 03587 * Enabling GL_NV_fragment_program_option causes the driver to occupy a temporary register, 03588 * and it slows down the shader execution noticeably(about 5%). Usually our instruction emulation 03589 * is faster than what we gain from using higher native instructions. There are some things though 03590 * that cannot be emulated. In that case enable the extensions. 03591 * If the extension is enabled, instruction handlers that support both ways will use it. 03592 * 03593 * Testing shows no performance difference between OPTION NV_fragment_program2 and NV_fragment_program. 03594 * So enable the best we can get. 
03595 */ 03596 if(reg_maps->usesdsx || reg_maps->usesdsy || reg_maps->loop_depth > 0 || reg_maps->usestexldd || 03597 reg_maps->usestexldl || reg_maps->usesfacing || reg_maps->usesifc || reg_maps->usescall) 03598 { 03599 want_nv_prog = TRUE; 03600 } 03601 03602 shader_addline(buffer, "!!ARBfp1.0\n"); 03603 if (want_nv_prog && gl_info->supported[NV_FRAGMENT_PROGRAM2]) 03604 { 03605 shader_addline(buffer, "OPTION NV_fragment_program2;\n"); 03606 priv_ctx.target_version = NV3; 03607 } 03608 else if (want_nv_prog && gl_info->supported[NV_FRAGMENT_PROGRAM_OPTION]) 03609 { 03610 shader_addline(buffer, "OPTION NV_fragment_program;\n"); 03611 priv_ctx.target_version = NV2; 03612 } else { 03613 if(want_nv_prog) 03614 { 03615 /* This is an error - either we're advertising the wrong shader version, or aren't enforcing some 03616 * limits properly 03617 */ 03618 ERR("The shader requires instructions that are not available in plain GL_ARB_fragment_program\n"); 03619 ERR("Try GLSL\n"); 03620 } 03621 priv_ctx.target_version = ARB; 03622 } 03623 03624 if (reg_maps->rt_mask > 1) 03625 { 03626 shader_addline(buffer, "OPTION ARB_draw_buffers;\n"); 03627 } 03628 03629 if (reg_maps->shader_version.major < 3) 03630 { 03631 switch(args->super.fog) { 03632 case FOG_OFF: 03633 break; 03634 case FOG_LINEAR: 03635 shader_addline(buffer, "OPTION ARB_fog_linear;\n"); 03636 break; 03637 case FOG_EXP: 03638 shader_addline(buffer, "OPTION ARB_fog_exp;\n"); 03639 break; 03640 case FOG_EXP2: 03641 shader_addline(buffer, "OPTION ARB_fog_exp2;\n"); 03642 break; 03643 } 03644 } 03645 03646 /* For now always declare the temps. At least the Nvidia assembler optimizes completely 03647 * unused temps away(but occupies them for the whole shader if they're used once). 
Always 03648 * declaring them avoids tricky bookkeeping work 03649 */ 03650 shader_addline(buffer, "TEMP TA;\n"); /* Used for modifiers */ 03651 shader_addline(buffer, "TEMP TB;\n"); /* Used for modifiers */ 03652 shader_addline(buffer, "TEMP TC;\n"); /* Used for modifiers */ 03653 if(dcl_td) shader_addline(buffer, "TEMP TD;\n"); /* Used for sRGB writing */ 03654 shader_addline(buffer, "PARAM coefdiv = { 0.5, 0.25, 0.125, 0.0625 };\n"); 03655 shader_addline(buffer, "PARAM coefmul = { 2, 4, 8, 16 };\n"); 03656 shader_addline(buffer, "PARAM ps_helper_const = { 0.0, 1.0, %1.10f, 0.0 };\n", eps); 03657 03658 if (reg_maps->shader_version.major < 2) 03659 { 03660 strcpy(fragcolor, "R0"); 03661 } 03662 else 03663 { 03664 if (args->super.srgb_correction) 03665 { 03666 if (shader->u.ps.color0_mov) 03667 { 03668 sprintf(fragcolor, "R%u", shader->u.ps.color0_reg); 03669 } 03670 else 03671 { 03672 shader_addline(buffer, "TEMP TMP_COLOR;\n"); 03673 strcpy(fragcolor, "TMP_COLOR"); 03674 } 03675 } else { 03676 strcpy(fragcolor, "result.color"); 03677 } 03678 } 03679 03680 if(args->super.srgb_correction) { 03681 shader_addline(buffer, "PARAM srgb_consts1 = {%f, %f, %f, %f};\n", 03682 srgb_mul_low, srgb_cmp, srgb_pow, srgb_mul_high); 03683 shader_addline(buffer, "PARAM srgb_consts2 = {%f, %f, %f, %f};\n", 03684 srgb_sub_high, 0.0, 0.0, 0.0); 03685 } 03686 03687 /* Base Declarations */ 03688 next_local = shader_generate_arb_declarations(shader, reg_maps, 03689 buffer, gl_info, lconst_map, NULL, &priv_ctx); 03690 03691 for (i = 0, map = reg_maps->bumpmat; map; map >>= 1, ++i) 03692 { 03693 unsigned char bump_const; 03694 03695 if (!(map & 1)) continue; 03696 03697 bump_const = compiled->numbumpenvmatconsts; 03698 compiled->bumpenvmatconst[bump_const].const_num = WINED3D_CONST_NUM_UNUSED; 03699 compiled->bumpenvmatconst[bump_const].texunit = i; 03700 compiled->luminanceconst[bump_const].const_num = WINED3D_CONST_NUM_UNUSED; 03701 compiled->luminanceconst[bump_const].texunit = i; 03702 
03703 /* We can fit the constants into the constant limit for sure because texbem, texbeml, bem and beml are only supported 03704 * in 1.x shaders, and GL_ARB_fragment_program has a constant limit of 24 constants. So in the worst case we're loading 03705 * 8 shader constants, 8 bump matrices and 8 luminance parameters and are perfectly fine. (No NP2 fixup on bumpmapped 03706 * textures due to conditional NP2 restrictions) 03707 * 03708 * Use local constants to load the bump env parameters, not program.env. This avoids collisions with d3d constants of 03709 * shaders in newer shader models. Since the bump env parameters have to share their space with NP2 fixup constants, 03710 * their location is shader dependent anyway and they cannot be loaded globally. 03711 */ 03712 compiled->bumpenvmatconst[bump_const].const_num = next_local++; 03713 shader_addline(buffer, "PARAM bumpenvmat%d = program.local[%d];\n", 03714 i, compiled->bumpenvmatconst[bump_const].const_num); 03715 compiled->numbumpenvmatconsts = bump_const + 1; 03716 03717 if (!(reg_maps->luminanceparams & (1 << i))) continue; 03718 03719 compiled->luminanceconst[bump_const].const_num = next_local++; 03720 shader_addline(buffer, "PARAM luminance%d = program.local[%d];\n", 03721 i, compiled->luminanceconst[bump_const].const_num); 03722 } 03723 03724 for(i = 0; i < MAX_CONST_I; i++) 03725 { 03726 compiled->int_consts[i] = WINED3D_CONST_NUM_UNUSED; 03727 if (reg_maps->integer_constants & (1 << i) && priv_ctx.target_version >= NV2) 03728 { 03729 const DWORD *control_values = find_loop_control_values(shader, i); 03730 03731 if(control_values) 03732 { 03733 shader_addline(buffer, "PARAM I%u = {%u, %u, %u, -1};\n", i, 03734 control_values[0], control_values[1], control_values[2]); 03735 } 03736 else 03737 { 03738 compiled->int_consts[i] = next_local; 03739 compiled->num_int_consts++; 03740 shader_addline(buffer, "PARAM I%u = program.local[%u];\n", i, next_local++); 03741 } 03742 } 03743 } 03744 03745 if(reg_maps->vpos 
|| reg_maps->usesdsy) 03746 { 03747 compiled->ycorrection = next_local; 03748 shader_addline(buffer, "PARAM ycorrection = program.local[%u];\n", next_local++); 03749 03750 if(reg_maps->vpos) 03751 { 03752 shader_addline(buffer, "TEMP vpos;\n"); 03753 /* ycorrection.x: Backbuffer height(onscreen) or 0(offscreen). 03754 * ycorrection.y: -1.0(onscreen), 1.0(offscreen) 03755 * ycorrection.z: 1.0 03756 * ycorrection.w: 0.0 03757 */ 03758 shader_addline(buffer, "MAD vpos, fragment.position, ycorrection.zyww, ycorrection.wxww;\n"); 03759 shader_addline(buffer, "FLR vpos.xy, vpos;\n"); 03760 } 03761 } 03762 else 03763 { 03764 compiled->ycorrection = WINED3D_CONST_NUM_UNUSED; 03765 } 03766 03767 /* Load constants to fixup NP2 texcoords if there are still free constants left: 03768 * Constants (texture dimensions) for the NP2 fixup are loaded as local program parameters. This will consume 03769 * at most 8 (MAX_FRAGMENT_SAMPLERS / 2) parameters, which is highly unlikely, since the application had to 03770 * use 16 NP2 textures at the same time. In case that we run out of constants the fixup is simply not 03771 * applied / activated. This will probably result in wrong rendering of the texture, but will save us from 03772 * shader compilation errors and the subsequent errors when drawing with this shader. 
*/ 03773 if (priv_ctx.cur_ps_args->super.np2_fixup) { 03774 unsigned char cur_fixup_sampler = 0; 03775 03776 struct arb_ps_np2fixup_info* const fixup = priv_ctx.cur_np2fixup_info; 03777 const WORD map = priv_ctx.cur_ps_args->super.np2_fixup; 03778 const UINT max_lconsts = gl_info->limits.arb_ps_local_constants; 03779 03780 fixup->offset = next_local; 03781 fixup->super.active = 0; 03782 03783 for (i = 0; i < MAX_FRAGMENT_SAMPLERS; ++i) { 03784 if (!(map & (1 << i))) continue; 03785 03786 if (fixup->offset + (cur_fixup_sampler >> 1) < max_lconsts) { 03787 fixup->super.active |= (1 << i); 03788 fixup->super.idx[i] = cur_fixup_sampler++; 03789 } else { 03790 FIXME("No free constant found to load NP2 fixup data into shader. " 03791 "Sampling from this texture will probably look wrong.\n"); 03792 break; 03793 } 03794 } 03795 03796 fixup->super.num_consts = (cur_fixup_sampler + 1) >> 1; 03797 if (fixup->super.num_consts) { 03798 shader_addline(buffer, "PARAM np2fixup[%u] = { program.env[%u..%u] };\n", 03799 fixup->super.num_consts, fixup->offset, fixup->super.num_consts + fixup->offset - 1); 03800 } 03801 } 03802 03803 if (shader_priv->clipplane_emulation != ~0U && args->clip) 03804 { 03805 shader_addline(buffer, "KIL fragment.texcoord[%u];\n", shader_priv->clipplane_emulation); 03806 } 03807 03808 /* Base Shader Body */ 03809 shader_generate_main(shader, buffer, reg_maps, function, &priv_ctx); 03810 03811 if(args->super.srgb_correction) { 03812 arbfp_add_sRGB_correction(buffer, fragcolor, srgbtmp[0], srgbtmp[1], srgbtmp[2], srgbtmp[3], 03813 priv_ctx.target_version >= NV2); 03814 } 03815 03816 if(strcmp(fragcolor, "result.color")) { 03817 shader_addline(buffer, "MOV result.color, %s;\n", fragcolor); 03818 } 03819 shader_addline(buffer, "END\n"); 03820 03821 /* TODO: change to resource.glObjectHandle or something like that */ 03822 GL_EXTCALL(glGenProgramsARB(1, &retval)); 03823 03824 TRACE("Creating a hw pixel shader, prg=%d\n", retval); 03825 
GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, retval)); 03826 03827 TRACE("Created hw pixel shader, prg=%d\n", retval); 03828 /* Create the program and check for errors */ 03829 GL_EXTCALL(glProgramStringARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, 03830 buffer->bsize, buffer->buffer)); 03831 checkGLcall("glProgramStringARB()"); 03832 03833 glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &errPos); 03834 if (errPos != -1) 03835 { 03836 FIXME("HW PixelShader Error at position %d: %s\n\n", 03837 errPos, debugstr_a((const char *)glGetString(GL_PROGRAM_ERROR_STRING_ARB))); 03838 shader_arb_dump_program_source(buffer->buffer); 03839 retval = 0; 03840 } 03841 else 03842 { 03843 GLint native; 03844 03845 GL_EXTCALL(glGetProgramivARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_UNDER_NATIVE_LIMITS_ARB, &native)); 03846 checkGLcall("glGetProgramivARB()"); 03847 if (!native) WARN("Program exceeds native resource limits.\n"); 03848 } 03849 03850 /* Load immediate constants */ 03851 if (lconst_map) 03852 { 03853 LIST_FOR_EACH_ENTRY(lconst, &shader->constantsF, struct wined3d_shader_lconst, entry) 03854 { 03855 const float *value = (const float *)lconst->value; 03856 GL_EXTCALL(glProgramLocalParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, lconst_map[lconst->idx], value)); 03857 checkGLcall("glProgramLocalParameter4fvARB"); 03858 } 03859 HeapFree(GetProcessHeap(), 0, lconst_map); 03860 } 03861 03862 return retval; 03863 } 03864 03865 static int compare_sig(const struct wined3d_shader_signature_element *sig1, const struct wined3d_shader_signature_element *sig2) 03866 { 03867 unsigned int i; 03868 int ret; 03869 03870 for(i = 0; i < MAX_REG_INPUT; i++) 03871 { 03872 if (!sig1[i].semantic_name || !sig2[i].semantic_name) 03873 { 03874 /* Compare pointers, not contents. One string is NULL(element does not exist), the other one is not NULL */ 03875 if(sig1[i].semantic_name != sig2[i].semantic_name) return sig1[i].semantic_name < sig2[i].semantic_name ? 
-1 : 1; 03876 continue; 03877 } 03878 03879 if ((ret = strcmp(sig1[i].semantic_name, sig2[i].semantic_name))) return ret; 03880 if(sig1[i].semantic_idx != sig2[i].semantic_idx) return sig1[i].semantic_idx < sig2[i].semantic_idx ? -1 : 1; 03881 if(sig1[i].sysval_semantic != sig2[i].sysval_semantic) return sig1[i].sysval_semantic < sig2[i].sysval_semantic ? -1 : 1; 03882 if(sig1[i].component_type != sig2[i].component_type) return sig1[i].component_type < sig2[i].component_type ? -1 : 1; 03883 if(sig1[i].register_idx != sig2[i].register_idx) return sig1[i].register_idx < sig2[i].register_idx ? -1 : 1; 03884 if(sig1[i].mask != sig2[i].mask) return sig1[i].mask < sig2[i].mask ? -1 : 1; 03885 } 03886 return 0; 03887 } 03888 03889 static struct wined3d_shader_signature_element *clone_sig(const struct wined3d_shader_signature_element *sig) 03890 { 03891 struct wined3d_shader_signature_element *new; 03892 int i; 03893 char *name; 03894 03895 new = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*new) * MAX_REG_INPUT); 03896 for(i = 0; i < MAX_REG_INPUT; i++) 03897 { 03898 if (!sig[i].semantic_name) continue; 03899 03900 new[i] = sig[i]; 03901 /* Clone the semantic string */ 03902 name = HeapAlloc(GetProcessHeap(), 0, strlen(sig[i].semantic_name) + 1); 03903 strcpy(name, sig[i].semantic_name); 03904 new[i].semantic_name = name; 03905 } 03906 return new; 03907 } 03908 03909 static DWORD find_input_signature(struct shader_arb_priv *priv, const struct wined3d_shader_signature_element *sig) 03910 { 03911 struct wine_rb_entry *entry = wine_rb_get(&priv->signature_tree, sig); 03912 struct ps_signature *found_sig; 03913 03914 if (entry) 03915 { 03916 found_sig = WINE_RB_ENTRY_VALUE(entry, struct ps_signature, entry); 03917 TRACE("Found existing signature %u\n", found_sig->idx); 03918 return found_sig->idx; 03919 } 03920 found_sig = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*sig)); 03921 found_sig->sig = clone_sig(sig); 03922 found_sig->idx = priv->ps_sig_number++; 
03923 TRACE("New signature stored and assigned number %u\n", found_sig->idx); 03924 if(wine_rb_put(&priv->signature_tree, sig, &found_sig->entry) == -1) 03925 { 03926 ERR("Failed to insert program entry.\n"); 03927 } 03928 return found_sig->idx; 03929 } 03930 03931 static void init_output_registers(const struct wined3d_shader *shader, DWORD sig_num, 03932 struct shader_arb_ctx_priv *priv_ctx, struct arb_vs_compiled_shader *compiled) 03933 { 03934 unsigned int i, j; 03935 static const char * const texcoords[8] = 03936 { 03937 "result.texcoord[0]", "result.texcoord[1]", "result.texcoord[2]", "result.texcoord[3]", 03938 "result.texcoord[4]", "result.texcoord[5]", "result.texcoord[6]", "result.texcoord[7]" 03939 }; 03940 struct wined3d_device *device = shader->device; 03941 const struct wined3d_shader_signature_element *sig; 03942 const char *semantic_name; 03943 DWORD semantic_idx, reg_idx; 03944 03945 /* Write generic input varyings 0 to 7 to result.texcoord[], varying 8 to result.color.primary 03946 * and varying 9 to result.color.secondary 03947 */ 03948 static const char * const decl_idx_to_string[MAX_REG_INPUT] = 03949 { 03950 "result.texcoord[0]", "result.texcoord[1]", "result.texcoord[2]", "result.texcoord[3]", 03951 "result.texcoord[4]", "result.texcoord[5]", "result.texcoord[6]", "result.texcoord[7]", 03952 "result.color.primary", "result.color.secondary" 03953 }; 03954 03955 if(sig_num == ~0) 03956 { 03957 TRACE("Pixel shader uses builtin varyings\n"); 03958 /* Map builtins to builtins */ 03959 for(i = 0; i < 8; i++) 03960 { 03961 priv_ctx->texcrd_output[i] = texcoords[i]; 03962 } 03963 priv_ctx->color_output[0] = "result.color.primary"; 03964 priv_ctx->color_output[1] = "result.color.secondary"; 03965 priv_ctx->fog_output = "result.fogcoord"; 03966 03967 /* Map declared regs to builtins. 
Use "TA" to /dev/null unread output */ 03968 for (i = 0; i < (sizeof(shader->output_signature) / sizeof(*shader->output_signature)); ++i) 03969 { 03970 semantic_name = shader->output_signature[i].semantic_name; 03971 if (!semantic_name) continue; 03972 03973 if(shader_match_semantic(semantic_name, WINED3DDECLUSAGE_POSITION)) 03974 { 03975 TRACE("o%u is TMP_OUT\n", i); 03976 if (!shader->output_signature[i].semantic_idx) priv_ctx->vs_output[i] = "TMP_OUT"; 03977 else priv_ctx->vs_output[i] = "TA"; 03978 } 03979 else if(shader_match_semantic(semantic_name, WINED3DDECLUSAGE_PSIZE)) 03980 { 03981 TRACE("o%u is result.pointsize\n", i); 03982 if (!shader->output_signature[i].semantic_idx) priv_ctx->vs_output[i] = "result.pointsize"; 03983 else priv_ctx->vs_output[i] = "TA"; 03984 } 03985 else if(shader_match_semantic(semantic_name, WINED3DDECLUSAGE_COLOR)) 03986 { 03987 TRACE("o%u is result.color.?, idx %u\n", i, shader->output_signature[i].semantic_idx); 03988 if (!shader->output_signature[i].semantic_idx) 03989 priv_ctx->vs_output[i] = "result.color.primary"; 03990 else if (shader->output_signature[i].semantic_idx == 1) 03991 priv_ctx->vs_output[i] = "result.color.secondary"; 03992 else priv_ctx->vs_output[i] = "TA"; 03993 } 03994 else if(shader_match_semantic(semantic_name, WINED3DDECLUSAGE_TEXCOORD)) 03995 { 03996 TRACE("o%u is %s\n", i, texcoords[shader->output_signature[i].semantic_idx]); 03997 if (shader->output_signature[i].semantic_idx >= 8) priv_ctx->vs_output[i] = "TA"; 03998 else priv_ctx->vs_output[i] = texcoords[shader->output_signature[i].semantic_idx]; 03999 } 04000 else if(shader_match_semantic(semantic_name, WINED3DDECLUSAGE_FOG)) 04001 { 04002 TRACE("o%u is result.fogcoord\n", i); 04003 if (shader->output_signature[i].semantic_idx > 0) priv_ctx->vs_output[i] = "TA"; 04004 else priv_ctx->vs_output[i] = "result.fogcoord"; 04005 } 04006 else 04007 { 04008 priv_ctx->vs_output[i] = "TA"; 04009 } 04010 } 04011 return; 04012 } 04013 04014 /* Instead of 
searching for the signature in the signature list, read the one from the current pixel shader. 04015 * Its maybe not the shader where the signature came from, but it is the same signature and faster to find 04016 */ 04017 sig = device->stateBlock->state.pixel_shader->input_signature; 04018 TRACE("Pixel shader uses declared varyings\n"); 04019 04020 /* Map builtin to declared. /dev/null the results by default to the TA temp reg */ 04021 for(i = 0; i < 8; i++) 04022 { 04023 priv_ctx->texcrd_output[i] = "TA"; 04024 } 04025 priv_ctx->color_output[0] = "TA"; 04026 priv_ctx->color_output[1] = "TA"; 04027 priv_ctx->fog_output = "TA"; 04028 04029 for(i = 0; i < MAX_REG_INPUT; i++) 04030 { 04031 semantic_name = sig[i].semantic_name; 04032 semantic_idx = sig[i].semantic_idx; 04033 reg_idx = sig[i].register_idx; 04034 if (!semantic_name) continue; 04035 04036 /* If a declared input register is not written by builtin arguments, don't write to it. 04037 * GL_NV_vertex_program makes sure the input defaults to 0.0, which is correct with D3D 04038 * 04039 * Don't care about POSITION and PSIZE here - this is a builtin vertex shader, position goes 04040 * to TMP_OUT in any case 04041 */ 04042 if(shader_match_semantic(semantic_name, WINED3DDECLUSAGE_TEXCOORD)) 04043 { 04044 if(semantic_idx < 8) priv_ctx->texcrd_output[semantic_idx] = decl_idx_to_string[reg_idx]; 04045 } 04046 else if(shader_match_semantic(semantic_name, WINED3DDECLUSAGE_COLOR)) 04047 { 04048 if(semantic_idx < 2) priv_ctx->color_output[semantic_idx] = decl_idx_to_string[reg_idx]; 04049 } 04050 else if(shader_match_semantic(semantic_name, WINED3DDECLUSAGE_FOG)) 04051 { 04052 if (!semantic_idx) priv_ctx->fog_output = decl_idx_to_string[reg_idx]; 04053 } 04054 else 04055 { 04056 continue; 04057 } 04058 04059 if (!strcmp(decl_idx_to_string[reg_idx], "result.color.primary") 04060 || !strcmp(decl_idx_to_string[reg_idx], "result.color.secondary")) 04061 { 04062 compiled->need_color_unclamp = TRUE; 04063 } 04064 } 04065 04066 
/* Map declared to declared */ 04067 for (i = 0; i < (sizeof(shader->output_signature) / sizeof(*shader->output_signature)); ++i) 04068 { 04069 /* Write unread output to TA to throw them away */ 04070 priv_ctx->vs_output[i] = "TA"; 04071 semantic_name = shader->output_signature[i].semantic_name; 04072 if (!semantic_name) continue; 04073 04074 if (shader_match_semantic(semantic_name, WINED3DDECLUSAGE_POSITION) 04075 && !shader->output_signature[i].semantic_idx) 04076 { 04077 priv_ctx->vs_output[i] = "TMP_OUT"; 04078 continue; 04079 } 04080 else if (shader_match_semantic(semantic_name, WINED3DDECLUSAGE_PSIZE) 04081 && !shader->output_signature[i].semantic_idx) 04082 { 04083 priv_ctx->vs_output[i] = "result.pointsize"; 04084 continue; 04085 } 04086 04087 for(j = 0; j < MAX_REG_INPUT; j++) 04088 { 04089 if (!sig[j].semantic_name) continue; 04090 04091 if (!strcmp(sig[j].semantic_name, semantic_name) 04092 && sig[j].semantic_idx == shader->output_signature[i].semantic_idx) 04093 { 04094 priv_ctx->vs_output[i] = decl_idx_to_string[sig[j].register_idx]; 04095 04096 if (!strcmp(priv_ctx->vs_output[i], "result.color.primary") 04097 || !strcmp(priv_ctx->vs_output[i], "result.color.secondary")) 04098 { 04099 compiled->need_color_unclamp = TRUE; 04100 } 04101 } 04102 } 04103 } 04104 } 04105 04106 /* GL locking is done by the caller */ 04107 static GLuint shader_arb_generate_vshader(const struct wined3d_shader *shader, 04108 const struct wined3d_gl_info *gl_info, struct wined3d_shader_buffer *buffer, 04109 const struct arb_vs_compile_args *args, struct arb_vs_compiled_shader *compiled) 04110 { 04111 const struct arb_vshader_private *shader_data = shader->backend_data; 04112 const struct wined3d_shader_reg_maps *reg_maps = &shader->reg_maps; 04113 const struct wined3d_shader_lconst *lconst; 04114 const DWORD *function = shader->function; 04115 GLuint ret; 04116 DWORD next_local, *lconst_map = local_const_mapping(shader); 04117 struct shader_arb_ctx_priv priv_ctx; 04118 unsigned 
int i; 04119 GLint errPos; 04120 04121 memset(&priv_ctx, 0, sizeof(priv_ctx)); 04122 priv_ctx.cur_vs_args = args; 04123 list_init(&priv_ctx.control_frames); 04124 init_output_registers(shader, args->ps_signature, &priv_ctx, compiled); 04125 04126 /* Create the hw ARB shader */ 04127 shader_addline(buffer, "!!ARBvp1.0\n"); 04128 04129 /* Always enable the NV extension if available. Unlike fragment shaders, there is no 04130 * mesurable performance penalty, and we can always make use of it for clipplanes. 04131 */ 04132 if (gl_info->supported[NV_VERTEX_PROGRAM3]) 04133 { 04134 shader_addline(buffer, "OPTION NV_vertex_program3;\n"); 04135 priv_ctx.target_version = NV3; 04136 shader_addline(buffer, "ADDRESS aL;\n"); 04137 } 04138 else if (gl_info->supported[NV_VERTEX_PROGRAM2_OPTION]) 04139 { 04140 shader_addline(buffer, "OPTION NV_vertex_program2;\n"); 04141 priv_ctx.target_version = NV2; 04142 shader_addline(buffer, "ADDRESS aL;\n"); 04143 } else { 04144 priv_ctx.target_version = ARB; 04145 } 04146 04147 shader_addline(buffer, "TEMP TMP_OUT;\n"); 04148 if (need_helper_const(shader_data, reg_maps, gl_info)) 04149 { 04150 shader_addline(buffer, "PARAM helper_const = { 0.0, 1.0, 2.0, %1.10f};\n", eps); 04151 } 04152 if (need_rel_addr_const(shader_data, reg_maps, gl_info)) 04153 { 04154 shader_addline(buffer, "PARAM rel_addr_const = { 0.5, %d.0, 0.0, 0.0 };\n", shader_data->rel_offset); 04155 shader_addline(buffer, "TEMP A0_SHADOW;\n"); 04156 } 04157 04158 shader_addline(buffer, "TEMP TA;\n"); 04159 shader_addline(buffer, "TEMP TB;\n"); 04160 04161 /* Base Declarations */ 04162 next_local = shader_generate_arb_declarations(shader, reg_maps, buffer, 04163 gl_info, lconst_map, &priv_ctx.vs_clipplanes, &priv_ctx); 04164 04165 for(i = 0; i < MAX_CONST_I; i++) 04166 { 04167 compiled->int_consts[i] = WINED3D_CONST_NUM_UNUSED; 04168 if(reg_maps->integer_constants & (1 << i) && priv_ctx.target_version >= NV2) 04169 { 04170 const DWORD *control_values = 
find_loop_control_values(shader, i); 04171 04172 if(control_values) 04173 { 04174 shader_addline(buffer, "PARAM I%u = {%u, %u, %u, -1};\n", i, 04175 control_values[0], control_values[1], control_values[2]); 04176 } 04177 else 04178 { 04179 compiled->int_consts[i] = next_local; 04180 compiled->num_int_consts++; 04181 shader_addline(buffer, "PARAM I%u = program.local[%u];\n", i, next_local++); 04182 } 04183 } 04184 } 04185 04186 /* We need a constant to fixup the final position */ 04187 shader_addline(buffer, "PARAM posFixup = program.local[%u];\n", next_local); 04188 compiled->pos_fixup = next_local++; 04189 04190 /* Initialize output parameters. GL_ARB_vertex_program does not require special initialization values 04191 * for output parameters. D3D in theory does not do that either, but some applications depend on a 04192 * proper initialization of the secondary color, and programs using the fixed function pipeline without 04193 * a replacement shader depend on the texcoord.w being set properly. 04194 * 04195 * GL_NV_vertex_program defines that all output values are initialized to {0.0, 0.0, 0.0, 1.0}. This 04196 * assertion is in effect even when using GL_ARB_vertex_program without any NV specific additions. So 04197 * skip this if NV_vertex_program is supported. Otherwise, initialize the secondary color. For the tex- 04198 * coords, we have a flag in the opengl caps. 
Many cards do not require the texcoord being set, and 04199 * this can eat a number of instructions, so skip it unless this cap is set as well 04200 */ 04201 if (!gl_info->supported[NV_VERTEX_PROGRAM]) 04202 { 04203 struct wined3d_device *device = shader->device; 04204 const char *color_init = arb_get_helper_value(WINED3D_SHADER_TYPE_VERTEX, ARB_0001); 04205 shader_addline(buffer, "MOV result.color.secondary, %s;\n", color_init); 04206 04207 if (gl_info->quirks & WINED3D_QUIRK_SET_TEXCOORD_W && !device->frag_pipe->ffp_proj_control) 04208 { 04209 int i; 04210 const char *one = arb_get_helper_value(WINED3D_SHADER_TYPE_VERTEX, ARB_ONE); 04211 for(i = 0; i < min(8, MAX_REG_TEXCRD); i++) 04212 { 04213 if (reg_maps->texcoord_mask[i] && reg_maps->texcoord_mask[i] != WINED3DSP_WRITEMASK_ALL) 04214 shader_addline(buffer, "MOV result.texcoord[%u].w, %s\n", i, one); 04215 } 04216 } 04217 } 04218 04219 /* The shader starts with the main function */ 04220 priv_ctx.in_main_func = TRUE; 04221 /* Base Shader Body */ 04222 shader_generate_main(shader, buffer, reg_maps, function, &priv_ctx); 04223 04224 if (!priv_ctx.footer_written) vshader_add_footer(&priv_ctx, 04225 shader_data, args, reg_maps, gl_info, buffer); 04226 04227 shader_addline(buffer, "END\n"); 04228 04229 /* TODO: change to resource.glObjectHandle or something like that */ 04230 GL_EXTCALL(glGenProgramsARB(1, &ret)); 04231 04232 TRACE("Creating a hw vertex shader, prg=%d\n", ret); 04233 GL_EXTCALL(glBindProgramARB(GL_VERTEX_PROGRAM_ARB, ret)); 04234 04235 TRACE("Created hw vertex shader, prg=%d\n", ret); 04236 /* Create the program and check for errors */ 04237 GL_EXTCALL(glProgramStringARB(GL_VERTEX_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, 04238 buffer->bsize, buffer->buffer)); 04239 checkGLcall("glProgramStringARB()"); 04240 04241 glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &errPos); 04242 if (errPos != -1) 04243 { 04244 FIXME("HW VertexShader Error at position %d: %s\n\n", 04245 errPos, debugstr_a((const char 
*)glGetString(GL_PROGRAM_ERROR_STRING_ARB))); 04246 shader_arb_dump_program_source(buffer->buffer); 04247 ret = -1; 04248 } 04249 else 04250 { 04251 GLint native; 04252 04253 GL_EXTCALL(glGetProgramivARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_UNDER_NATIVE_LIMITS_ARB, &native)); 04254 checkGLcall("glGetProgramivARB()"); 04255 if (!native) WARN("Program exceeds native resource limits.\n"); 04256 04257 /* Load immediate constants */ 04258 if (lconst_map) 04259 { 04260 LIST_FOR_EACH_ENTRY(lconst, &shader->constantsF, struct wined3d_shader_lconst, entry) 04261 { 04262 const float *value = (const float *)lconst->value; 04263 GL_EXTCALL(glProgramLocalParameter4fvARB(GL_VERTEX_PROGRAM_ARB, lconst_map[lconst->idx], value)); 04264 } 04265 } 04266 } 04267 HeapFree(GetProcessHeap(), 0, lconst_map); 04268 04269 return ret; 04270 } 04271 04272 /* GL locking is done by the caller */ 04273 static struct arb_ps_compiled_shader *find_arb_pshader(struct wined3d_shader *shader, 04274 const struct arb_ps_compile_args *args) 04275 { 04276 struct wined3d_device *device = shader->device; 04277 const struct wined3d_gl_info *gl_info = &device->adapter->gl_info; 04278 UINT i; 04279 DWORD new_size; 04280 struct arb_ps_compiled_shader *new_array; 04281 struct wined3d_shader_buffer buffer; 04282 struct arb_pshader_private *shader_data; 04283 GLuint ret; 04284 04285 if (!shader->backend_data) 04286 { 04287 struct shader_arb_priv *priv = device->shader_priv; 04288 04289 shader->backend_data = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*shader_data)); 04290 shader_data = shader->backend_data; 04291 shader_data->clamp_consts = shader->reg_maps.shader_version.major == 1; 04292 04293 if (shader->reg_maps.shader_version.major < 3) 04294 shader_data->input_signature_idx = ~0; 04295 else 04296 shader_data->input_signature_idx = find_input_signature(priv, shader->input_signature); 04297 04298 TRACE("Shader got assigned input signature index %u\n", shader_data->input_signature_idx); 04299 04300 if 
(!device->vs_clipping) 04301 shader_data->clipplane_emulation = shader_find_free_input_register(&shader->reg_maps, 04302 gl_info->limits.texture_stages - 1); 04303 else 04304 shader_data->clipplane_emulation = ~0U; 04305 } 04306 shader_data = shader->backend_data; 04307 04308 /* Usually we have very few GL shaders for each d3d shader(just 1 or maybe 2), 04309 * so a linear search is more performant than a hashmap or a binary search 04310 * (cache coherency etc) 04311 */ 04312 for (i = 0; i < shader_data->num_gl_shaders; ++i) 04313 { 04314 if (!memcmp(&shader_data->gl_shaders[i].args, args, sizeof(*args))) 04315 return &shader_data->gl_shaders[i]; 04316 } 04317 04318 TRACE("No matching GL shader found, compiling a new shader\n"); 04319 if(shader_data->shader_array_size == shader_data->num_gl_shaders) { 04320 if (shader_data->num_gl_shaders) 04321 { 04322 new_size = shader_data->shader_array_size + max(1, shader_data->shader_array_size / 2); 04323 new_array = HeapReAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, shader_data->gl_shaders, 04324 new_size * sizeof(*shader_data->gl_shaders)); 04325 } else { 04326 new_array = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*shader_data->gl_shaders)); 04327 new_size = 1; 04328 } 04329 04330 if(!new_array) { 04331 ERR("Out of memory\n"); 04332 return 0; 04333 } 04334 shader_data->gl_shaders = new_array; 04335 shader_data->shader_array_size = new_size; 04336 } 04337 04338 shader_data->gl_shaders[shader_data->num_gl_shaders].args = *args; 04339 04340 pixelshader_update_samplers(&shader->reg_maps, device->stateBlock->state.textures); 04341 04342 if (!shader_buffer_init(&buffer)) 04343 { 04344 ERR("Failed to initialize shader buffer.\n"); 04345 return 0; 04346 } 04347 04348 ret = shader_arb_generate_pshader(shader, gl_info, &buffer, args, 04349 &shader_data->gl_shaders[shader_data->num_gl_shaders]); 04350 shader_buffer_free(&buffer); 04351 shader_data->gl_shaders[shader_data->num_gl_shaders].prgId = ret; 04352 04353 return 
&shader_data->gl_shaders[shader_data->num_gl_shaders++]; 04354 } 04355 04356 static inline BOOL vs_args_equal(const struct arb_vs_compile_args *stored, const struct arb_vs_compile_args *new, 04357 const DWORD use_map, BOOL skip_int) { 04358 if((stored->super.swizzle_map & use_map) != new->super.swizzle_map) return FALSE; 04359 if(stored->super.clip_enabled != new->super.clip_enabled) return FALSE; 04360 if(stored->super.fog_src != new->super.fog_src) return FALSE; 04361 if(stored->clip.boolclip_compare != new->clip.boolclip_compare) return FALSE; 04362 if(stored->ps_signature != new->ps_signature) return FALSE; 04363 if(stored->vertex.samplers_compare != new->vertex.samplers_compare) return FALSE; 04364 if(skip_int) return TRUE; 04365 04366 return !memcmp(stored->loop_ctrl, new->loop_ctrl, sizeof(stored->loop_ctrl)); 04367 } 04368 04369 static struct arb_vs_compiled_shader *find_arb_vshader(struct wined3d_shader *shader, 04370 const struct arb_vs_compile_args *args) 04371 { 04372 struct wined3d_device *device = shader->device; 04373 const struct wined3d_gl_info *gl_info = &device->adapter->gl_info; 04374 DWORD use_map = device->strided_streams.use_map; 04375 UINT i; 04376 DWORD new_size; 04377 struct arb_vs_compiled_shader *new_array; 04378 struct wined3d_shader_buffer buffer; 04379 struct arb_vshader_private *shader_data; 04380 GLuint ret; 04381 04382 if (!shader->backend_data) 04383 { 04384 const struct wined3d_shader_reg_maps *reg_maps = &shader->reg_maps; 04385 04386 shader->backend_data = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*shader_data)); 04387 shader_data = shader->backend_data; 04388 04389 if ((gl_info->quirks & WINED3D_QUIRK_ARB_VS_OFFSET_LIMIT) 04390 && reg_maps->min_rel_offset <= reg_maps->max_rel_offset) 04391 { 04392 if (reg_maps->max_rel_offset - reg_maps->min_rel_offset > 127) 04393 { 04394 FIXME("The difference between the minimum and maximum relative offset is > 127.\n"); 04395 FIXME("Which this OpenGL implementation does not 
support. Try using GLSL.\n"); 04396 FIXME("Min: %u, Max: %u.\n", reg_maps->min_rel_offset, reg_maps->max_rel_offset); 04397 } 04398 else if (reg_maps->max_rel_offset - reg_maps->min_rel_offset > 63) 04399 shader_data->rel_offset = reg_maps->min_rel_offset + 63; 04400 else if (reg_maps->max_rel_offset > 63) 04401 shader_data->rel_offset = reg_maps->min_rel_offset; 04402 } 04403 } 04404 shader_data = shader->backend_data; 04405 04406 /* Usually we have very few GL shaders for each d3d shader(just 1 or maybe 2), 04407 * so a linear search is more performant than a hashmap or a binary search 04408 * (cache coherency etc) 04409 */ 04410 for(i = 0; i < shader_data->num_gl_shaders; i++) { 04411 if (vs_args_equal(&shader_data->gl_shaders[i].args, args, 04412 use_map, gl_info->supported[NV_VERTEX_PROGRAM2_OPTION])) 04413 { 04414 return &shader_data->gl_shaders[i]; 04415 } 04416 } 04417 04418 TRACE("No matching GL shader found, compiling a new shader\n"); 04419 04420 if(shader_data->shader_array_size == shader_data->num_gl_shaders) { 04421 if (shader_data->num_gl_shaders) 04422 { 04423 new_size = shader_data->shader_array_size + max(1, shader_data->shader_array_size / 2); 04424 new_array = HeapReAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, shader_data->gl_shaders, 04425 new_size * sizeof(*shader_data->gl_shaders)); 04426 } else { 04427 new_array = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*shader_data->gl_shaders)); 04428 new_size = 1; 04429 } 04430 04431 if(!new_array) { 04432 ERR("Out of memory\n"); 04433 return 0; 04434 } 04435 shader_data->gl_shaders = new_array; 04436 shader_data->shader_array_size = new_size; 04437 } 04438 04439 shader_data->gl_shaders[shader_data->num_gl_shaders].args = *args; 04440 04441 if (!shader_buffer_init(&buffer)) 04442 { 04443 ERR("Failed to initialize shader buffer.\n"); 04444 return 0; 04445 } 04446 04447 ret = shader_arb_generate_vshader(shader, gl_info, &buffer, args, 04448 &shader_data->gl_shaders[shader_data->num_gl_shaders]); 
04449 shader_buffer_free(&buffer); 04450 shader_data->gl_shaders[shader_data->num_gl_shaders].prgId = ret; 04451 04452 return &shader_data->gl_shaders[shader_data->num_gl_shaders++]; 04453 } 04454 04455 static void find_arb_ps_compile_args(const struct wined3d_state *state, 04456 const struct wined3d_shader *shader, struct arb_ps_compile_args *args) 04457 { 04458 struct wined3d_device *device = shader->device; 04459 const struct wined3d_gl_info *gl_info = &device->adapter->gl_info; 04460 int i; 04461 WORD int_skip; 04462 04463 find_ps_compile_args(state, shader, &args->super); 04464 04465 /* This forces all local boolean constants to 1 to make them stateblock independent */ 04466 args->bools = shader->reg_maps.local_bool_consts; 04467 04468 for(i = 0; i < MAX_CONST_B; i++) 04469 { 04470 if (state->ps_consts_b[i]) 04471 args->bools |= ( 1 << i); 04472 } 04473 04474 /* Only enable the clip plane emulation KIL if at least one clipplane is enabled. The KIL instruction 04475 * is quite expensive because it forces the driver to disable early Z discards. 
It is cheaper to 04476 * duplicate the shader than have a no-op KIL instruction in every shader 04477 */ 04478 if (!device->vs_clipping && use_vs(state) 04479 && state->render_states[WINED3D_RS_CLIPPING] 04480 && state->render_states[WINED3D_RS_CLIPPLANEENABLE]) 04481 args->clip = 1; 04482 else 04483 args->clip = 0; 04484 04485 /* Skip if unused or local, or supported natively */ 04486 int_skip = ~shader->reg_maps.integer_constants | shader->reg_maps.local_int_consts; 04487 if (int_skip == 0xffff || gl_info->supported[NV_FRAGMENT_PROGRAM_OPTION]) 04488 { 04489 memset(args->loop_ctrl, 0, sizeof(args->loop_ctrl)); 04490 return; 04491 } 04492 04493 for(i = 0; i < MAX_CONST_I; i++) 04494 { 04495 if(int_skip & (1 << i)) 04496 { 04497 args->loop_ctrl[i][0] = 0; 04498 args->loop_ctrl[i][1] = 0; 04499 args->loop_ctrl[i][2] = 0; 04500 } 04501 else 04502 { 04503 args->loop_ctrl[i][0] = state->ps_consts_i[i * 4]; 04504 args->loop_ctrl[i][1] = state->ps_consts_i[i * 4 + 1]; 04505 args->loop_ctrl[i][2] = state->ps_consts_i[i * 4 + 2]; 04506 } 04507 } 04508 } 04509 04510 static void find_arb_vs_compile_args(const struct wined3d_state *state, 04511 const struct wined3d_shader *shader, struct arb_vs_compile_args *args) 04512 { 04513 struct wined3d_device *device = shader->device; 04514 const struct wined3d_gl_info *gl_info = &device->adapter->gl_info; 04515 int i; 04516 WORD int_skip; 04517 04518 find_vs_compile_args(state, shader, &args->super); 04519 04520 args->clip.boolclip_compare = 0; 04521 if (use_ps(state)) 04522 { 04523 const struct wined3d_shader *ps = state->pixel_shader; 04524 const struct arb_pshader_private *shader_priv = ps->backend_data; 04525 args->ps_signature = shader_priv->input_signature_idx; 04526 04527 args->clip.boolclip.clip_texcoord = shader_priv->clipplane_emulation + 1; 04528 } 04529 else 04530 { 04531 args->ps_signature = ~0; 04532 if (!device->vs_clipping && device->adapter->fragment_pipe == &arbfp_fragment_pipeline) 04533 { 04534 
args->clip.boolclip.clip_texcoord = ffp_clip_emul(state) ? gl_info->limits.texture_stages : 0; 04535 } 04536 /* Otherwise: Setting boolclip_compare set clip_texcoord to 0 */ 04537 } 04538 04539 if (args->clip.boolclip.clip_texcoord) 04540 { 04541 if (state->render_states[WINED3D_RS_CLIPPING]) 04542 args->clip.boolclip.clipplane_mask = (unsigned char)state->render_states[WINED3D_RS_CLIPPLANEENABLE]; 04543 /* clipplane_mask was set to 0 by setting boolclip_compare to 0 */ 04544 } 04545 04546 /* This forces all local boolean constants to 1 to make them stateblock independent */ 04547 args->clip.boolclip.bools = shader->reg_maps.local_bool_consts; 04548 /* TODO: Figure out if it would be better to store bool constants as bitmasks in the stateblock */ 04549 for(i = 0; i < MAX_CONST_B; i++) 04550 { 04551 if (state->vs_consts_b[i]) 04552 args->clip.boolclip.bools |= ( 1 << i); 04553 } 04554 04555 args->vertex.samplers[0] = device->texUnitMap[MAX_FRAGMENT_SAMPLERS + 0]; 04556 args->vertex.samplers[1] = device->texUnitMap[MAX_FRAGMENT_SAMPLERS + 1]; 04557 args->vertex.samplers[2] = device->texUnitMap[MAX_FRAGMENT_SAMPLERS + 2]; 04558 args->vertex.samplers[3] = 0; 04559 04560 /* Skip if unused or local */ 04561 int_skip = ~shader->reg_maps.integer_constants | shader->reg_maps.local_int_consts; 04562 /* This is about flow control, not clipping. 
*/ 04563 if (int_skip == 0xffff || gl_info->supported[NV_VERTEX_PROGRAM2_OPTION]) 04564 { 04565 memset(args->loop_ctrl, 0, sizeof(args->loop_ctrl)); 04566 return; 04567 } 04568 04569 for(i = 0; i < MAX_CONST_I; i++) 04570 { 04571 if(int_skip & (1 << i)) 04572 { 04573 args->loop_ctrl[i][0] = 0; 04574 args->loop_ctrl[i][1] = 0; 04575 args->loop_ctrl[i][2] = 0; 04576 } 04577 else 04578 { 04579 args->loop_ctrl[i][0] = state->vs_consts_i[i * 4]; 04580 args->loop_ctrl[i][1] = state->vs_consts_i[i * 4 + 1]; 04581 args->loop_ctrl[i][2] = state->vs_consts_i[i * 4 + 2]; 04582 } 04583 } 04584 } 04585 04586 /* GL locking is done by the caller */ 04587 static void shader_arb_select(const struct wined3d_context *context, BOOL usePS, BOOL useVS) 04588 { 04589 struct wined3d_device *device = context->swapchain->device; 04590 struct shader_arb_priv *priv = device->shader_priv; 04591 const struct wined3d_gl_info *gl_info = context->gl_info; 04592 const struct wined3d_state *state = &device->stateBlock->state; 04593 int i; 04594 04595 /* Deal with pixel shaders first so the vertex shader arg function has the input signature ready */ 04596 if (usePS) 04597 { 04598 struct wined3d_shader *ps = state->pixel_shader; 04599 struct arb_ps_compile_args compile_args; 04600 struct arb_ps_compiled_shader *compiled; 04601 04602 TRACE("Using pixel shader %p.\n", ps); 04603 find_arb_ps_compile_args(state, ps, &compile_args); 04604 compiled = find_arb_pshader(ps, &compile_args); 04605 priv->current_fprogram_id = compiled->prgId; 04606 priv->compiled_fprog = compiled; 04607 04608 /* Bind the fragment program */ 04609 GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, priv->current_fprogram_id)); 04610 checkGLcall("glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, priv->current_fprogram_id);"); 04611 04612 if(!priv->use_arbfp_fixed_func) { 04613 /* Enable OpenGL fragment programs */ 04614 glEnable(GL_FRAGMENT_PROGRAM_ARB); 04615 checkGLcall("glEnable(GL_FRAGMENT_PROGRAM_ARB);"); 04616 } 04617 TRACE("(%p) : 
Bound fragment program %u and enabled GL_FRAGMENT_PROGRAM_ARB\n", 04618 device, priv->current_fprogram_id); 04619 04620 /* Pixel Shader 1.x constants are clamped to [-1;1], Pixel Shader 2.0 constants are not. If switching between 04621 * a 1.x and newer shader, reload the first 8 constants 04622 */ 04623 if (priv->last_ps_const_clamped != ((struct arb_pshader_private *)ps->backend_data)->clamp_consts) 04624 { 04625 priv->last_ps_const_clamped = ((struct arb_pshader_private *)ps->backend_data)->clamp_consts; 04626 priv->highest_dirty_ps_const = max(priv->highest_dirty_ps_const, 8); 04627 for(i = 0; i < 8; i++) 04628 { 04629 priv->pshader_const_dirty[i] = 1; 04630 } 04631 /* Also takes care of loading local constants */ 04632 shader_arb_load_constants(context, TRUE, FALSE); 04633 } 04634 else 04635 { 04636 UINT rt_height = state->fb->render_targets[0]->resource.height; 04637 shader_arb_ps_local_constants(compiled, context, state, rt_height); 04638 } 04639 04640 /* Force constant reloading for the NP2 fixup (see comment in shader_glsl_select for more info) */ 04641 if (compiled->np2fixup_info.super.active) 04642 shader_arb_load_np2fixup_constants(priv, gl_info, state); 04643 } 04644 else if (gl_info->supported[ARB_FRAGMENT_PROGRAM] && !priv->use_arbfp_fixed_func) 04645 { 04646 /* Disable only if we're not using arbfp fixed function fragment processing. 
If this is used, 04647 * keep GL_FRAGMENT_PROGRAM_ARB enabled, and the fixed function pipeline will bind the fixed function 04648 * replacement shader 04649 */ 04650 glDisable(GL_FRAGMENT_PROGRAM_ARB); 04651 checkGLcall("glDisable(GL_FRAGMENT_PROGRAM_ARB)"); 04652 priv->current_fprogram_id = 0; 04653 } 04654 04655 if (useVS) 04656 { 04657 struct wined3d_shader *vs = state->vertex_shader; 04658 struct arb_vs_compile_args compile_args; 04659 struct arb_vs_compiled_shader *compiled; 04660 04661 TRACE("Using vertex shader %p\n", vs); 04662 find_arb_vs_compile_args(state, vs, &compile_args); 04663 compiled = find_arb_vshader(vs, &compile_args); 04664 priv->current_vprogram_id = compiled->prgId; 04665 priv->compiled_vprog = compiled; 04666 04667 /* Bind the vertex program */ 04668 GL_EXTCALL(glBindProgramARB(GL_VERTEX_PROGRAM_ARB, priv->current_vprogram_id)); 04669 checkGLcall("glBindProgramARB(GL_VERTEX_PROGRAM_ARB, priv->current_vprogram_id);"); 04670 04671 /* Enable OpenGL vertex programs */ 04672 glEnable(GL_VERTEX_PROGRAM_ARB); 04673 checkGLcall("glEnable(GL_VERTEX_PROGRAM_ARB);"); 04674 TRACE("(%p) : Bound vertex program %u and enabled GL_VERTEX_PROGRAM_ARB\n", device, priv->current_vprogram_id); 04675 shader_arb_vs_local_constants(compiled, context, state); 04676 04677 if(priv->last_vs_color_unclamp != compiled->need_color_unclamp) { 04678 priv->last_vs_color_unclamp = compiled->need_color_unclamp; 04679 04680 if (gl_info->supported[ARB_COLOR_BUFFER_FLOAT]) 04681 { 04682 GL_EXTCALL(glClampColorARB(GL_CLAMP_VERTEX_COLOR_ARB, !compiled->need_color_unclamp)); 04683 checkGLcall("glClampColorARB"); 04684 } else { 04685 FIXME("vertex color clamp needs to be changed, but extension not supported.\n"); 04686 } 04687 } 04688 } 04689 else if (gl_info->supported[ARB_VERTEX_PROGRAM]) 04690 { 04691 priv->current_vprogram_id = 0; 04692 glDisable(GL_VERTEX_PROGRAM_ARB); 04693 checkGLcall("glDisable(GL_VERTEX_PROGRAM_ARB)"); 04694 } 04695 } 04696 04697 /* GL locking is done by the 
caller */ 04698 static void shader_arb_select_depth_blt(void *shader_priv, const struct wined3d_gl_info *gl_info, 04699 enum tex_types tex_type, const SIZE *ds_mask_size) 04700 { 04701 const float mask[] = {0.0f, 0.0f, (float)ds_mask_size->cx, (float)ds_mask_size->cy}; 04702 BOOL masked = ds_mask_size->cx && ds_mask_size->cy; 04703 struct shader_arb_priv *priv = shader_priv; 04704 GLuint *blt_fprogram; 04705 04706 if (!priv->depth_blt_vprogram_id) priv->depth_blt_vprogram_id = create_arb_blt_vertex_program(gl_info); 04707 GL_EXTCALL(glBindProgramARB(GL_VERTEX_PROGRAM_ARB, priv->depth_blt_vprogram_id)); 04708 glEnable(GL_VERTEX_PROGRAM_ARB); 04709 04710 blt_fprogram = masked ? &priv->depth_blt_fprogram_id_masked[tex_type] : &priv->depth_blt_fprogram_id_full[tex_type]; 04711 if (!*blt_fprogram) *blt_fprogram = create_arb_blt_fragment_program(gl_info, tex_type, masked); 04712 GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, *blt_fprogram)); 04713 if (masked) GL_EXTCALL(glProgramLocalParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, 0, mask)); 04714 glEnable(GL_FRAGMENT_PROGRAM_ARB); 04715 } 04716 04717 /* GL locking is done by the caller */ 04718 static void shader_arb_deselect_depth_blt(void *shader_priv, const struct wined3d_gl_info *gl_info) 04719 { 04720 struct shader_arb_priv *priv = shader_priv; 04721 04722 if (priv->current_vprogram_id) { 04723 GL_EXTCALL(glBindProgramARB(GL_VERTEX_PROGRAM_ARB, priv->current_vprogram_id)); 04724 checkGLcall("glBindProgramARB(GL_VERTEX_PROGRAM_ARB, vertexShader->prgId);"); 04725 04726 TRACE("Bound vertex program %u and enabled GL_VERTEX_PROGRAM_ARB.\n", priv->current_vprogram_id); 04727 } 04728 else 04729 { 04730 glDisable(GL_VERTEX_PROGRAM_ARB); 04731 checkGLcall("glDisable(GL_VERTEX_PROGRAM_ARB)"); 04732 } 04733 04734 if (priv->current_fprogram_id) { 04735 GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, priv->current_fprogram_id)); 04736 checkGLcall("glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, pixelShader->prgId);"); 04737 04738 
TRACE("Bound fragment program %u and enabled GL_FRAGMENT_PROGRAM_ARB.\n", priv->current_fprogram_id); 04739 } 04740 else if(!priv->use_arbfp_fixed_func) 04741 { 04742 glDisable(GL_FRAGMENT_PROGRAM_ARB); 04743 checkGLcall("glDisable(GL_FRAGMENT_PROGRAM_ARB)"); 04744 } 04745 } 04746 04747 static void shader_arb_destroy(struct wined3d_shader *shader) 04748 { 04749 struct wined3d_device *device = shader->device; 04750 const struct wined3d_gl_info *gl_info = &device->adapter->gl_info; 04751 04752 if (shader_is_pshader_version(shader->reg_maps.shader_version.type)) 04753 { 04754 struct arb_pshader_private *shader_data = shader->backend_data; 04755 UINT i; 04756 04757 if(!shader_data) return; /* This can happen if a shader was never compiled */ 04758 04759 if (shader_data->num_gl_shaders) 04760 { 04761 struct wined3d_context *context = context_acquire(device, NULL); 04762 04763 ENTER_GL(); 04764 for (i = 0; i < shader_data->num_gl_shaders; ++i) 04765 { 04766 GL_EXTCALL(glDeleteProgramsARB(1, &shader_data->gl_shaders[i].prgId)); 04767 checkGLcall("GL_EXTCALL(glDeleteProgramsARB(1, &shader_data->gl_shaders[i].prgId))"); 04768 } 04769 LEAVE_GL(); 04770 04771 context_release(context); 04772 } 04773 04774 HeapFree(GetProcessHeap(), 0, shader_data->gl_shaders); 04775 HeapFree(GetProcessHeap(), 0, shader_data); 04776 shader->backend_data = NULL; 04777 } 04778 else 04779 { 04780 struct arb_vshader_private *shader_data = shader->backend_data; 04781 UINT i; 04782 04783 if(!shader_data) return; /* This can happen if a shader was never compiled */ 04784 04785 if (shader_data->num_gl_shaders) 04786 { 04787 struct wined3d_context *context = context_acquire(device, NULL); 04788 04789 ENTER_GL(); 04790 for (i = 0; i < shader_data->num_gl_shaders; ++i) 04791 { 04792 GL_EXTCALL(glDeleteProgramsARB(1, &shader_data->gl_shaders[i].prgId)); 04793 checkGLcall("GL_EXTCALL(glDeleteProgramsARB(1, &shader_data->gl_shaders[i].prgId))"); 04794 } 04795 LEAVE_GL(); 04796 04797 context_release(context); 
04798 } 04799 04800 HeapFree(GetProcessHeap(), 0, shader_data->gl_shaders); 04801 HeapFree(GetProcessHeap(), 0, shader_data); 04802 shader->backend_data = NULL; 04803 } 04804 } 04805 04806 static int sig_tree_compare(const void *key, const struct wine_rb_entry *entry) 04807 { 04808 struct ps_signature *e = WINE_RB_ENTRY_VALUE(entry, struct ps_signature, entry); 04809 return compare_sig(key, e->sig); 04810 } 04811 04812 static const struct wine_rb_functions sig_tree_functions = 04813 { 04814 wined3d_rb_alloc, 04815 wined3d_rb_realloc, 04816 wined3d_rb_free, 04817 sig_tree_compare 04818 }; 04819 04820 static HRESULT shader_arb_alloc(struct wined3d_device *device) 04821 { 04822 struct shader_arb_priv *priv = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*priv)); 04823 04824 priv->vshader_const_dirty = HeapAlloc(GetProcessHeap(), 0, 04825 sizeof(*priv->vshader_const_dirty) * device->d3d_vshader_constantF); 04826 if (!priv->vshader_const_dirty) 04827 goto fail; 04828 memset(priv->vshader_const_dirty, 1, 04829 sizeof(*priv->vshader_const_dirty) * device->d3d_vshader_constantF); 04830 04831 priv->pshader_const_dirty = HeapAlloc(GetProcessHeap(), 0, 04832 sizeof(*priv->pshader_const_dirty) * device->d3d_pshader_constantF); 04833 if (!priv->pshader_const_dirty) 04834 goto fail; 04835 memset(priv->pshader_const_dirty, 1, 04836 sizeof(*priv->pshader_const_dirty) * device->d3d_pshader_constantF); 04837 04838 if(wine_rb_init(&priv->signature_tree, &sig_tree_functions) == -1) 04839 { 04840 ERR("RB tree init failed\n"); 04841 goto fail; 04842 } 04843 device->shader_priv = priv; 04844 return WINED3D_OK; 04845 04846 fail: 04847 HeapFree(GetProcessHeap(), 0, priv->pshader_const_dirty); 04848 HeapFree(GetProcessHeap(), 0, priv->vshader_const_dirty); 04849 HeapFree(GetProcessHeap(), 0, priv); 04850 return E_OUTOFMEMORY; 04851 } 04852 04853 static void release_signature(struct wine_rb_entry *entry, void *context) 04854 { 04855 struct ps_signature *sig = 
WINE_RB_ENTRY_VALUE(entry, struct ps_signature, entry); 04856 int i; 04857 for(i = 0; i < MAX_REG_INPUT; i++) 04858 { 04859 HeapFree(GetProcessHeap(), 0, (char *) sig->sig[i].semantic_name); 04860 } 04861 HeapFree(GetProcessHeap(), 0, sig->sig); 04862 HeapFree(GetProcessHeap(), 0, sig); 04863 } 04864 04865 /* Context activation is done by the caller. */ 04866 static void shader_arb_free(struct wined3d_device *device) 04867 { 04868 const struct wined3d_gl_info *gl_info = &device->adapter->gl_info; 04869 struct shader_arb_priv *priv = device->shader_priv; 04870 int i; 04871 04872 ENTER_GL(); 04873 if(priv->depth_blt_vprogram_id) { 04874 GL_EXTCALL(glDeleteProgramsARB(1, &priv->depth_blt_vprogram_id)); 04875 } 04876 for (i = 0; i < tex_type_count; ++i) 04877 { 04878 if (priv->depth_blt_fprogram_id_full[i]) 04879 { 04880 GL_EXTCALL(glDeleteProgramsARB(1, &priv->depth_blt_fprogram_id_full[i])); 04881 } 04882 if (priv->depth_blt_fprogram_id_masked[i]) 04883 { 04884 GL_EXTCALL(glDeleteProgramsARB(1, &priv->depth_blt_fprogram_id_masked[i])); 04885 } 04886 } 04887 LEAVE_GL(); 04888 04889 wine_rb_destroy(&priv->signature_tree, release_signature, NULL); 04890 HeapFree(GetProcessHeap(), 0, priv->pshader_const_dirty); 04891 HeapFree(GetProcessHeap(), 0, priv->vshader_const_dirty); 04892 HeapFree(GetProcessHeap(), 0, device->shader_priv); 04893 } 04894 04895 static void shader_arb_context_destroyed(void *shader_priv, const struct wined3d_context *context) 04896 { 04897 struct shader_arb_priv *priv = shader_priv; 04898 04899 if (priv->last_context == context) 04900 priv->last_context = NULL; 04901 } 04902 04903 static void shader_arb_get_caps(const struct wined3d_gl_info *gl_info, struct shader_caps *caps) 04904 { 04905 if (gl_info->supported[ARB_VERTEX_PROGRAM]) 04906 { 04907 DWORD vs_consts; 04908 04909 /* 96 is the minimum allowed value of MAX_PROGRAM_ENV_PARAMETERS_ARB 04910 * for vertex programs. If the native limit is less than that it's 04911 * not very useful, and e.g. 
Mesa swrast returns 0, probably to 04912 * indicate it's a software implementation. */ 04913 if (gl_info->limits.arb_vs_native_constants < 96) 04914 vs_consts = gl_info->limits.arb_vs_float_constants; 04915 else 04916 vs_consts = min(gl_info->limits.arb_vs_float_constants, gl_info->limits.arb_vs_native_constants); 04917 04918 if (gl_info->supported[NV_VERTEX_PROGRAM3]) 04919 { 04920 caps->VertexShaderVersion = 3; 04921 TRACE_(d3d_caps)("Hardware vertex shader version 3.0 enabled (NV_VERTEX_PROGRAM3)\n"); 04922 } 04923 else if (vs_consts >= 256) 04924 { 04925 /* Shader Model 2.0 requires at least 256 vertex shader constants */ 04926 caps->VertexShaderVersion = 2; 04927 TRACE_(d3d_caps)("Hardware vertex shader version 2.0 enabled (ARB_PROGRAM)\n"); 04928 } 04929 else 04930 { 04931 caps->VertexShaderVersion = 1; 04932 TRACE_(d3d_caps)("Hardware vertex shader version 1.1 enabled (ARB_PROGRAM)\n"); 04933 } 04934 caps->MaxVertexShaderConst = vs_consts; 04935 } 04936 else 04937 { 04938 caps->VertexShaderVersion = 0; 04939 caps->MaxVertexShaderConst = 0; 04940 } 04941 04942 if (gl_info->supported[ARB_FRAGMENT_PROGRAM]) 04943 { 04944 DWORD ps_consts; 04945 04946 /* Similar as above for vertex programs, but the minimum for fragment 04947 * programs is 24. 
*/ 04948 if (gl_info->limits.arb_ps_native_constants < 24) 04949 ps_consts = gl_info->limits.arb_ps_float_constants; 04950 else 04951 ps_consts = min(gl_info->limits.arb_ps_float_constants, gl_info->limits.arb_ps_native_constants); 04952 04953 if (gl_info->supported[NV_FRAGMENT_PROGRAM2]) 04954 { 04955 caps->PixelShaderVersion = 3; 04956 TRACE_(d3d_caps)("Hardware pixel shader version 3.0 enabled (NV_FRAGMENT_PROGRAM2)\n"); 04957 } 04958 else if (ps_consts >= 32) 04959 { 04960 /* Shader Model 2.0 requires at least 32 pixel shader constants */ 04961 caps->PixelShaderVersion = 2; 04962 TRACE_(d3d_caps)("Hardware pixel shader version 2.0 enabled (ARB_PROGRAM)\n"); 04963 } 04964 else 04965 { 04966 caps->PixelShaderVersion = 1; 04967 TRACE_(d3d_caps)("Hardware pixel shader version 1.4 enabled (ARB_PROGRAM)\n"); 04968 } 04969 caps->PixelShader1xMaxValue = 8.0f; 04970 caps->MaxPixelShaderConst = ps_consts; 04971 } 04972 else 04973 { 04974 caps->PixelShaderVersion = 0; 04975 caps->PixelShader1xMaxValue = 0.0f; 04976 caps->MaxPixelShaderConst = 0; 04977 } 04978 04979 caps->VSClipping = use_nv_clip(gl_info); 04980 } 04981 04982 static BOOL shader_arb_color_fixup_supported(struct color_fixup_desc fixup) 04983 { 04984 if (TRACE_ON(d3d_shader) && TRACE_ON(d3d)) 04985 { 04986 TRACE("Checking support for color_fixup:\n"); 04987 dump_color_fixup_desc(fixup); 04988 } 04989 04990 /* We support everything except complex conversions. 
/* Per-opcode code generators of the ARB backend.
 *
 * shader_arb_handle_instruction() indexes this table with ins->handler_idx.
 * A NULL entry means the backend has no generator for that opcode: the
 * dispatcher logs "Backend can't handle opcode" and emits nothing.
 * WINED3DSIH_IF is NULL on purpose, because the dispatcher resolves boolean
 * ifs itself and returns before the table lookup.
 *
 * The initialiser is positional, so presumably the entries have to stay in
 * the same order as the WINED3DSIH_* enumeration; the comment in front of
 * each entry names the opcode the slot is meant for. */
static const SHADER_HANDLER shader_arb_instruction_handler_table[WINED3DSIH_TABLE_SIZE] =
{
    /* WINED3DSIH_ABS */ shader_hw_map2gl,
    /* WINED3DSIH_ADD */ shader_hw_map2gl,
    /* WINED3DSIH_AND */ NULL,
    /* WINED3DSIH_BEM */ pshader_hw_bem,
    /* WINED3DSIH_BREAK */ shader_hw_break,
    /* WINED3DSIH_BREAKC */ shader_hw_breakc,
    /* WINED3DSIH_BREAKP */ NULL,
    /* WINED3DSIH_CALL */ shader_hw_call,
    /* WINED3DSIH_CALLNZ */ NULL,
    /* WINED3DSIH_CMP */ pshader_hw_cmp,
    /* WINED3DSIH_CND */ pshader_hw_cnd,
    /* WINED3DSIH_CRS */ shader_hw_map2gl,
    /* WINED3DSIH_CUT */ NULL,
    /* WINED3DSIH_DCL */ NULL,
    /* WINED3DSIH_DEF */ NULL,
    /* WINED3DSIH_DEFB */ NULL,
    /* WINED3DSIH_DEFI */ NULL,
    /* WINED3DSIH_DIV */ NULL,
    /* WINED3DSIH_DP2ADD */ pshader_hw_dp2add,
    /* WINED3DSIH_DP3 */ shader_hw_map2gl,
    /* WINED3DSIH_DP4 */ shader_hw_map2gl,
    /* WINED3DSIH_DST */ shader_hw_map2gl,
    /* WINED3DSIH_DSX */ shader_hw_map2gl,
    /* WINED3DSIH_DSY */ shader_hw_dsy,
    /* WINED3DSIH_ELSE */ shader_hw_else,
    /* WINED3DSIH_EMIT */ NULL,
    /* WINED3DSIH_ENDIF */ shader_hw_endif,
    /* WINED3DSIH_ENDLOOP */ shader_hw_endloop,
    /* WINED3DSIH_ENDREP */ shader_hw_endrep,
    /* WINED3DSIH_EQ */ NULL,
    /* WINED3DSIH_EXP */ shader_hw_scalar_op,
    /* WINED3DSIH_EXPP */ shader_hw_scalar_op,
    /* WINED3DSIH_FRC */ shader_hw_map2gl,
    /* WINED3DSIH_FTOI */ NULL,
    /* WINED3DSIH_GE */ NULL,
    /* WINED3DSIH_IADD */ NULL,
    /* WINED3DSIH_IEQ */ NULL,
    /* WINED3DSIH_IF */ NULL /* Hardcoded into the shader */,
    /* WINED3DSIH_IFC */ shader_hw_ifc,
    /* WINED3DSIH_IGE */ NULL,
    /* WINED3DSIH_IMUL */ NULL,
    /* WINED3DSIH_ITOF */ NULL,
    /* WINED3DSIH_LABEL */ shader_hw_label,
    /* WINED3DSIH_LD */ NULL,
    /* WINED3DSIH_LIT */ shader_hw_map2gl,
    /* WINED3DSIH_LOG */ shader_hw_log,
    /* WINED3DSIH_LOGP */ shader_hw_log,
    /* WINED3DSIH_LOOP */ shader_hw_loop,
    /* WINED3DSIH_LRP */ shader_hw_lrp,
    /* WINED3DSIH_LT */ NULL,
    /* WINED3DSIH_M3x2 */ shader_hw_mnxn,
    /* WINED3DSIH_M3x3 */ shader_hw_mnxn,
    /* WINED3DSIH_M3x4 */ shader_hw_mnxn,
    /* WINED3DSIH_M4x3 */ shader_hw_mnxn,
    /* WINED3DSIH_M4x4 */ shader_hw_mnxn,
    /* WINED3DSIH_MAD */ shader_hw_map2gl,
    /* WINED3DSIH_MAX */ shader_hw_map2gl,
    /* WINED3DSIH_MIN */ shader_hw_map2gl,
    /* WINED3DSIH_MOV */ shader_hw_mov,
    /* WINED3DSIH_MOVA */ shader_hw_mov,
    /* WINED3DSIH_MOVC */ NULL,
    /* WINED3DSIH_MUL */ shader_hw_map2gl,
    /* WINED3DSIH_NOP */ shader_hw_nop,
    /* WINED3DSIH_NRM */ shader_hw_nrm,
    /* WINED3DSIH_PHASE */ NULL,
    /* WINED3DSIH_POW */ shader_hw_pow,
    /* WINED3DSIH_RCP */ shader_hw_rcp,
    /* WINED3DSIH_REP */ shader_hw_rep,
    /* WINED3DSIH_RET */ shader_hw_ret,
    /* WINED3DSIH_ROUND_NI */ NULL,
    /* WINED3DSIH_RSQ */ shader_hw_scalar_op,
    /* WINED3DSIH_SAMPLE */ NULL,
    /* WINED3DSIH_SAMPLE_GRAD */ NULL,
    /* WINED3DSIH_SAMPLE_LOD */ NULL,
    /* WINED3DSIH_SETP */ NULL,
    /* WINED3DSIH_SGE */ shader_hw_map2gl,
    /* WINED3DSIH_SGN */ shader_hw_sgn,
    /* WINED3DSIH_SINCOS */ shader_hw_sincos,
    /* WINED3DSIH_SLT */ shader_hw_map2gl,
    /* WINED3DSIH_SQRT */ NULL,
    /* WINED3DSIH_SUB */ shader_hw_map2gl,
    /* WINED3DSIH_TEX */ pshader_hw_tex,
    /* WINED3DSIH_TEXBEM */ pshader_hw_texbem,
    /* WINED3DSIH_TEXBEML */ pshader_hw_texbem,
    /* WINED3DSIH_TEXCOORD */ pshader_hw_texcoord,
    /* WINED3DSIH_TEXDEPTH */ pshader_hw_texdepth,
    /* WINED3DSIH_TEXDP3 */ pshader_hw_texdp3,
    /* WINED3DSIH_TEXDP3TEX */ pshader_hw_texdp3tex,
    /* WINED3DSIH_TEXKILL */ pshader_hw_texkill,
    /* WINED3DSIH_TEXLDD */ shader_hw_texldd,
    /* WINED3DSIH_TEXLDL */ shader_hw_texldl,
    /* WINED3DSIH_TEXM3x2DEPTH */ pshader_hw_texm3x2depth,
    /* WINED3DSIH_TEXM3x2PAD */ pshader_hw_texm3x2pad,
    /* WINED3DSIH_TEXM3x2TEX */ pshader_hw_texm3x2tex,
    /* WINED3DSIH_TEXM3x3 */ pshader_hw_texm3x3,
    /* WINED3DSIH_TEXM3x3DIFF */ NULL,
    /* WINED3DSIH_TEXM3x3PAD */ pshader_hw_texm3x3pad,
    /* WINED3DSIH_TEXM3x3SPEC */ pshader_hw_texm3x3spec,
    /* WINED3DSIH_TEXM3x3TEX */ pshader_hw_texm3x3tex,
    /* WINED3DSIH_TEXM3x3VSPEC */ pshader_hw_texm3x3vspec,
    /* WINED3DSIH_TEXREG2AR */ pshader_hw_texreg2ar,
    /* WINED3DSIH_TEXREG2GB */ pshader_hw_texreg2gb,
    /* WINED3DSIH_TEXREG2RGB */ pshader_hw_texreg2rgb,
    /* WINED3DSIH_UDIV */ NULL,
    /* WINED3DSIH_USHR */ NULL,
    /* WINED3DSIH_UTOF */ NULL,
    /* WINED3DSIH_XOR */ NULL,
};
wined3d_shader_instruction *ins, 05137 const struct wined3d_shader *shader, DWORD idx) 05138 { 05139 const struct wined3d_shader_reg_maps *reg_maps = ins->ctx->reg_maps; 05140 BOOL vshader = shader_is_vshader_version(reg_maps->shader_version.type); 05141 const struct wined3d_shader_lconst *constant; 05142 WORD bools = 0; 05143 WORD flag = (1 << idx); 05144 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 05145 05146 if (reg_maps->local_bool_consts & flag) 05147 { 05148 /* What good is a if(bool) with a hardcoded local constant? I don't know, but handle it */ 05149 LIST_FOR_EACH_ENTRY(constant, &shader->constantsB, struct wined3d_shader_lconst, entry) 05150 { 05151 if (constant->idx == idx) 05152 { 05153 return constant->value[0]; 05154 } 05155 } 05156 ERR("Local constant not found\n"); 05157 return FALSE; 05158 } 05159 else 05160 { 05161 if(vshader) bools = priv->cur_vs_args->clip.boolclip.bools; 05162 else bools = priv->cur_ps_args->bools; 05163 return bools & flag; 05164 } 05165 } 05166 05167 static void get_loop_control_const(const struct wined3d_shader_instruction *ins, 05168 const struct wined3d_shader *shader, UINT idx, struct wined3d_shader_loop_control *loop_control) 05169 { 05170 const struct wined3d_shader_reg_maps *reg_maps = ins->ctx->reg_maps; 05171 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 05172 05173 /* Integer constants can either be a local constant, or they can be stored in the shader 05174 * type specific compile args. */ 05175 if (reg_maps->local_int_consts & (1 << idx)) 05176 { 05177 const struct wined3d_shader_lconst *constant; 05178 05179 LIST_FOR_EACH_ENTRY(constant, &shader->constantsI, struct wined3d_shader_lconst, entry) 05180 { 05181 if (constant->idx == idx) 05182 { 05183 loop_control->count = constant->value[0]; 05184 loop_control->start = constant->value[1]; 05185 /* Step is signed. 
*/ 05186 loop_control->step = (int)constant->value[2]; 05187 return; 05188 } 05189 } 05190 /* If this happens the flag was set incorrectly */ 05191 ERR("Local constant not found\n"); 05192 loop_control->count = 0; 05193 loop_control->start = 0; 05194 loop_control->step = 0; 05195 return; 05196 } 05197 05198 switch (reg_maps->shader_version.type) 05199 { 05200 case WINED3D_SHADER_TYPE_VERTEX: 05201 /* Count and aL start value are unsigned */ 05202 loop_control->count = priv->cur_vs_args->loop_ctrl[idx][0]; 05203 loop_control->start = priv->cur_vs_args->loop_ctrl[idx][1]; 05204 /* Step is signed. */ 05205 loop_control->step = ((char)priv->cur_vs_args->loop_ctrl[idx][2]); 05206 break; 05207 05208 case WINED3D_SHADER_TYPE_PIXEL: 05209 loop_control->count = priv->cur_ps_args->loop_ctrl[idx][0]; 05210 loop_control->start = priv->cur_ps_args->loop_ctrl[idx][1]; 05211 loop_control->step = ((char)priv->cur_ps_args->loop_ctrl[idx][2]); 05212 break; 05213 05214 default: 05215 FIXME("Unhandled shader type %#x.\n", reg_maps->shader_version.type); 05216 break; 05217 } 05218 } 05219 05220 static void record_instruction(struct list *list, const struct wined3d_shader_instruction *ins) 05221 { 05222 unsigned int i; 05223 struct wined3d_shader_dst_param *dst_param = NULL; 05224 struct wined3d_shader_src_param *src_param = NULL, *rel_addr = NULL; 05225 struct recorded_instruction *rec = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*rec)); 05226 if(!rec) 05227 { 05228 ERR("Out of memory\n"); 05229 return; 05230 } 05231 05232 rec->ins = *ins; 05233 dst_param = HeapAlloc(GetProcessHeap(), 0, sizeof(*dst_param)); 05234 if(!dst_param) goto free; 05235 *dst_param = *ins->dst; 05236 if(ins->dst->reg.rel_addr) 05237 { 05238 rel_addr = HeapAlloc(GetProcessHeap(), 0, sizeof(*dst_param->reg.rel_addr)); 05239 if(!rel_addr) goto free; 05240 *rel_addr = *ins->dst->reg.rel_addr; 05241 dst_param->reg.rel_addr = rel_addr; 05242 } 05243 rec->ins.dst = dst_param; 05244 05245 src_param = 
HeapAlloc(GetProcessHeap(), 0, sizeof(*src_param) * ins->src_count); 05246 if(!src_param) goto free; 05247 for(i = 0; i < ins->src_count; i++) 05248 { 05249 src_param[i] = ins->src[i]; 05250 if(ins->src[i].reg.rel_addr) 05251 { 05252 rel_addr = HeapAlloc(GetProcessHeap(), 0, sizeof(*rel_addr)); 05253 if(!rel_addr) goto free; 05254 *rel_addr = *ins->src[i].reg.rel_addr; 05255 src_param[i].reg.rel_addr = rel_addr; 05256 } 05257 } 05258 rec->ins.src = src_param; 05259 list_add_tail(list, &rec->entry); 05260 return; 05261 05262 free: 05263 ERR("Out of memory\n"); 05264 if(dst_param) 05265 { 05266 HeapFree(GetProcessHeap(), 0, (void *) dst_param->reg.rel_addr); 05267 HeapFree(GetProcessHeap(), 0, dst_param); 05268 } 05269 if(src_param) 05270 { 05271 for(i = 0; i < ins->src_count; i++) 05272 { 05273 HeapFree(GetProcessHeap(), 0, (void *) src_param[i].reg.rel_addr); 05274 } 05275 HeapFree(GetProcessHeap(), 0, src_param); 05276 } 05277 HeapFree(GetProcessHeap(), 0, rec); 05278 } 05279 05280 static void free_recorded_instruction(struct list *list) 05281 { 05282 struct recorded_instruction *rec_ins, *entry2; 05283 unsigned int i; 05284 05285 LIST_FOR_EACH_ENTRY_SAFE(rec_ins, entry2, list, struct recorded_instruction, entry) 05286 { 05287 list_remove(&rec_ins->entry); 05288 if(rec_ins->ins.dst) 05289 { 05290 HeapFree(GetProcessHeap(), 0, (void *) rec_ins->ins.dst->reg.rel_addr); 05291 HeapFree(GetProcessHeap(), 0, (void *) rec_ins->ins.dst); 05292 } 05293 if(rec_ins->ins.src) 05294 { 05295 for(i = 0; i < rec_ins->ins.src_count; i++) 05296 { 05297 HeapFree(GetProcessHeap(), 0, (void *) rec_ins->ins.src[i].reg.rel_addr); 05298 } 05299 HeapFree(GetProcessHeap(), 0, (void *) rec_ins->ins.src); 05300 } 05301 HeapFree(GetProcessHeap(), 0, rec_ins); 05302 } 05303 } 05304 05305 static void shader_arb_handle_instruction(const struct wined3d_shader_instruction *ins) { 05306 SHADER_HANDLER hw_fct; 05307 struct shader_arb_ctx_priv *priv = ins->ctx->backend_data; 05308 const struct 
wined3d_shader *shader = ins->ctx->shader; 05309 struct control_frame *control_frame; 05310 struct wined3d_shader_buffer *buffer = ins->ctx->buffer; 05311 BOOL bool_const; 05312 05313 if(ins->handler_idx == WINED3DSIH_LOOP || ins->handler_idx == WINED3DSIH_REP) 05314 { 05315 control_frame = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*control_frame)); 05316 list_add_head(&priv->control_frames, &control_frame->entry); 05317 05318 if(ins->handler_idx == WINED3DSIH_LOOP) control_frame->type = LOOP; 05319 if(ins->handler_idx == WINED3DSIH_REP) control_frame->type = REP; 05320 05321 if(priv->target_version >= NV2) 05322 { 05323 control_frame->no.loop = priv->num_loops++; 05324 priv->loop_depth++; 05325 } 05326 else 05327 { 05328 /* Don't bother recording when we're in a not used if branch */ 05329 if(priv->muted) 05330 { 05331 return; 05332 } 05333 05334 if(!priv->recording) 05335 { 05336 list_init(&priv->record); 05337 priv->recording = TRUE; 05338 control_frame->outer_loop = TRUE; 05339 get_loop_control_const(ins, shader, ins->src[0].reg.idx, &control_frame->loop_control); 05340 return; /* Instruction is handled */ 05341 } 05342 /* Record this loop in the outer loop's recording */ 05343 } 05344 } 05345 else if(ins->handler_idx == WINED3DSIH_ENDLOOP || ins->handler_idx == WINED3DSIH_ENDREP) 05346 { 05347 if(priv->target_version >= NV2) 05348 { 05349 /* Nothing to do. The control frame is popped after the HW instr handler */ 05350 } 05351 else 05352 { 05353 struct list *e = list_head(&priv->control_frames); 05354 control_frame = LIST_ENTRY(e, struct control_frame, entry); 05355 list_remove(&control_frame->entry); 05356 05357 if(control_frame->outer_loop) 05358 { 05359 unsigned int iteration; 05360 int aL = 0; 05361 struct list copy; 05362 05363 /* Turn off recording before playback */ 05364 priv->recording = FALSE; 05365 05366 /* Move the recorded instructions to a separate list and get them out of the private data 05367 * structure. 
If there are nested loops, the shader_arb_handle_instruction below will 05368 * be recorded again, thus priv->record might be overwritten 05369 */ 05370 list_init(©); 05371 list_move_tail(©, &priv->record); 05372 list_init(&priv->record); 05373 05374 if(ins->handler_idx == WINED3DSIH_ENDLOOP) 05375 { 05376 shader_addline(buffer, "#unrolling loop: %u iterations, aL=%u, inc %d\n", 05377 control_frame->loop_control.count, control_frame->loop_control.start, 05378 control_frame->loop_control.step); 05379 aL = control_frame->loop_control.start; 05380 } 05381 else 05382 { 05383 shader_addline(buffer, "#unrolling rep: %u iterations\n", control_frame->loop_control.count); 05384 } 05385 05386 for (iteration = 0; iteration < control_frame->loop_control.count; ++iteration) 05387 { 05388 struct recorded_instruction *rec_ins; 05389 if(ins->handler_idx == WINED3DSIH_ENDLOOP) 05390 { 05391 priv->aL = aL; 05392 shader_addline(buffer, "#Iteration %u, aL=%d\n", iteration, aL); 05393 } 05394 else 05395 { 05396 shader_addline(buffer, "#Iteration %u\n", iteration); 05397 } 05398 05399 LIST_FOR_EACH_ENTRY(rec_ins, ©, struct recorded_instruction, entry) 05400 { 05401 shader_arb_handle_instruction(&rec_ins->ins); 05402 } 05403 05404 if(ins->handler_idx == WINED3DSIH_ENDLOOP) 05405 { 05406 aL += control_frame->loop_control.step; 05407 } 05408 } 05409 shader_addline(buffer, "#end loop/rep\n"); 05410 05411 free_recorded_instruction(©); 05412 HeapFree(GetProcessHeap(), 0, control_frame); 05413 return; /* Instruction is handled */ 05414 } 05415 else 05416 { 05417 /* This is a nested loop. 
Proceed to the normal recording function */ 05418 HeapFree(GetProcessHeap(), 0, control_frame); 05419 } 05420 } 05421 } 05422 05423 if(priv->recording) 05424 { 05425 record_instruction(&priv->record, ins); 05426 return; 05427 } 05428 05429 /* boolean if */ 05430 if(ins->handler_idx == WINED3DSIH_IF) 05431 { 05432 control_frame = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*control_frame)); 05433 list_add_head(&priv->control_frames, &control_frame->entry); 05434 control_frame->type = IF; 05435 05436 bool_const = get_bool_const(ins, shader, ins->src[0].reg.idx); 05437 if(ins->src[0].modifiers == WINED3DSPSM_NOT) bool_const = !bool_const; 05438 if (!priv->muted && !bool_const) 05439 { 05440 shader_addline(buffer, "#if(FALSE){\n"); 05441 priv->muted = TRUE; 05442 control_frame->muting = TRUE; 05443 } 05444 else shader_addline(buffer, "#if(TRUE) {\n"); 05445 05446 return; /* Instruction is handled */ 05447 } 05448 else if(ins->handler_idx == WINED3DSIH_IFC) 05449 { 05450 /* IF(bool) and if_cond(a, b) use the same ELSE and ENDIF tokens */ 05451 control_frame = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*control_frame)); 05452 control_frame->type = IFC; 05453 control_frame->no.ifc = priv->num_ifcs++; 05454 list_add_head(&priv->control_frames, &control_frame->entry); 05455 } 05456 else if(ins->handler_idx == WINED3DSIH_ELSE) 05457 { 05458 struct list *e = list_head(&priv->control_frames); 05459 control_frame = LIST_ENTRY(e, struct control_frame, entry); 05460 05461 if(control_frame->type == IF) 05462 { 05463 shader_addline(buffer, "#} else {\n"); 05464 if(!priv->muted && !control_frame->muting) 05465 { 05466 priv->muted = TRUE; 05467 control_frame->muting = TRUE; 05468 } 05469 else if(control_frame->muting) priv->muted = FALSE; 05470 return; /* Instruction is handled. 
*/ 05471 } 05472 /* In case of an ifc, generate a HW shader instruction */ 05473 } 05474 else if(ins->handler_idx == WINED3DSIH_ENDIF) 05475 { 05476 struct list *e = list_head(&priv->control_frames); 05477 control_frame = LIST_ENTRY(e, struct control_frame, entry); 05478 05479 if(control_frame->type == IF) 05480 { 05481 shader_addline(buffer, "#} endif\n"); 05482 if(control_frame->muting) priv->muted = FALSE; 05483 list_remove(&control_frame->entry); 05484 HeapFree(GetProcessHeap(), 0, control_frame); 05485 return; /* Instruction is handled */ 05486 } 05487 } 05488 05489 if(priv->muted) return; 05490 05491 /* Select handler */ 05492 hw_fct = shader_arb_instruction_handler_table[ins->handler_idx]; 05493 05494 /* Unhandled opcode */ 05495 if (!hw_fct) 05496 { 05497 FIXME("Backend can't handle opcode %#x\n", ins->handler_idx); 05498 return; 05499 } 05500 hw_fct(ins); 05501 05502 if(ins->handler_idx == WINED3DSIH_ENDLOOP || ins->handler_idx == WINED3DSIH_ENDREP) 05503 { 05504 struct list *e = list_head(&priv->control_frames); 05505 control_frame = LIST_ENTRY(e, struct control_frame, entry); 05506 list_remove(&control_frame->entry); 05507 HeapFree(GetProcessHeap(), 0, control_frame); 05508 priv->loop_depth--; 05509 } 05510 else if(ins->handler_idx == WINED3DSIH_ENDIF) 05511 { 05512 /* Non-ifc ENDIFs don't reach that place because of the return in the if block above */ 05513 struct list *e = list_head(&priv->control_frames); 05514 control_frame = LIST_ENTRY(e, struct control_frame, entry); 05515 list_remove(&control_frame->entry); 05516 HeapFree(GetProcessHeap(), 0, control_frame); 05517 } 05518 05519 05520 shader_arb_add_instruction_modifiers(ins); 05521 } 05522 05523 const struct wined3d_shader_backend_ops arb_program_shader_backend = 05524 { 05525 shader_arb_handle_instruction, 05526 shader_arb_select, 05527 shader_arb_select_depth_blt, 05528 shader_arb_deselect_depth_blt, 05529 shader_arb_update_float_vertex_constants, 05530 shader_arb_update_float_pixel_constants, 
05531 shader_arb_load_constants, 05532 shader_arb_load_np2fixup_constants, 05533 shader_arb_destroy, 05534 shader_arb_alloc, 05535 shader_arb_free, 05536 shader_arb_context_destroyed, 05537 shader_arb_get_caps, 05538 shader_arb_color_fixup_supported, 05539 }; 05540 05541 /* ARB_fragment_program fixed function pipeline replacement definitions */ 05542 #define ARB_FFP_CONST_TFACTOR 0 05543 #define ARB_FFP_CONST_SPECULAR_ENABLE ((ARB_FFP_CONST_TFACTOR) + 1) 05544 #define ARB_FFP_CONST_CONSTANT(i) ((ARB_FFP_CONST_SPECULAR_ENABLE) + 1 + i) 05545 #define ARB_FFP_CONST_BUMPMAT(i) ((ARB_FFP_CONST_CONSTANT(7)) + 1 + i) 05546 #define ARB_FFP_CONST_LUMINANCE(i) ((ARB_FFP_CONST_BUMPMAT(7)) + 1 + i) 05547 05548 struct arbfp_ffp_desc 05549 { 05550 struct ffp_frag_desc parent; 05551 GLuint shader; 05552 unsigned int num_textures_used; 05553 }; 05554 05555 /* Context activation and GL locking are done by the caller. */ 05556 static void arbfp_enable(BOOL enable) 05557 { 05558 if(enable) { 05559 glEnable(GL_FRAGMENT_PROGRAM_ARB); 05560 checkGLcall("glEnable(GL_FRAGMENT_PROGRAM_ARB)"); 05561 } else { 05562 glDisable(GL_FRAGMENT_PROGRAM_ARB); 05563 checkGLcall("glDisable(GL_FRAGMENT_PROGRAM_ARB)"); 05564 } 05565 } 05566 05567 static HRESULT arbfp_alloc(struct wined3d_device *device) 05568 { 05569 struct shader_arb_priv *priv; 05570 /* Share private data between the shader backend and the pipeline replacement, if both 05571 * are the arb implementation. 
This is needed to figure out whether ARBfp should be disabled 05572 * if no pixel shader is bound or not 05573 */ 05574 if (device->shader_backend == &arb_program_shader_backend) 05575 { 05576 device->fragment_priv = device->shader_priv; 05577 } 05578 else 05579 { 05580 device->fragment_priv = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(struct shader_arb_priv)); 05581 if (!device->fragment_priv) return E_OUTOFMEMORY; 05582 } 05583 priv = device->fragment_priv; 05584 if (wine_rb_init(&priv->fragment_shaders, &wined3d_ffp_frag_program_rb_functions) == -1) 05585 { 05586 ERR("Failed to initialize rbtree.\n"); 05587 HeapFree(GetProcessHeap(), 0, device->fragment_priv); 05588 return E_OUTOFMEMORY; 05589 } 05590 priv->use_arbfp_fixed_func = TRUE; 05591 return WINED3D_OK; 05592 } 05593 05594 /* Context activation is done by the caller. */ 05595 static void arbfp_free_ffpshader(struct wine_rb_entry *entry, void *context) 05596 { 05597 const struct wined3d_gl_info *gl_info = context; 05598 struct arbfp_ffp_desc *entry_arb = WINE_RB_ENTRY_VALUE(entry, struct arbfp_ffp_desc, parent.entry); 05599 05600 ENTER_GL(); 05601 GL_EXTCALL(glDeleteProgramsARB(1, &entry_arb->shader)); 05602 checkGLcall("glDeleteProgramsARB(1, &entry_arb->shader)"); 05603 HeapFree(GetProcessHeap(), 0, entry_arb); 05604 LEAVE_GL(); 05605 } 05606 05607 /* Context activation is done by the caller. 
*/ 05608 static void arbfp_free(struct wined3d_device *device) 05609 { 05610 struct shader_arb_priv *priv = device->fragment_priv; 05611 05612 wine_rb_destroy(&priv->fragment_shaders, arbfp_free_ffpshader, &device->adapter->gl_info); 05613 priv->use_arbfp_fixed_func = FALSE; 05614 05615 if (device->shader_backend != &arb_program_shader_backend) 05616 { 05617 HeapFree(GetProcessHeap(), 0, device->fragment_priv); 05618 } 05619 } 05620 05621 static void arbfp_get_caps(const struct wined3d_gl_info *gl_info, struct fragment_caps *caps) 05622 { 05623 caps->PrimitiveMiscCaps = WINED3DPMISCCAPS_TSSARGTEMP; 05624 caps->TextureOpCaps = WINED3DTEXOPCAPS_DISABLE | 05625 WINED3DTEXOPCAPS_SELECTARG1 | 05626 WINED3DTEXOPCAPS_SELECTARG2 | 05627 WINED3DTEXOPCAPS_MODULATE4X | 05628 WINED3DTEXOPCAPS_MODULATE2X | 05629 WINED3DTEXOPCAPS_MODULATE | 05630 WINED3DTEXOPCAPS_ADDSIGNED2X | 05631 WINED3DTEXOPCAPS_ADDSIGNED | 05632 WINED3DTEXOPCAPS_ADD | 05633 WINED3DTEXOPCAPS_SUBTRACT | 05634 WINED3DTEXOPCAPS_ADDSMOOTH | 05635 WINED3DTEXOPCAPS_BLENDCURRENTALPHA | 05636 WINED3DTEXOPCAPS_BLENDFACTORALPHA | 05637 WINED3DTEXOPCAPS_BLENDTEXTUREALPHA | 05638 WINED3DTEXOPCAPS_BLENDDIFFUSEALPHA | 05639 WINED3DTEXOPCAPS_BLENDTEXTUREALPHAPM | 05640 WINED3DTEXOPCAPS_MODULATEALPHA_ADDCOLOR | 05641 WINED3DTEXOPCAPS_MODULATECOLOR_ADDALPHA | 05642 WINED3DTEXOPCAPS_MODULATEINVCOLOR_ADDALPHA | 05643 WINED3DTEXOPCAPS_MODULATEINVALPHA_ADDCOLOR | 05644 WINED3DTEXOPCAPS_DOTPRODUCT3 | 05645 WINED3DTEXOPCAPS_MULTIPLYADD | 05646 WINED3DTEXOPCAPS_LERP | 05647 WINED3DTEXOPCAPS_BUMPENVMAP | 05648 WINED3DTEXOPCAPS_BUMPENVMAPLUMINANCE; 05649 05650 /* TODO: Implement WINED3DTEXOPCAPS_PREMODULATE */ 05651 05652 caps->MaxTextureBlendStages = 8; 05653 caps->MaxSimultaneousTextures = min(gl_info->limits.fragment_samplers, 8); 05654 } 05655 05656 static void state_texfactor_arbfp(struct wined3d_context *context, 05657 const struct wined3d_state *state, DWORD state_id) 05658 { 05659 struct wined3d_device *device = 
context->swapchain->device; 05660 const struct wined3d_gl_info *gl_info = context->gl_info; 05661 float col[4]; 05662 05663 /* Don't load the parameter if we're using an arbfp pixel shader, 05664 * otherwise we'll overwrite application provided constants. */ 05665 if (device->shader_backend == &arb_program_shader_backend) 05666 { 05667 struct shader_arb_priv *priv; 05668 05669 if (use_ps(state)) return; 05670 05671 priv = device->shader_priv; 05672 priv->pshader_const_dirty[ARB_FFP_CONST_TFACTOR] = 1; 05673 priv->highest_dirty_ps_const = max(priv->highest_dirty_ps_const, ARB_FFP_CONST_TFACTOR + 1); 05674 } 05675 05676 D3DCOLORTOGLFLOAT4(state->render_states[WINED3D_RS_TEXTUREFACTOR], col); 05677 GL_EXTCALL(glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_TFACTOR, col)); 05678 checkGLcall("glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_TFACTOR, col)"); 05679 } 05680 05681 static void state_arb_specularenable(struct wined3d_context *context, 05682 const struct wined3d_state *state, DWORD state_id) 05683 { 05684 struct wined3d_device *device = context->swapchain->device; 05685 const struct wined3d_gl_info *gl_info = context->gl_info; 05686 float col[4]; 05687 05688 /* Don't load the parameter if we're using an arbfp pixel shader, otherwise we'll overwrite 05689 * application provided constants 05690 */ 05691 if (device->shader_backend == &arb_program_shader_backend) 05692 { 05693 struct shader_arb_priv *priv; 05694 05695 if (use_ps(state)) return; 05696 05697 priv = device->shader_priv; 05698 priv->pshader_const_dirty[ARB_FFP_CONST_SPECULAR_ENABLE] = 1; 05699 priv->highest_dirty_ps_const = max(priv->highest_dirty_ps_const, ARB_FFP_CONST_SPECULAR_ENABLE + 1); 05700 } 05701 05702 if (state->render_states[WINED3D_RS_SPECULARENABLE]) 05703 { 05704 /* The specular color has no alpha */ 05705 col[0] = 1.0f; col[1] = 1.0f; 05706 col[2] = 1.0f; col[3] = 0.0f; 05707 } else { 05708 col[0] = 0.0f; col[1] = 0.0f; 05709 col[2] = 0.0f; col[3] = 
0.0f; 05710 } 05711 GL_EXTCALL(glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_SPECULAR_ENABLE, col)); 05712 checkGLcall("glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_SPECULAR_ENABLE, col)"); 05713 } 05714 05715 static void set_bumpmat_arbfp(struct wined3d_context *context, const struct wined3d_state *state, DWORD state_id) 05716 { 05717 DWORD stage = (state_id - STATE_TEXTURESTAGE(0, 0)) / (WINED3D_HIGHEST_TEXTURE_STATE + 1); 05718 struct wined3d_device *device = context->swapchain->device; 05719 const struct wined3d_gl_info *gl_info = context->gl_info; 05720 float mat[2][2]; 05721 05722 if (use_ps(state)) 05723 { 05724 if (stage && (state->pixel_shader->reg_maps.bumpmat & (1 << stage))) 05725 { 05726 /* The pixel shader has to know the bump env matrix. Do a constants update if it isn't scheduled 05727 * anyway 05728 */ 05729 if (!isStateDirty(context, STATE_PIXELSHADERCONSTANT)) 05730 context_apply_state(context, state, STATE_PIXELSHADERCONSTANT); 05731 } 05732 05733 if(device->shader_backend == &arb_program_shader_backend) { 05734 /* Exit now, don't set the bumpmat below, otherwise we may overwrite pixel shader constants */ 05735 return; 05736 } 05737 } 05738 else if (device->shader_backend == &arb_program_shader_backend) 05739 { 05740 struct shader_arb_priv *priv = device->shader_priv; 05741 priv->pshader_const_dirty[ARB_FFP_CONST_BUMPMAT(stage)] = 1; 05742 priv->highest_dirty_ps_const = max(priv->highest_dirty_ps_const, ARB_FFP_CONST_BUMPMAT(stage) + 1); 05743 } 05744 05745 mat[0][0] = *((float *)&state->texture_states[stage][WINED3D_TSS_BUMPENV_MAT00]); 05746 mat[0][1] = *((float *)&state->texture_states[stage][WINED3D_TSS_BUMPENV_MAT01]); 05747 mat[1][0] = *((float *)&state->texture_states[stage][WINED3D_TSS_BUMPENV_MAT10]); 05748 mat[1][1] = *((float *)&state->texture_states[stage][WINED3D_TSS_BUMPENV_MAT11]); 05749 05750 GL_EXTCALL(glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_BUMPMAT(stage), 
&mat[0][0])); 05751 checkGLcall("glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_BUMPMAT(stage), &mat[0][0])"); 05752 } 05753 05754 static void tex_bumpenvlum_arbfp(struct wined3d_context *context, 05755 const struct wined3d_state *state, DWORD state_id) 05756 { 05757 DWORD stage = (state_id - STATE_TEXTURESTAGE(0, 0)) / (WINED3D_HIGHEST_TEXTURE_STATE + 1); 05758 struct wined3d_device *device = context->swapchain->device; 05759 const struct wined3d_gl_info *gl_info = context->gl_info; 05760 float param[4]; 05761 05762 if (use_ps(state)) 05763 { 05764 if (stage && (state->pixel_shader->reg_maps.luminanceparams & (1 << stage))) 05765 { 05766 /* The pixel shader has to know the luminance offset. Do a constants update if it 05767 * isn't scheduled anyway 05768 */ 05769 if (!isStateDirty(context, STATE_PIXELSHADERCONSTANT)) 05770 context_apply_state(context, state, STATE_PIXELSHADERCONSTANT); 05771 } 05772 05773 if(device->shader_backend == &arb_program_shader_backend) { 05774 /* Exit now, don't set the bumpmat below, otherwise we may overwrite pixel shader constants */ 05775 return; 05776 } 05777 } 05778 else if (device->shader_backend == &arb_program_shader_backend) 05779 { 05780 struct shader_arb_priv *priv = device->shader_priv; 05781 priv->pshader_const_dirty[ARB_FFP_CONST_LUMINANCE(stage)] = 1; 05782 priv->highest_dirty_ps_const = max(priv->highest_dirty_ps_const, ARB_FFP_CONST_LUMINANCE(stage) + 1); 05783 } 05784 05785 param[0] = *((float *)&state->texture_states[stage][WINED3D_TSS_BUMPENV_LSCALE]); 05786 param[1] = *((float *)&state->texture_states[stage][WINED3D_TSS_BUMPENV_LOFFSET]); 05787 param[2] = 0.0f; 05788 param[3] = 0.0f; 05789 05790 GL_EXTCALL(glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_LUMINANCE(stage), param)); 05791 checkGLcall("glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, ARB_FFP_CONST_LUMINANCE(stage), param)"); 05792 } 05793 05794 static const char *get_argreg(struct wined3d_shader_buffer *buffer, 
DWORD argnum, unsigned int stage, DWORD arg) 05795 { 05796 const char *ret; 05797 05798 if(arg == ARG_UNUSED) return "unused"; /* This is the marker for unused registers */ 05799 05800 switch(arg & WINED3DTA_SELECTMASK) { 05801 case WINED3DTA_DIFFUSE: 05802 ret = "fragment.color.primary"; break; 05803 05804 case WINED3DTA_CURRENT: 05805 if (!stage) ret = "fragment.color.primary"; 05806 else ret = "ret"; 05807 break; 05808 05809 case WINED3DTA_TEXTURE: 05810 switch(stage) { 05811 case 0: ret = "tex0"; break; 05812 case 1: ret = "tex1"; break; 05813 case 2: ret = "tex2"; break; 05814 case 3: ret = "tex3"; break; 05815 case 4: ret = "tex4"; break; 05816 case 5: ret = "tex5"; break; 05817 case 6: ret = "tex6"; break; 05818 case 7: ret = "tex7"; break; 05819 default: ret = "unknown texture"; 05820 } 05821 break; 05822 05823 case WINED3DTA_TFACTOR: 05824 ret = "tfactor"; break; 05825 05826 case WINED3DTA_SPECULAR: 05827 ret = "fragment.color.secondary"; break; 05828 05829 case WINED3DTA_TEMP: 05830 ret = "tempreg"; break; 05831 05832 case WINED3DTA_CONSTANT: 05833 FIXME("Implement perstage constants\n"); 05834 switch(stage) { 05835 case 0: ret = "const0"; break; 05836 case 1: ret = "const1"; break; 05837 case 2: ret = "const2"; break; 05838 case 3: ret = "const3"; break; 05839 case 4: ret = "const4"; break; 05840 case 5: ret = "const5"; break; 05841 case 6: ret = "const6"; break; 05842 case 7: ret = "const7"; break; 05843 default: ret = "unknown constant"; 05844 } 05845 break; 05846 05847 default: 05848 return "unknown"; 05849 } 05850 05851 if(arg & WINED3DTA_COMPLEMENT) { 05852 shader_addline(buffer, "SUB arg%u, const.x, %s;\n", argnum, ret); 05853 if(argnum == 0) ret = "arg0"; 05854 if(argnum == 1) ret = "arg1"; 05855 if(argnum == 2) ret = "arg2"; 05856 } 05857 if(arg & WINED3DTA_ALPHAREPLICATE) { 05858 shader_addline(buffer, "MOV arg%u, %s.w;\n", argnum, ret); 05859 if(argnum == 0) ret = "arg0"; 05860 if(argnum == 1) ret = "arg1"; 05861 if(argnum == 2) ret = "arg2"; 
05862 } 05863 return ret; 05864 } 05865 05866 static void gen_ffp_instr(struct wined3d_shader_buffer *buffer, unsigned int stage, BOOL color, 05867 BOOL alpha, DWORD dst, DWORD op, DWORD dw_arg0, DWORD dw_arg1, DWORD dw_arg2) 05868 { 05869 const char *dstmask, *dstreg, *arg0, *arg1, *arg2; 05870 unsigned int mul = 1; 05871 BOOL mul_final_dest = FALSE; 05872 05873 if(color && alpha) dstmask = ""; 05874 else if(color) dstmask = ".xyz"; 05875 else dstmask = ".w"; 05876 05877 if(dst == tempreg) dstreg = "tempreg"; 05878 else dstreg = "ret"; 05879 05880 arg0 = get_argreg(buffer, 0, stage, dw_arg0); 05881 arg1 = get_argreg(buffer, 1, stage, dw_arg1); 05882 arg2 = get_argreg(buffer, 2, stage, dw_arg2); 05883 05884 switch (op) 05885 { 05886 case WINED3D_TOP_DISABLE: 05887 if (!stage) 05888 shader_addline(buffer, "MOV %s%s, fragment.color.primary;\n", dstreg, dstmask); 05889 break; 05890 05891 case WINED3D_TOP_SELECT_ARG2: 05892 arg1 = arg2; 05893 /* FALLTHROUGH */ 05894 case WINED3D_TOP_SELECT_ARG1: 05895 shader_addline(buffer, "MOV %s%s, %s;\n", dstreg, dstmask, arg1); 05896 break; 05897 05898 case WINED3D_TOP_MODULATE_4X: 05899 mul = 2; 05900 /* FALLTHROUGH */ 05901 case WINED3D_TOP_MODULATE_2X: 05902 mul *= 2; 05903 if (!strcmp(dstreg, "result.color")) 05904 { 05905 dstreg = "ret"; 05906 mul_final_dest = TRUE; 05907 } 05908 /* FALLTHROUGH */ 05909 case WINED3D_TOP_MODULATE: 05910 shader_addline(buffer, "MUL %s%s, %s, %s;\n", dstreg, dstmask, arg1, arg2); 05911 break; 05912 05913 case WINED3D_TOP_ADD_SIGNED_2X: 05914 mul = 2; 05915 if (!strcmp(dstreg, "result.color")) 05916 { 05917 dstreg = "ret"; 05918 mul_final_dest = TRUE; 05919 } 05920 /* FALLTHROUGH */ 05921 case WINED3D_TOP_ADD_SIGNED: 05922 shader_addline(buffer, "SUB arg2, %s, const.w;\n", arg2); 05923 arg2 = "arg2"; 05924 /* FALLTHROUGH */ 05925 case WINED3D_TOP_ADD: 05926 shader_addline(buffer, "ADD_SAT %s%s, %s, %s;\n", dstreg, dstmask, arg1, arg2); 05927 break; 05928 05929 case WINED3D_TOP_SUBTRACT: 05930 
shader_addline(buffer, "SUB_SAT %s%s, %s, %s;\n", dstreg, dstmask, arg1, arg2); 05931 break; 05932 05933 case WINED3D_TOP_ADD_SMOOTH: 05934 shader_addline(buffer, "SUB arg1, const.x, %s;\n", arg1); 05935 shader_addline(buffer, "MAD_SAT %s%s, arg1, %s, %s;\n", dstreg, dstmask, arg2, arg1); 05936 break; 05937 05938 case WINED3D_TOP_BLEND_CURRENT_ALPHA: 05939 arg0 = get_argreg(buffer, 0, stage, WINED3DTA_CURRENT); 05940 shader_addline(buffer, "LRP %s%s, %s.w, %s, %s;\n", dstreg, dstmask, arg0, arg1, arg2); 05941 break; 05942 case WINED3D_TOP_BLEND_FACTOR_ALPHA: 05943 arg0 = get_argreg(buffer, 0, stage, WINED3DTA_TFACTOR); 05944 shader_addline(buffer, "LRP %s%s, %s.w, %s, %s;\n", dstreg, dstmask, arg0, arg1, arg2); 05945 break; 05946 case WINED3D_TOP_BLEND_TEXTURE_ALPHA: 05947 arg0 = get_argreg(buffer, 0, stage, WINED3DTA_TEXTURE); 05948 shader_addline(buffer, "LRP %s%s, %s.w, %s, %s;\n", dstreg, dstmask, arg0, arg1, arg2); 05949 break; 05950 case WINED3D_TOP_BLEND_DIFFUSE_ALPHA: 05951 arg0 = get_argreg(buffer, 0, stage, WINED3DTA_DIFFUSE); 05952 shader_addline(buffer, "LRP %s%s, %s.w, %s, %s;\n", dstreg, dstmask, arg0, arg1, arg2); 05953 break; 05954 05955 case WINED3D_TOP_BLEND_TEXTURE_ALPHA_PM: 05956 arg0 = get_argreg(buffer, 0, stage, WINED3DTA_TEXTURE); 05957 shader_addline(buffer, "SUB arg0.w, const.x, %s.w;\n", arg0); 05958 shader_addline(buffer, "MAD_SAT %s%s, %s, arg0.w, %s;\n", dstreg, dstmask, arg2, arg1); 05959 break; 05960 05961 /* D3DTOP_PREMODULATE ???? 
*/ 05962 05963 case WINED3D_TOP_MODULATE_INVALPHA_ADD_COLOR: 05964 shader_addline(buffer, "SUB arg0.w, const.x, %s;\n", arg1); 05965 shader_addline(buffer, "MAD_SAT %s%s, arg0.w, %s, %s;\n", dstreg, dstmask, arg2, arg1); 05966 break; 05967 case WINED3D_TOP_MODULATE_ALPHA_ADD_COLOR: 05968 shader_addline(buffer, "MAD_SAT %s%s, %s.w, %s, %s;\n", dstreg, dstmask, arg1, arg2, arg1); 05969 break; 05970 case WINED3D_TOP_MODULATE_INVCOLOR_ADD_ALPHA: 05971 shader_addline(buffer, "SUB arg0, const.x, %s;\n", arg1); 05972 shader_addline(buffer, "MAD_SAT %s%s, arg0, %s, %s.w;\n", dstreg, dstmask, arg2, arg1); 05973 break; 05974 case WINED3D_TOP_MODULATE_COLOR_ADD_ALPHA: 05975 shader_addline(buffer, "MAD_SAT %s%s, %s, %s, %s.w;\n", dstreg, dstmask, arg1, arg2, arg1); 05976 break; 05977 05978 case WINED3D_TOP_DOTPRODUCT3: 05979 mul = 4; 05980 if (!strcmp(dstreg, "result.color")) 05981 { 05982 dstreg = "ret"; 05983 mul_final_dest = TRUE; 05984 } 05985 shader_addline(buffer, "SUB arg1, %s, const.w;\n", arg1); 05986 shader_addline(buffer, "SUB arg2, %s, const.w;\n", arg2); 05987 shader_addline(buffer, "DP3_SAT %s%s, arg1, arg2;\n", dstreg, dstmask); 05988 break; 05989 05990 case WINED3D_TOP_MULTIPLY_ADD: 05991 shader_addline(buffer, "MAD_SAT %s%s, %s, %s, %s;\n", dstreg, dstmask, arg1, arg2, arg0); 05992 break; 05993 05994 case WINED3D_TOP_LERP: 05995 /* The msdn is not quite right here */ 05996 shader_addline(buffer, "LRP %s%s, %s, %s, %s;\n", dstreg, dstmask, arg0, arg1, arg2); 05997 break; 05998 05999 case WINED3D_TOP_BUMPENVMAP: 06000 case WINED3D_TOP_BUMPENVMAP_LUMINANCE: 06001 /* Those are handled in the first pass of the shader(generation pass 1 and 2) already */ 06002 break; 06003 06004 default: 06005 FIXME("Unhandled texture op %08x\n", op); 06006 } 06007 06008 if(mul == 2) { 06009 shader_addline(buffer, "MUL_SAT %s%s, %s, const.y;\n", mul_final_dest ? 
"result.color" : dstreg, dstmask, dstreg); 06010 } else if(mul == 4) { 06011 shader_addline(buffer, "MUL_SAT %s%s, %s, const.z;\n", mul_final_dest ? "result.color" : dstreg, dstmask, dstreg); 06012 } 06013 } 06014 06015 static GLuint gen_arbfp_ffp_shader(const struct ffp_frag_settings *settings, const struct wined3d_gl_info *gl_info) 06016 { 06017 unsigned int stage; 06018 struct wined3d_shader_buffer buffer; 06019 BOOL tex_read[MAX_TEXTURES] = {FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE}; 06020 BOOL bump_used[MAX_TEXTURES] = {FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE}; 06021 BOOL luminance_used[MAX_TEXTURES] = {FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE}; 06022 UINT lowest_disabled_stage; 06023 const char *textype; 06024 const char *instr, *sat; 06025 char colorcor_dst[8]; 06026 GLuint ret; 06027 DWORD arg0, arg1, arg2; 06028 BOOL tempreg_used = FALSE, tfactor_used = FALSE; 06029 BOOL op_equal; 06030 const char *final_combiner_src = "ret"; 06031 GLint pos; 06032 06033 /* Find out which textures are read */ 06034 for (stage = 0; stage < MAX_TEXTURES; ++stage) 06035 { 06036 if (settings->op[stage].cop == WINED3D_TOP_DISABLE) 06037 break; 06038 arg0 = settings->op[stage].carg0 & WINED3DTA_SELECTMASK; 06039 arg1 = settings->op[stage].carg1 & WINED3DTA_SELECTMASK; 06040 arg2 = settings->op[stage].carg2 & WINED3DTA_SELECTMASK; 06041 if(arg0 == WINED3DTA_TEXTURE) tex_read[stage] = TRUE; 06042 if(arg1 == WINED3DTA_TEXTURE) tex_read[stage] = TRUE; 06043 if(arg2 == WINED3DTA_TEXTURE) tex_read[stage] = TRUE; 06044 06045 if (settings->op[stage].cop == WINED3D_TOP_BLEND_TEXTURE_ALPHA) 06046 tex_read[stage] = TRUE; 06047 if (settings->op[stage].cop == WINED3D_TOP_BLEND_TEXTURE_ALPHA_PM) 06048 tex_read[stage] = TRUE; 06049 if (settings->op[stage].cop == WINED3D_TOP_BUMPENVMAP) 06050 { 06051 bump_used[stage] = TRUE; 06052 tex_read[stage] = TRUE; 06053 } 06054 if (settings->op[stage].cop == WINED3D_TOP_BUMPENVMAP_LUMINANCE) 06055 { 06056 
bump_used[stage] = TRUE; 06057 tex_read[stage] = TRUE; 06058 luminance_used[stage] = TRUE; 06059 } 06060 else if (settings->op[stage].cop == WINED3D_TOP_BLEND_FACTOR_ALPHA) 06061 { 06062 tfactor_used = TRUE; 06063 } 06064 06065 if(arg0 == WINED3DTA_TFACTOR || arg1 == WINED3DTA_TFACTOR || arg2 == WINED3DTA_TFACTOR) { 06066 tfactor_used = TRUE; 06067 } 06068 06069 if(settings->op[stage].dst == tempreg) tempreg_used = TRUE; 06070 if(arg0 == WINED3DTA_TEMP || arg1 == WINED3DTA_TEMP || arg2 == WINED3DTA_TEMP) { 06071 tempreg_used = TRUE; 06072 } 06073 06074 if (settings->op[stage].aop == WINED3D_TOP_DISABLE) 06075 continue; 06076 arg0 = settings->op[stage].aarg0 & WINED3DTA_SELECTMASK; 06077 arg1 = settings->op[stage].aarg1 & WINED3DTA_SELECTMASK; 06078 arg2 = settings->op[stage].aarg2 & WINED3DTA_SELECTMASK; 06079 if(arg0 == WINED3DTA_TEXTURE) tex_read[stage] = TRUE; 06080 if(arg1 == WINED3DTA_TEXTURE) tex_read[stage] = TRUE; 06081 if(arg2 == WINED3DTA_TEXTURE) tex_read[stage] = TRUE; 06082 06083 if(arg0 == WINED3DTA_TEMP || arg1 == WINED3DTA_TEMP || arg2 == WINED3DTA_TEMP) { 06084 tempreg_used = TRUE; 06085 } 06086 if(arg0 == WINED3DTA_TFACTOR || arg1 == WINED3DTA_TFACTOR || arg2 == WINED3DTA_TFACTOR) { 06087 tfactor_used = TRUE; 06088 } 06089 } 06090 lowest_disabled_stage = stage; 06091 06092 /* Shader header */ 06093 if (!shader_buffer_init(&buffer)) 06094 { 06095 ERR("Failed to initialize shader buffer.\n"); 06096 return 0; 06097 } 06098 06099 shader_addline(&buffer, "!!ARBfp1.0\n"); 06100 06101 switch(settings->fog) { 06102 case FOG_OFF: break; 06103 case FOG_LINEAR: shader_addline(&buffer, "OPTION ARB_fog_linear;\n"); break; 06104 case FOG_EXP: shader_addline(&buffer, "OPTION ARB_fog_exp;\n"); break; 06105 case FOG_EXP2: shader_addline(&buffer, "OPTION ARB_fog_exp2;\n"); break; 06106 default: FIXME("Unexpected fog setting %d\n", settings->fog); 06107 } 06108 06109 shader_addline(&buffer, "PARAM const = {1, 2, 4, 0.5};\n"); 06110 shader_addline(&buffer, "TEMP 
TMP;\n"); 06111 shader_addline(&buffer, "TEMP ret;\n"); 06112 if(tempreg_used || settings->sRGB_write) shader_addline(&buffer, "TEMP tempreg;\n"); 06113 shader_addline(&buffer, "TEMP arg0;\n"); 06114 shader_addline(&buffer, "TEMP arg1;\n"); 06115 shader_addline(&buffer, "TEMP arg2;\n"); 06116 for(stage = 0; stage < MAX_TEXTURES; stage++) { 06117 if(!tex_read[stage]) continue; 06118 shader_addline(&buffer, "TEMP tex%u;\n", stage); 06119 if(!bump_used[stage]) continue; 06120 shader_addline(&buffer, "PARAM bumpmat%u = program.env[%u];\n", stage, ARB_FFP_CONST_BUMPMAT(stage)); 06121 if(!luminance_used[stage]) continue; 06122 shader_addline(&buffer, "PARAM luminance%u = program.env[%u];\n", stage, ARB_FFP_CONST_LUMINANCE(stage)); 06123 } 06124 if(tfactor_used) { 06125 shader_addline(&buffer, "PARAM tfactor = program.env[%u];\n", ARB_FFP_CONST_TFACTOR); 06126 } 06127 shader_addline(&buffer, "PARAM specular_enable = program.env[%u];\n", ARB_FFP_CONST_SPECULAR_ENABLE); 06128 06129 if(settings->sRGB_write) { 06130 shader_addline(&buffer, "PARAM srgb_consts1 = {%f, %f, %f, %f};\n", 06131 srgb_mul_low, srgb_cmp, srgb_pow, srgb_mul_high); 06132 shader_addline(&buffer, "PARAM srgb_consts2 = {%f, %f, %f, %f};\n", 06133 srgb_sub_high, 0.0, 0.0, 0.0); 06134 } 06135 06136 if (lowest_disabled_stage < 7 && settings->emul_clipplanes) 06137 shader_addline(&buffer, "KIL fragment.texcoord[7];\n"); 06138 06139 /* Generate texture sampling instructions) */ 06140 for (stage = 0; stage < MAX_TEXTURES && settings->op[stage].cop != WINED3D_TOP_DISABLE; ++stage) 06141 { 06142 if (!tex_read[stage]) 06143 continue; 06144 06145 switch(settings->op[stage].tex_type) { 06146 case tex_1d: textype = "1D"; break; 06147 case tex_2d: textype = "2D"; break; 06148 case tex_3d: textype = "3D"; break; 06149 case tex_cube: textype = "CUBE"; break; 06150 case tex_rect: textype = "RECT"; break; 06151 default: textype = "unexpected_textype"; break; 06152 } 06153 06154 if (settings->op[stage].cop == 
WINED3D_TOP_BUMPENVMAP 06155 || settings->op[stage].cop == WINED3D_TOP_BUMPENVMAP_LUMINANCE) 06156 sat = ""; 06157 else 06158 sat = "_SAT"; 06159 06160 if(settings->op[stage].projected == proj_none) { 06161 instr = "TEX"; 06162 } else if(settings->op[stage].projected == proj_count4 || 06163 settings->op[stage].projected == proj_count3) { 06164 instr = "TXP"; 06165 } else { 06166 FIXME("Unexpected projection mode %d\n", settings->op[stage].projected); 06167 instr = "TXP"; 06168 } 06169 06170 if (stage > 0 06171 && (settings->op[stage - 1].cop == WINED3D_TOP_BUMPENVMAP 06172 || settings->op[stage - 1].cop == WINED3D_TOP_BUMPENVMAP_LUMINANCE)) 06173 { 06174 shader_addline(&buffer, "SWZ arg1, bumpmat%u, x, z, 0, 0;\n", stage - 1); 06175 shader_addline(&buffer, "DP3 ret.x, arg1, tex%u;\n", stage - 1); 06176 shader_addline(&buffer, "SWZ arg1, bumpmat%u, y, w, 0, 0;\n", stage - 1); 06177 shader_addline(&buffer, "DP3 ret.y, arg1, tex%u;\n", stage - 1); 06178 06179 /* with projective textures, texbem only divides the static texture coord, not the displacement, 06180 * so multiply the displacement with the dividing parameter before passing it to TXP 06181 */ 06182 if (settings->op[stage].projected != proj_none) { 06183 if(settings->op[stage].projected == proj_count4) { 06184 shader_addline(&buffer, "MOV ret.w, fragment.texcoord[%u].w;\n", stage); 06185 shader_addline(&buffer, "MUL ret.xyz, ret, fragment.texcoord[%u].w, fragment.texcoord[%u];\n", stage, stage); 06186 } else { 06187 shader_addline(&buffer, "MOV ret.w, fragment.texcoord[%u].z;\n", stage); 06188 shader_addline(&buffer, "MAD ret.xyz, ret, fragment.texcoord[%u].z, fragment.texcoord[%u];\n", stage, stage); 06189 } 06190 } else { 06191 shader_addline(&buffer, "ADD ret, ret, fragment.texcoord[%u];\n", stage); 06192 } 06193 06194 shader_addline(&buffer, "%s%s tex%u, ret, texture[%u], %s;\n", 06195 instr, sat, stage, stage, textype); 06196 if (settings->op[stage - 1].cop == WINED3D_TOP_BUMPENVMAP_LUMINANCE) 06197 { 
06198 shader_addline(&buffer, "MAD_SAT ret.x, tex%u.z, luminance%u.x, luminance%u.y;\n", 06199 stage - 1, stage - 1, stage - 1); 06200 shader_addline(&buffer, "MUL tex%u, tex%u, ret.x;\n", stage, stage); 06201 } 06202 } else if(settings->op[stage].projected == proj_count3) { 06203 shader_addline(&buffer, "MOV ret, fragment.texcoord[%u];\n", stage); 06204 shader_addline(&buffer, "MOV ret.w, ret.z;\n"); 06205 shader_addline(&buffer, "%s%s tex%u, ret, texture[%u], %s;\n", 06206 instr, sat, stage, stage, textype); 06207 } else { 06208 shader_addline(&buffer, "%s%s tex%u, fragment.texcoord[%u], texture[%u], %s;\n", 06209 instr, sat, stage, stage, stage, textype); 06210 } 06211 06212 sprintf(colorcor_dst, "tex%u", stage); 06213 gen_color_correction(&buffer, colorcor_dst, WINED3DSP_WRITEMASK_ALL, "const.x", "const.y", 06214 settings->op[stage].color_fixup); 06215 } 06216 06217 /* Generate the main shader */ 06218 for (stage = 0; stage < MAX_TEXTURES; ++stage) 06219 { 06220 if (settings->op[stage].cop == WINED3D_TOP_DISABLE) 06221 { 06222 if (!stage) 06223 final_combiner_src = "fragment.color.primary"; 06224 break; 06225 } 06226 06227 if (settings->op[stage].cop == WINED3D_TOP_SELECT_ARG1 06228 && settings->op[stage].aop == WINED3D_TOP_SELECT_ARG1) 06229 op_equal = settings->op[stage].carg1 == settings->op[stage].aarg1; 06230 else if (settings->op[stage].cop == WINED3D_TOP_SELECT_ARG1 06231 && settings->op[stage].aop == WINED3D_TOP_SELECT_ARG2) 06232 op_equal = settings->op[stage].carg1 == settings->op[stage].aarg2; 06233 else if (settings->op[stage].cop == WINED3D_TOP_SELECT_ARG2 06234 && settings->op[stage].aop == WINED3D_TOP_SELECT_ARG1) 06235 op_equal = settings->op[stage].carg2 == settings->op[stage].aarg1; 06236 else if (settings->op[stage].cop == WINED3D_TOP_SELECT_ARG2 06237 && settings->op[stage].aop == WINED3D_TOP_SELECT_ARG2) 06238 op_equal = settings->op[stage].carg2 == settings->op[stage].aarg2; 06239 else 06240 op_equal = settings->op[stage].aop == 
settings->op[stage].cop 06241 && settings->op[stage].carg0 == settings->op[stage].aarg0 06242 && settings->op[stage].carg1 == settings->op[stage].aarg1 06243 && settings->op[stage].carg2 == settings->op[stage].aarg2; 06244 06245 if (settings->op[stage].aop == WINED3D_TOP_DISABLE) 06246 { 06247 gen_ffp_instr(&buffer, stage, TRUE, FALSE, settings->op[stage].dst, 06248 settings->op[stage].cop, settings->op[stage].carg0, 06249 settings->op[stage].carg1, settings->op[stage].carg2); 06250 if (!stage) 06251 shader_addline(&buffer, "MOV ret.w, fragment.color.primary.w;\n"); 06252 } 06253 else if (op_equal) 06254 { 06255 gen_ffp_instr(&buffer, stage, TRUE, TRUE, settings->op[stage].dst, 06256 settings->op[stage].cop, settings->op[stage].carg0, 06257 settings->op[stage].carg1, settings->op[stage].carg2); 06258 } else { 06259 gen_ffp_instr(&buffer, stage, TRUE, FALSE, settings->op[stage].dst, 06260 settings->op[stage].cop, settings->op[stage].carg0, 06261 settings->op[stage].carg1, settings->op[stage].carg2); 06262 gen_ffp_instr(&buffer, stage, FALSE, TRUE, settings->op[stage].dst, 06263 settings->op[stage].aop, settings->op[stage].aarg0, 06264 settings->op[stage].aarg1, settings->op[stage].aarg2); 06265 } 06266 } 06267 06268 if(settings->sRGB_write) { 06269 shader_addline(&buffer, "MAD ret, fragment.color.secondary, specular_enable, %s;\n", final_combiner_src); 06270 arbfp_add_sRGB_correction(&buffer, "ret", "arg0", "arg1", "arg2", "tempreg", FALSE); 06271 shader_addline(&buffer, "MOV result.color, ret;\n"); 06272 } else { 06273 shader_addline(&buffer, "MAD result.color, fragment.color.secondary, specular_enable, %s;\n", final_combiner_src); 06274 } 06275 06276 /* Footer */ 06277 shader_addline(&buffer, "END\n"); 06278 06279 /* Generate the shader */ 06280 GL_EXTCALL(glGenProgramsARB(1, &ret)); 06281 GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, ret)); 06282 GL_EXTCALL(glProgramStringARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, 06283 
strlen(buffer.buffer), buffer.buffer)); 06284 checkGLcall("glProgramStringARB()"); 06285 06286 glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &pos); 06287 if (pos != -1) 06288 { 06289 FIXME("Fragment program error at position %d: %s\n\n", pos, 06290 debugstr_a((const char *)glGetString(GL_PROGRAM_ERROR_STRING_ARB))); 06291 shader_arb_dump_program_source(buffer.buffer); 06292 } 06293 else 06294 { 06295 GLint native; 06296 06297 GL_EXTCALL(glGetProgramivARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_UNDER_NATIVE_LIMITS_ARB, &native)); 06298 checkGLcall("glGetProgramivARB()"); 06299 if (!native) WARN("Program exceeds native resource limits.\n"); 06300 } 06301 06302 shader_buffer_free(&buffer); 06303 return ret; 06304 } 06305 06306 static void fragment_prog_arbfp(struct wined3d_context *context, const struct wined3d_state *state, DWORD state_id) 06307 { 06308 const struct wined3d_device *device = context->swapchain->device; 06309 const struct wined3d_gl_info *gl_info = context->gl_info; 06310 struct shader_arb_priv *priv = device->fragment_priv; 06311 BOOL use_vshader = use_vs(state); 06312 BOOL use_pshader = use_ps(state); 06313 struct ffp_frag_settings settings; 06314 const struct arbfp_ffp_desc *desc; 06315 unsigned int i; 06316 06317 TRACE("context %p, state %p, state_id %#x.\n", context, state, state_id); 06318 06319 if (isStateDirty(context, STATE_RENDER(WINED3D_RS_FOGENABLE))) 06320 { 06321 if (!use_pshader && device->shader_backend == &arb_program_shader_backend && context->last_was_pshader) 06322 { 06323 /* Reload fixed function constants since they collide with the 06324 * pixel shader constants. 
*/ 06325 for (i = 0; i < MAX_TEXTURES; ++i) 06326 { 06327 set_bumpmat_arbfp(context, state, STATE_TEXTURESTAGE(i, WINED3D_TSS_BUMPENV_MAT00)); 06328 } 06329 state_texfactor_arbfp(context, state, STATE_RENDER(WINED3D_RS_TEXTUREFACTOR)); 06330 state_arb_specularenable(context, state, STATE_RENDER(WINED3D_RS_SPECULARENABLE)); 06331 } 06332 else if (use_pshader && !isStateDirty(context, context->state_table[STATE_VSHADER].representative)) 06333 { 06334 device->shader_backend->shader_select(context, use_pshader, use_vshader); 06335 } 06336 return; 06337 } 06338 06339 if (!use_pshader) 06340 { 06341 /* Find or create a shader implementing the fixed function pipeline 06342 * settings, then activate it. */ 06343 gen_ffp_frag_op(device, state, &settings, FALSE); 06344 desc = (const struct arbfp_ffp_desc *)find_ffp_frag_shader(&priv->fragment_shaders, &settings); 06345 if(!desc) { 06346 struct arbfp_ffp_desc *new_desc = HeapAlloc(GetProcessHeap(), 0, sizeof(*new_desc)); 06347 if (!new_desc) 06348 { 06349 ERR("Out of memory\n"); 06350 return; 06351 } 06352 new_desc->num_textures_used = 0; 06353 for (i = 0; i < gl_info->limits.texture_stages; ++i) 06354 { 06355 if (settings.op[i].cop == WINED3D_TOP_DISABLE) 06356 break; 06357 new_desc->num_textures_used = i; 06358 } 06359 06360 memcpy(&new_desc->parent.settings, &settings, sizeof(settings)); 06361 new_desc->shader = gen_arbfp_ffp_shader(&settings, gl_info); 06362 add_ffp_frag_shader(&priv->fragment_shaders, &new_desc->parent); 06363 TRACE("Allocated fixed function replacement shader descriptor %p\n", new_desc); 06364 desc = new_desc; 06365 } 06366 06367 /* Now activate the replacement program. GL_FRAGMENT_PROGRAM_ARB is already active(however, note the 06368 * comment above the shader_select call below). If e.g. GLSL is active, the shader_select call will 06369 * deactivate it. 
06370 */ 06371 GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, desc->shader)); 06372 checkGLcall("glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, desc->shader)"); 06373 priv->current_fprogram_id = desc->shader; 06374 06375 if (device->shader_backend == &arb_program_shader_backend && context->last_was_pshader) 06376 { 06377 /* Reload fixed function constants since they collide with the 06378 * pixel shader constants. */ 06379 for (i = 0; i < MAX_TEXTURES; ++i) 06380 { 06381 set_bumpmat_arbfp(context, state, STATE_TEXTURESTAGE(i, WINED3D_TSS_BUMPENV_MAT00)); 06382 } 06383 state_texfactor_arbfp(context, state, STATE_RENDER(WINED3D_RS_TEXTUREFACTOR)); 06384 state_arb_specularenable(context, state, STATE_RENDER(WINED3D_RS_SPECULARENABLE)); 06385 } 06386 context->last_was_pshader = FALSE; 06387 } else { 06388 context->last_was_pshader = TRUE; 06389 } 06390 06391 /* Finally, select the shader. If a pixel shader is used, it will be set and enabled by the shader backend. 06392 * If this shader backend is arbfp(most likely), then it will simply overwrite the last fixed function replace- 06393 * ment shader. If the shader backend is not ARB, it currently is important that the opengl implementation 06394 * type overwrites GL_ARB_fragment_program. This is currently the case with GLSL. 
If we really want to use 06395 * atifs or nvrc pixel shaders with arb fragment programs we'd have to disable GL_FRAGMENT_PROGRAM_ARB here 06396 * 06397 * Don't call shader_select if the vertex shader is dirty, because it will be called later on by the vertex 06398 * shader handler 06399 */ 06400 if (!isStateDirty(context, context->state_table[STATE_VSHADER].representative)) 06401 { 06402 device->shader_backend->shader_select(context, use_pshader, use_vshader); 06403 06404 if (!isStateDirty(context, STATE_VERTEXSHADERCONSTANT) && (use_vshader || use_pshader)) 06405 context_apply_state(context, state, STATE_VERTEXSHADERCONSTANT); 06406 } 06407 if (use_pshader) 06408 context_apply_state(context, state, STATE_PIXELSHADERCONSTANT); 06409 } 06410 06411 /* We can't link the fog states to the fragment state directly since the 06412 * vertex pipeline links them to FOGENABLE. A different linking in different 06413 * pipeline parts can't be expressed in the combined state table, so we need 06414 * to handle that with a forwarding function. The other invisible side effect 06415 * is that changing the fog start and fog end (which links to FOGENABLE in 06416 * vertex) results in the fragment_prog_arbfp function being called because 06417 * FOGENABLE is dirty, which calls this function here. 
*/ 06418 static void state_arbfp_fog(struct wined3d_context *context, const struct wined3d_state *state, DWORD state_id) 06419 { 06420 enum fogsource new_source; 06421 06422 TRACE("context %p, state %p, state_id %#x.\n", context, state, state_id); 06423 06424 if (!isStateDirty(context, STATE_PIXELSHADER)) 06425 fragment_prog_arbfp(context, state, state_id); 06426 06427 if (!state->render_states[WINED3D_RS_FOGENABLE]) 06428 return; 06429 06430 if (state->render_states[WINED3D_RS_FOGTABLEMODE] == WINED3D_FOG_NONE) 06431 { 06432 if (use_vs(state)) 06433 { 06434 new_source = FOGSOURCE_VS; 06435 } 06436 else 06437 { 06438 if (state->render_states[WINED3D_RS_FOGVERTEXMODE] == WINED3D_FOG_NONE || context->last_was_rhw) 06439 new_source = FOGSOURCE_COORD; 06440 else 06441 new_source = FOGSOURCE_FFP; 06442 } 06443 } 06444 else 06445 { 06446 new_source = FOGSOURCE_FFP; 06447 } 06448 06449 if (new_source != context->fog_source) 06450 { 06451 context->fog_source = new_source; 06452 state_fogstartend(context, state, STATE_RENDER(WINED3D_RS_FOGSTART)); 06453 } 06454 } 06455 06456 static void textransform(struct wined3d_context *context, const struct wined3d_state *state, DWORD state_id) 06457 { 06458 if (!isStateDirty(context, STATE_PIXELSHADER)) 06459 fragment_prog_arbfp(context, state, state_id); 06460 } 06461 06462 static const struct StateEntryTemplate arbfp_fragmentstate_template[] = 06463 { 06464 {STATE_RENDER(WINED3D_RS_TEXTUREFACTOR), { STATE_RENDER(WINED3D_RS_TEXTUREFACTOR), state_texfactor_arbfp }, WINED3D_GL_EXT_NONE }, 06465 {STATE_TEXTURESTAGE(0, WINED3D_TSS_COLOR_OP), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 06466 {STATE_TEXTURESTAGE(0, WINED3D_TSS_COLOR_ARG1), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 06467 {STATE_TEXTURESTAGE(0, WINED3D_TSS_COLOR_ARG2), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 06468 {STATE_TEXTURESTAGE(0, WINED3D_TSS_COLOR_ARG0), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 06469 {STATE_TEXTURESTAGE(0, 
WINED3D_TSS_ALPHA_OP), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 06470 {STATE_TEXTURESTAGE(0, WINED3D_TSS_ALPHA_ARG1), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 06471 {STATE_TEXTURESTAGE(0, WINED3D_TSS_ALPHA_ARG2), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 06472 {STATE_TEXTURESTAGE(0, WINED3D_TSS_ALPHA_ARG0), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 06473 {STATE_TEXTURESTAGE(0, WINED3D_TSS_RESULT_ARG), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 06474 {STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_MAT00), { STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_MAT00), set_bumpmat_arbfp }, WINED3D_GL_EXT_NONE }, 06475 {STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_MAT01), { STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 06476 {STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_MAT10), { STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 06477 {STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_MAT11), { STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 06478 {STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_LSCALE), { STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_LSCALE), tex_bumpenvlum_arbfp }, WINED3D_GL_EXT_NONE }, 06479 {STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_LOFFSET), { STATE_TEXTURESTAGE(0, WINED3D_TSS_BUMPENV_LSCALE), NULL }, WINED3D_GL_EXT_NONE }, 06480 {STATE_TEXTURESTAGE(1, WINED3D_TSS_COLOR_OP), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 06481 {STATE_TEXTURESTAGE(1, WINED3D_TSS_COLOR_ARG1), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 06482 {STATE_TEXTURESTAGE(1, WINED3D_TSS_COLOR_ARG2), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 06483 {STATE_TEXTURESTAGE(1, WINED3D_TSS_COLOR_ARG0), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 06484 {STATE_TEXTURESTAGE(1, WINED3D_TSS_ALPHA_OP), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 06485 {STATE_TEXTURESTAGE(1, WINED3D_TSS_ALPHA_ARG1), { STATE_PIXELSHADER, NULL }, 
WINED3D_GL_EXT_NONE }, 06486 {STATE_TEXTURESTAGE(1, WINED3D_TSS_ALPHA_ARG2), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 06487 {STATE_TEXTURESTAGE(1, WINED3D_TSS_ALPHA_ARG0), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 06488 {STATE_TEXTURESTAGE(1, WINED3D_TSS_RESULT_ARG), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 06489 {STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_MAT00), { STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_MAT00), set_bumpmat_arbfp }, WINED3D_GL_EXT_NONE }, 06490 {STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_MAT01), { STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 06491 {STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_MAT10), { STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 06492 {STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_MAT11), { STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 06493 {STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_LSCALE), { STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_LSCALE), tex_bumpenvlum_arbfp }, WINED3D_GL_EXT_NONE }, 06494 {STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_LOFFSET), { STATE_TEXTURESTAGE(1, WINED3D_TSS_BUMPENV_LSCALE), NULL }, WINED3D_GL_EXT_NONE }, 06495 {STATE_TEXTURESTAGE(2, WINED3D_TSS_COLOR_OP), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 06496 {STATE_TEXTURESTAGE(2, WINED3D_TSS_COLOR_ARG1), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 06497 {STATE_TEXTURESTAGE(2, WINED3D_TSS_COLOR_ARG2), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 06498 {STATE_TEXTURESTAGE(2, WINED3D_TSS_COLOR_ARG0), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 06499 {STATE_TEXTURESTAGE(2, WINED3D_TSS_ALPHA_OP), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 06500 {STATE_TEXTURESTAGE(2, WINED3D_TSS_ALPHA_ARG1), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 06501 {STATE_TEXTURESTAGE(2, WINED3D_TSS_ALPHA_ARG2), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 06502 {STATE_TEXTURESTAGE(2, 
WINED3D_TSS_ALPHA_ARG0), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 06503 {STATE_TEXTURESTAGE(2, WINED3D_TSS_RESULT_ARG), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 06504 {STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_MAT00), { STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_MAT00), set_bumpmat_arbfp }, WINED3D_GL_EXT_NONE }, 06505 {STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_MAT01), { STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 06506 {STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_MAT10), { STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 06507 {STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_MAT11), { STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 06508 {STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_LSCALE), { STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_LSCALE), tex_bumpenvlum_arbfp }, WINED3D_GL_EXT_NONE }, 06509 {STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_LOFFSET), { STATE_TEXTURESTAGE(2, WINED3D_TSS_BUMPENV_LSCALE), NULL }, WINED3D_GL_EXT_NONE }, 06510 {STATE_TEXTURESTAGE(3, WINED3D_TSS_COLOR_OP), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 06511 {STATE_TEXTURESTAGE(3, WINED3D_TSS_COLOR_ARG1), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 06512 {STATE_TEXTURESTAGE(3, WINED3D_TSS_COLOR_ARG2), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 06513 {STATE_TEXTURESTAGE(3, WINED3D_TSS_COLOR_ARG0), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 06514 {STATE_TEXTURESTAGE(3, WINED3D_TSS_ALPHA_OP), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 06515 {STATE_TEXTURESTAGE(3, WINED3D_TSS_ALPHA_ARG1), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 06516 {STATE_TEXTURESTAGE(3, WINED3D_TSS_ALPHA_ARG2), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 06517 {STATE_TEXTURESTAGE(3, WINED3D_TSS_ALPHA_ARG0), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 06518 {STATE_TEXTURESTAGE(3, WINED3D_TSS_RESULT_ARG), { STATE_PIXELSHADER, NULL }, 
WINED3D_GL_EXT_NONE }, 06519 {STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_MAT00), { STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_MAT00), set_bumpmat_arbfp }, WINED3D_GL_EXT_NONE }, 06520 {STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_MAT01), { STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 06521 {STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_MAT10), { STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 06522 {STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_MAT11), { STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 06523 {STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_LSCALE), { STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_LSCALE), tex_bumpenvlum_arbfp }, WINED3D_GL_EXT_NONE }, 06524 {STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_LOFFSET), { STATE_TEXTURESTAGE(3, WINED3D_TSS_BUMPENV_LSCALE), NULL }, WINED3D_GL_EXT_NONE }, 06525 {STATE_TEXTURESTAGE(4, WINED3D_TSS_COLOR_OP), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 06526 {STATE_TEXTURESTAGE(4, WINED3D_TSS_COLOR_ARG1), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 06527 {STATE_TEXTURESTAGE(4, WINED3D_TSS_COLOR_ARG2), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 06528 {STATE_TEXTURESTAGE(4, WINED3D_TSS_COLOR_ARG0), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 06529 {STATE_TEXTURESTAGE(4, WINED3D_TSS_ALPHA_OP), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 06530 {STATE_TEXTURESTAGE(4, WINED3D_TSS_ALPHA_ARG1), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 06531 {STATE_TEXTURESTAGE(4, WINED3D_TSS_ALPHA_ARG2), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 06532 {STATE_TEXTURESTAGE(4, WINED3D_TSS_ALPHA_ARG0), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 06533 {STATE_TEXTURESTAGE(4, WINED3D_TSS_RESULT_ARG), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 06534 {STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_MAT00), { STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_MAT00), set_bumpmat_arbfp }, WINED3D_GL_EXT_NONE }, 
06535 {STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_MAT01), { STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 06536 {STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_MAT10), { STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 06537 {STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_MAT11), { STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 06538 {STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_LSCALE), { STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_LSCALE), tex_bumpenvlum_arbfp }, WINED3D_GL_EXT_NONE }, 06539 {STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_LOFFSET), { STATE_TEXTURESTAGE(4, WINED3D_TSS_BUMPENV_LSCALE), NULL }, WINED3D_GL_EXT_NONE }, 06540 {STATE_TEXTURESTAGE(5, WINED3D_TSS_COLOR_OP), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 06541 {STATE_TEXTURESTAGE(5, WINED3D_TSS_COLOR_ARG1), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 06542 {STATE_TEXTURESTAGE(5, WINED3D_TSS_COLOR_ARG2), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 06543 {STATE_TEXTURESTAGE(5, WINED3D_TSS_COLOR_ARG0), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 06544 {STATE_TEXTURESTAGE(5, WINED3D_TSS_ALPHA_OP), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 06545 {STATE_TEXTURESTAGE(5, WINED3D_TSS_ALPHA_ARG1), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 06546 {STATE_TEXTURESTAGE(5, WINED3D_TSS_ALPHA_ARG2), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 06547 {STATE_TEXTURESTAGE(5, WINED3D_TSS_ALPHA_ARG0), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 06548 {STATE_TEXTURESTAGE(5, WINED3D_TSS_RESULT_ARG), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 06549 {STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_MAT00), { STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_MAT00), set_bumpmat_arbfp }, WINED3D_GL_EXT_NONE }, 06550 {STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_MAT01), { STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 06551 {STATE_TEXTURESTAGE(5, 
WINED3D_TSS_BUMPENV_MAT10), { STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 06552 {STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_MAT11), { STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 06553 {STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_LSCALE), { STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_LSCALE), tex_bumpenvlum_arbfp }, WINED3D_GL_EXT_NONE }, 06554 {STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_LOFFSET), { STATE_TEXTURESTAGE(5, WINED3D_TSS_BUMPENV_LSCALE), NULL }, WINED3D_GL_EXT_NONE }, 06555 {STATE_TEXTURESTAGE(6, WINED3D_TSS_COLOR_OP), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 06556 {STATE_TEXTURESTAGE(6, WINED3D_TSS_COLOR_ARG1), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 06557 {STATE_TEXTURESTAGE(6, WINED3D_TSS_COLOR_ARG2), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 06558 {STATE_TEXTURESTAGE(6, WINED3D_TSS_COLOR_ARG0), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 06559 {STATE_TEXTURESTAGE(6, WINED3D_TSS_ALPHA_OP), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 06560 {STATE_TEXTURESTAGE(6, WINED3D_TSS_ALPHA_ARG1), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 06561 {STATE_TEXTURESTAGE(6, WINED3D_TSS_ALPHA_ARG2), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 06562 {STATE_TEXTURESTAGE(6, WINED3D_TSS_ALPHA_ARG0), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 06563 {STATE_TEXTURESTAGE(6, WINED3D_TSS_RESULT_ARG), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 06564 {STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_MAT00), { STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_MAT00), set_bumpmat_arbfp }, WINED3D_GL_EXT_NONE }, 06565 {STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_MAT01), { STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 06566 {STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_MAT10), { STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 06567 {STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_MAT11), { 
STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 06568 {STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_LSCALE), { STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_LSCALE), tex_bumpenvlum_arbfp }, WINED3D_GL_EXT_NONE }, 06569 {STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_LOFFSET), { STATE_TEXTURESTAGE(6, WINED3D_TSS_BUMPENV_LSCALE), NULL }, WINED3D_GL_EXT_NONE }, 06570 {STATE_TEXTURESTAGE(7, WINED3D_TSS_COLOR_OP), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 06571 {STATE_TEXTURESTAGE(7, WINED3D_TSS_COLOR_ARG1), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 06572 {STATE_TEXTURESTAGE(7, WINED3D_TSS_COLOR_ARG2), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 06573 {STATE_TEXTURESTAGE(7, WINED3D_TSS_COLOR_ARG0), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 06574 {STATE_TEXTURESTAGE(7, WINED3D_TSS_ALPHA_OP), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 06575 {STATE_TEXTURESTAGE(7, WINED3D_TSS_ALPHA_ARG1), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 06576 {STATE_TEXTURESTAGE(7, WINED3D_TSS_ALPHA_ARG2), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 06577 {STATE_TEXTURESTAGE(7, WINED3D_TSS_ALPHA_ARG0), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 06578 {STATE_TEXTURESTAGE(7, WINED3D_TSS_RESULT_ARG), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 06579 {STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_MAT00), { STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_MAT00), set_bumpmat_arbfp }, WINED3D_GL_EXT_NONE }, 06580 {STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_MAT01), { STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 06581 {STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_MAT10), { STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 06582 {STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_MAT11), { STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_MAT00), NULL }, WINED3D_GL_EXT_NONE }, 06583 {STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_LSCALE), { STATE_TEXTURESTAGE(7, 
WINED3D_TSS_BUMPENV_LSCALE), tex_bumpenvlum_arbfp }, WINED3D_GL_EXT_NONE }, 06584 {STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_LOFFSET), { STATE_TEXTURESTAGE(7, WINED3D_TSS_BUMPENV_LSCALE), NULL }, WINED3D_GL_EXT_NONE }, 06585 {STATE_PIXELSHADER, { STATE_PIXELSHADER, fragment_prog_arbfp }, WINED3D_GL_EXT_NONE }, 06586 {STATE_RENDER(WINED3D_RS_FOGENABLE), { STATE_RENDER(WINED3D_RS_FOGENABLE), state_arbfp_fog }, WINED3D_GL_EXT_NONE }, 06587 {STATE_RENDER(WINED3D_RS_FOGTABLEMODE), { STATE_RENDER(WINED3D_RS_FOGENABLE), NULL }, WINED3D_GL_EXT_NONE }, 06588 {STATE_RENDER(WINED3D_RS_FOGVERTEXMODE), { STATE_RENDER(WINED3D_RS_FOGENABLE), NULL }, WINED3D_GL_EXT_NONE }, 06589 {STATE_RENDER(WINED3D_RS_FOGSTART), { STATE_RENDER(WINED3D_RS_FOGSTART), state_fogstartend }, WINED3D_GL_EXT_NONE }, 06590 {STATE_RENDER(WINED3D_RS_FOGEND), { STATE_RENDER(WINED3D_RS_FOGSTART), NULL }, WINED3D_GL_EXT_NONE }, 06591 {STATE_RENDER(WINED3D_RS_SRGBWRITEENABLE), { STATE_PIXELSHADER, NULL }, WINED3D_GL_EXT_NONE }, 06592 {STATE_RENDER(WINED3D_RS_FOGCOLOR), { STATE_RENDER(WINED3D_RS_FOGCOLOR), state_fogcolor }, WINED3D_GL_EXT_NONE }, 06593 {STATE_RENDER(WINED3D_RS_FOGDENSITY), { STATE_RENDER(WINED3D_RS_FOGDENSITY), state_fogdensity }, WINED3D_GL_EXT_NONE }, 06594 {STATE_TEXTURESTAGE(0,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), {STATE_TEXTURESTAGE(0, WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), textransform }, WINED3D_GL_EXT_NONE }, 06595 {STATE_TEXTURESTAGE(1,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), {STATE_TEXTURESTAGE(1, WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), textransform }, WINED3D_GL_EXT_NONE }, 06596 {STATE_TEXTURESTAGE(2,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), {STATE_TEXTURESTAGE(2, WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), textransform }, WINED3D_GL_EXT_NONE }, 06597 {STATE_TEXTURESTAGE(3,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), {STATE_TEXTURESTAGE(3, WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), textransform }, WINED3D_GL_EXT_NONE }, 06598 {STATE_TEXTURESTAGE(4,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), {STATE_TEXTURESTAGE(4, 
WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), textransform }, WINED3D_GL_EXT_NONE }, 06599 {STATE_TEXTURESTAGE(5,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), {STATE_TEXTURESTAGE(5, WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), textransform }, WINED3D_GL_EXT_NONE }, 06600 {STATE_TEXTURESTAGE(6,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), {STATE_TEXTURESTAGE(6, WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), textransform }, WINED3D_GL_EXT_NONE }, 06601 {STATE_TEXTURESTAGE(7,WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), {STATE_TEXTURESTAGE(7, WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS), textransform }, WINED3D_GL_EXT_NONE }, 06602 {STATE_RENDER(WINED3D_RS_SPECULARENABLE), { STATE_RENDER(WINED3D_RS_SPECULARENABLE), state_arb_specularenable}, WINED3D_GL_EXT_NONE }, 06603 {0 /* Terminate */, { 0, 0 }, WINED3D_GL_EXT_NONE }, 06604 }; 06605 06606 const struct fragment_pipeline arbfp_fragment_pipeline = { 06607 arbfp_enable, 06608 arbfp_get_caps, 06609 arbfp_alloc, 06610 arbfp_free, 06611 shader_arb_color_fixup_supported, 06612 arbfp_fragmentstate_template, 06613 TRUE /* We can disable projected textures */ 06614 }; 06615 06616 struct arbfp_blit_priv { 06617 GLenum yuy2_rect_shader, yuy2_2d_shader; 06618 GLenum uyvy_rect_shader, uyvy_2d_shader; 06619 GLenum yv12_rect_shader, yv12_2d_shader; 06620 GLenum p8_rect_shader, p8_2d_shader; 06621 GLuint palette_texture; 06622 }; 06623 06624 static HRESULT arbfp_blit_alloc(struct wined3d_device *device) 06625 { 06626 device->blit_priv = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(struct arbfp_blit_priv)); 06627 if(!device->blit_priv) { 06628 ERR("Out of memory\n"); 06629 return E_OUTOFMEMORY; 06630 } 06631 return WINED3D_OK; 06632 } 06633 06634 /* Context activation is done by the caller. 
*/ 06635 static void arbfp_blit_free(struct wined3d_device *device) 06636 { 06637 const struct wined3d_gl_info *gl_info = &device->adapter->gl_info; 06638 struct arbfp_blit_priv *priv = device->blit_priv; 06639 06640 ENTER_GL(); 06641 GL_EXTCALL(glDeleteProgramsARB(1, &priv->yuy2_rect_shader)); 06642 GL_EXTCALL(glDeleteProgramsARB(1, &priv->yuy2_2d_shader)); 06643 GL_EXTCALL(glDeleteProgramsARB(1, &priv->uyvy_rect_shader)); 06644 GL_EXTCALL(glDeleteProgramsARB(1, &priv->uyvy_2d_shader)); 06645 GL_EXTCALL(glDeleteProgramsARB(1, &priv->yv12_rect_shader)); 06646 GL_EXTCALL(glDeleteProgramsARB(1, &priv->yv12_2d_shader)); 06647 GL_EXTCALL(glDeleteProgramsARB(1, &priv->p8_rect_shader)); 06648 GL_EXTCALL(glDeleteProgramsARB(1, &priv->p8_2d_shader)); 06649 checkGLcall("Delete yuv and p8 programs"); 06650 06651 if(priv->palette_texture) glDeleteTextures(1, &priv->palette_texture); 06652 LEAVE_GL(); 06653 06654 HeapFree(GetProcessHeap(), 0, device->blit_priv); 06655 device->blit_priv = NULL; 06656 } 06657 06658 static BOOL gen_planar_yuv_read(struct wined3d_shader_buffer *buffer, enum complex_fixup fixup, 06659 GLenum textype, char *luminance) 06660 { 06661 char chroma; 06662 const char *tex, *texinstr; 06663 06664 if (fixup == COMPLEX_FIXUP_UYVY) { 06665 chroma = 'x'; 06666 *luminance = 'w'; 06667 } else { 06668 chroma = 'w'; 06669 *luminance = 'x'; 06670 } 06671 switch(textype) { 06672 case GL_TEXTURE_2D: tex = "2D"; texinstr = "TXP"; break; 06673 case GL_TEXTURE_RECTANGLE_ARB: tex = "RECT"; texinstr = "TEX"; break; 06674 default: 06675 /* This is more tricky than just replacing the texture type - we have to navigate 06676 * properly in the texture to find the correct chroma values 06677 */ 06678 FIXME("Implement yuv correction for non-2d, non-rect textures\n"); 06679 return FALSE; 06680 } 06681 06682 /* First we have to read the chroma values. This means we need at least two pixels(no filtering), 06683 * or 4 pixels(with filtering). 
To get the unmodified chromas, we have to rid ourselves of the 06684 * filtering when we sample the texture. 06685 * 06686 * These are the rules for reading the chroma: 06687 * 06688 * Even pixel: Cr 06689 * Even pixel: U 06690 * Odd pixel: V 06691 * 06692 * So we have to get the sampling x position in non-normalized coordinates in integers 06693 */ 06694 if(textype != GL_TEXTURE_RECTANGLE_ARB) { 06695 shader_addline(buffer, "MUL texcrd.xy, fragment.texcoord[0], size.x;\n"); 06696 shader_addline(buffer, "MOV texcrd.w, size.x;\n"); 06697 } else { 06698 shader_addline(buffer, "MOV texcrd, fragment.texcoord[0];\n"); 06699 } 06700 /* We must not allow filtering between pixel x and x+1, this would mix U and V 06701 * Vertical filtering is ok. However, bear in mind that the pixel center is at 06702 * 0.5, so add 0.5. 06703 */ 06704 shader_addline(buffer, "FLR texcrd.x, texcrd.x;\n"); 06705 shader_addline(buffer, "ADD texcrd.x, texcrd.x, coef.y;\n"); 06706 06707 /* Divide the x coordinate by 0.5 and get the fraction. This gives 0.25 and 0.75 for the 06708 * even and odd pixels respectively 06709 */ 06710 shader_addline(buffer, "MUL texcrd2, texcrd, coef.y;\n"); 06711 shader_addline(buffer, "FRC texcrd2, texcrd2;\n"); 06712 06713 /* Sample Pixel 1 */ 06714 shader_addline(buffer, "%s luminance, texcrd, texture[0], %s;\n", texinstr, tex); 06715 06716 /* Put the value into either of the chroma values */ 06717 shader_addline(buffer, "SGE temp.x, texcrd2.x, coef.y;\n"); 06718 shader_addline(buffer, "MUL chroma.x, luminance.%c, temp.x;\n", chroma); 06719 shader_addline(buffer, "SLT temp.x, texcrd2.x, coef.y;\n"); 06720 shader_addline(buffer, "MUL chroma.y, luminance.%c, temp.x;\n", chroma); 06721 06722 /* Sample pixel 2. If we read an even pixel(SLT above returned 1), sample 06723 * the pixel right to the current one. Otherwise, sample the left pixel. 06724 * Bias and scale the SLT result to -1;1 and add it to the texcrd.x. 
06725 */ 06726 shader_addline(buffer, "MAD temp.x, temp.x, coef.z, -coef.x;\n"); 06727 shader_addline(buffer, "ADD texcrd.x, texcrd, temp.x;\n"); 06728 shader_addline(buffer, "%s luminance, texcrd, texture[0], %s;\n", texinstr, tex); 06729 06730 /* Put the value into the other chroma */ 06731 shader_addline(buffer, "SGE temp.x, texcrd2.x, coef.y;\n"); 06732 shader_addline(buffer, "MAD chroma.y, luminance.%c, temp.x, chroma.y;\n", chroma); 06733 shader_addline(buffer, "SLT temp.x, texcrd2.x, coef.y;\n"); 06734 shader_addline(buffer, "MAD chroma.x, luminance.%c, temp.x, chroma.x;\n", chroma); 06735 06736 /* TODO: If filtering is enabled, sample a 2nd pair of pixels left or right of 06737 * the current one and lerp the two U and V values 06738 */ 06739 06740 /* This gives the correctly filtered luminance value */ 06741 shader_addline(buffer, "TEX luminance, fragment.texcoord[0], texture[0], %s;\n", tex); 06742 06743 return TRUE; 06744 } 06745 06746 static BOOL gen_yv12_read(struct wined3d_shader_buffer *buffer, GLenum textype, char *luminance) 06747 { 06748 const char *tex; 06749 06750 switch(textype) { 06751 case GL_TEXTURE_2D: tex = "2D"; break; 06752 case GL_TEXTURE_RECTANGLE_ARB: tex = "RECT"; break; 06753 default: 06754 FIXME("Implement yv12 correction for non-2d, non-rect textures\n"); 06755 return FALSE; 06756 } 06757 06758 /* YV12 surfaces contain a WxH sized luminance plane, followed by a (W/2)x(H/2) 06759 * V and a (W/2)x(H/2) U plane, each with 8 bit per pixel. So the effective 06760 * bitdepth is 12 bits per pixel. Since the U and V planes have only half the 06761 * pitch of the luminance plane, the packing into the gl texture is a bit 06762 * unfortunate. 
If the whole texture is interpreted as luminance data it looks 06763 * approximately like this: 06764 * 06765 * +----------------------------------+---- 06766 * | | 06767 * | | 06768 * | | 06769 * | | 06770 * | | 2 06771 * | LUMINANCE | - 06772 * | | 3 06773 * | | 06774 * | | 06775 * | | 06776 * | | 06777 * +----------------+-----------------+---- 06778 * | | | 06779 * | U even rows | U odd rows | 06780 * | | | 1 06781 * +----------------+------------------ - 06782 * | | | 3 06783 * | V even rows | V odd rows | 06784 * | | | 06785 * +----------------+-----------------+---- 06786 * | | | 06787 * | 0.5 | 0.5 | 06788 * 06789 * So it appears as if there are 4 chroma images, but in fact the odd rows 06790 * in the chroma images are in the same row as the even ones. So its is 06791 * kinda tricky to read 06792 * 06793 * When reading from rectangle textures, keep in mind that the input y coordinates 06794 * go from 0 to d3d_height, whereas the opengl texture height is 1.5 * d3d_height 06795 */ 06796 shader_addline(buffer, "PARAM yv12_coef = {%f, %f, %f, %f};\n", 06797 2.0f / 3.0f, 1.0f / 6.0f, (2.0f / 3.0f) + (1.0f / 6.0f), 1.0f / 3.0f); 06798 06799 shader_addline(buffer, "MOV texcrd, fragment.texcoord[0];\n"); 06800 /* the chroma planes have only half the width */ 06801 shader_addline(buffer, "MUL texcrd.x, texcrd.x, coef.y;\n"); 06802 06803 /* The first value is between 2/3 and 5/6th of the texture's height, so scale+bias 06804 * the coordinate. 
Also read the right side of the image when reading odd lines 06805 * 06806 * Don't forget to clamp the y values in into the range, otherwise we'll get filtering 06807 * bleeding 06808 */ 06809 if(textype == GL_TEXTURE_2D) { 06810 06811 shader_addline(buffer, "RCP chroma.w, size.y;\n"); 06812 06813 shader_addline(buffer, "MUL texcrd2.y, texcrd.y, size.y;\n"); 06814 06815 shader_addline(buffer, "FLR texcrd2.y, texcrd2.y;\n"); 06816 shader_addline(buffer, "MAD texcrd.y, texcrd.y, yv12_coef.y, yv12_coef.x;\n"); 06817 06818 /* Read odd lines from the right side(add size * 0.5 to the x coordinate */ 06819 shader_addline(buffer, "ADD texcrd2.x, texcrd2.y, yv12_coef.y;\n"); /* To avoid 0.5 == 0.5 comparisons */ 06820 shader_addline(buffer, "FRC texcrd2.x, texcrd2.x;\n"); 06821 shader_addline(buffer, "SGE texcrd2.x, texcrd2.x, coef.y;\n"); 06822 shader_addline(buffer, "MAD texcrd.x, texcrd2.x, coef.y, texcrd.x;\n"); 06823 06824 /* clamp, keep the half pixel origin in mind */ 06825 shader_addline(buffer, "MAD temp.y, coef.y, chroma.w, yv12_coef.x;\n"); 06826 shader_addline(buffer, "MAX texcrd.y, temp.y, texcrd.y;\n"); 06827 shader_addline(buffer, "MAD temp.y, -coef.y, chroma.w, yv12_coef.z;\n"); 06828 shader_addline(buffer, "MIN texcrd.y, temp.y, texcrd.y;\n"); 06829 } else { 06830 /* Read from [size - size+size/4] */ 06831 shader_addline(buffer, "FLR texcrd.y, texcrd.y;\n"); 06832 shader_addline(buffer, "MAD texcrd.y, texcrd.y, coef.w, size.y;\n"); 06833 06834 /* Read odd lines from the right side(add size * 0.5 to the x coordinate */ 06835 shader_addline(buffer, "ADD texcrd2.x, texcrd.y, yv12_coef.y;\n"); /* To avoid 0.5 == 0.5 comparisons */ 06836 shader_addline(buffer, "FRC texcrd2.x, texcrd2.x;\n"); 06837 shader_addline(buffer, "SGE texcrd2.x, texcrd2.x, coef.y;\n"); 06838 shader_addline(buffer, "MUL texcrd2.x, texcrd2.x, size.x;\n"); 06839 shader_addline(buffer, "MAD texcrd.x, texcrd2.x, coef.y, texcrd.x;\n"); 06840 06841 /* Make sure to read exactly from the pixel 
center */ 06842 shader_addline(buffer, "FLR texcrd.y, texcrd.y;\n"); 06843 shader_addline(buffer, "ADD texcrd.y, texcrd.y, coef.y;\n"); 06844 06845 /* Clamp */ 06846 shader_addline(buffer, "MAD temp.y, size.y, coef.w, size.y;\n"); 06847 shader_addline(buffer, "ADD temp.y, temp.y, -coef.y;\n"); 06848 shader_addline(buffer, "MIN texcrd.y, temp.y, texcrd.y;\n"); 06849 shader_addline(buffer, "ADD temp.y, size.y, -coef.y;\n"); 06850 shader_addline(buffer, "MAX texcrd.y, temp.y, texcrd.y;\n"); 06851 } 06852 /* Read the texture, put the result into the output register */ 06853 shader_addline(buffer, "TEX temp, texcrd, texture[0], %s;\n", tex); 06854 shader_addline(buffer, "MOV chroma.x, temp.w;\n"); 06855 06856 /* The other chroma value is 1/6th of the texture lower, from 5/6th to 6/6th 06857 * No need to clamp because we're just reusing the already clamped value from above 06858 */ 06859 if(textype == GL_TEXTURE_2D) { 06860 shader_addline(buffer, "ADD texcrd.y, texcrd.y, yv12_coef.y;\n"); 06861 } else { 06862 shader_addline(buffer, "MAD texcrd.y, size.y, coef.w, texcrd.y;\n"); 06863 } 06864 shader_addline(buffer, "TEX temp, texcrd, texture[0], %s;\n", tex); 06865 shader_addline(buffer, "MOV chroma.y, temp.w;\n"); 06866 06867 /* Sample the luminance value. It is in the top 2/3rd of the texture, so scale the y coordinate. 
06868 * Clamp the y coordinate to prevent the chroma values from bleeding into the sampled luminance 06869 * values due to filtering 06870 */ 06871 shader_addline(buffer, "MOV texcrd, fragment.texcoord[0];\n"); 06872 if(textype == GL_TEXTURE_2D) { 06873 /* Multiply the y coordinate by 2/3 and clamp it */ 06874 shader_addline(buffer, "MUL texcrd.y, texcrd.y, yv12_coef.x;\n"); 06875 shader_addline(buffer, "MAD temp.y, -coef.y, chroma.w, yv12_coef.x;\n"); 06876 shader_addline(buffer, "MIN texcrd.y, temp.y, texcrd.y;\n"); 06877 shader_addline(buffer, "TEX luminance, texcrd, texture[0], %s;\n", tex); 06878 } else { 06879 /* Reading from texture_rectangles is pretty straightforward, just use the unmodified 06880 * texture coordinate. It is still a good idea to clamp it though, since the opengl texture 06881 * is bigger 06882 */ 06883 shader_addline(buffer, "ADD temp.x, size.y, -coef.y;\n"); 06884 shader_addline(buffer, "MIN texcrd.y, texcrd.y, size.x;\n"); 06885 shader_addline(buffer, "TEX luminance, texcrd, texture[0], %s;\n", tex); 06886 } 06887 *luminance = 'a'; 06888 06889 return TRUE; 06890 } 06891 06892 static GLuint gen_p8_shader(struct arbfp_blit_priv *priv, 06893 const struct wined3d_gl_info *gl_info, GLenum textype) 06894 { 06895 GLenum shader; 06896 struct wined3d_shader_buffer buffer; 06897 GLint pos; 06898 06899 /* Shader header */ 06900 if (!shader_buffer_init(&buffer)) 06901 { 06902 ERR("Failed to initialize shader buffer.\n"); 06903 return 0; 06904 } 06905 06906 ENTER_GL(); 06907 GL_EXTCALL(glGenProgramsARB(1, &shader)); 06908 GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, shader)); 06909 LEAVE_GL(); 06910 if(!shader) { 06911 shader_buffer_free(&buffer); 06912 return 0; 06913 } 06914 06915 shader_addline(&buffer, "!!ARBfp1.0\n"); 06916 shader_addline(&buffer, "TEMP index;\n"); 06917 06918 /* { 255/256, 0.5/255*255/256, 0, 0 } */ 06919 shader_addline(&buffer, "PARAM constants = { 0.996, 0.00195, 0, 0 };\n"); 06920 06921 /* The alpha-component 
contains the palette index */ 06922 if(textype == GL_TEXTURE_RECTANGLE_ARB) 06923 shader_addline(&buffer, "TXP index, fragment.texcoord[0], texture[0], RECT;\n"); 06924 else 06925 shader_addline(&buffer, "TEX index, fragment.texcoord[0], texture[0], 2D;\n"); 06926 06927 /* Scale the index by 255/256 and add a bias of '0.5' in order to sample in the middle */ 06928 shader_addline(&buffer, "MAD index.a, index.a, constants.x, constants.y;\n"); 06929 06930 /* Use the alpha-component as an index in the palette to get the final color */ 06931 shader_addline(&buffer, "TEX result.color, index.a, texture[1], 1D;\n"); 06932 shader_addline(&buffer, "END\n"); 06933 06934 ENTER_GL(); 06935 GL_EXTCALL(glProgramStringARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, 06936 strlen(buffer.buffer), buffer.buffer)); 06937 checkGLcall("glProgramStringARB()"); 06938 06939 glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &pos); 06940 if (pos != -1) 06941 { 06942 FIXME("Fragment program error at position %d: %s\n\n", pos, 06943 debugstr_a((const char *)glGetString(GL_PROGRAM_ERROR_STRING_ARB))); 06944 shader_arb_dump_program_source(buffer.buffer); 06945 } 06946 06947 if (textype == GL_TEXTURE_RECTANGLE_ARB) 06948 priv->p8_rect_shader = shader; 06949 else 06950 priv->p8_2d_shader = shader; 06951 06952 shader_buffer_free(&buffer); 06953 LEAVE_GL(); 06954 06955 return shader; 06956 } 06957 06958 /* Context activation is done by the caller. */ 06959 static void upload_palette(const struct wined3d_surface *surface, struct wined3d_context *context) 06960 { 06961 BYTE table[256][4]; 06962 struct wined3d_device *device = surface->resource.device; 06963 const struct wined3d_gl_info *gl_info = &device->adapter->gl_info; 06964 struct arbfp_blit_priv *priv = device->blit_priv; 06965 BOOL colorkey = (surface->CKeyFlags & WINEDDSD_CKSRCBLT) ? 
TRUE : FALSE; 06966 06967 d3dfmt_p8_init_palette(surface, table, colorkey); 06968 06969 ENTER_GL(); 06970 06971 if (gl_info->supported[APPLE_CLIENT_STORAGE]) 06972 { 06973 glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE); 06974 checkGLcall("glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_FALSE)"); 06975 } 06976 06977 if (!priv->palette_texture) 06978 glGenTextures(1, &priv->palette_texture); 06979 06980 GL_EXTCALL(glActiveTextureARB(GL_TEXTURE1)); 06981 glBindTexture(GL_TEXTURE_1D, priv->palette_texture); 06982 06983 glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_REPLACE); 06984 06985 glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); 06986 /* Make sure we have discrete color levels. */ 06987 glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); 06988 glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); 06989 /* Upload the palette */ 06990 /* TODO: avoid unneeded uploads in the future by adding some SFLAG_PALETTE_DIRTY mechanism */ 06991 glTexImage1D(GL_TEXTURE_1D, 0, GL_RGBA, 256, 0, GL_RGBA, GL_UNSIGNED_BYTE, table); 06992 06993 if (gl_info->supported[APPLE_CLIENT_STORAGE]) 06994 { 06995 glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE); 06996 checkGLcall("glPixelStorei(GL_UNPACK_CLIENT_STORAGE_APPLE, GL_TRUE)"); 06997 } 06998 06999 /* Switch back to unit 0 in which the 2D texture will be stored. */ 07000 context_active_texture(context, gl_info, 0); 07001 LEAVE_GL(); 07002 } 07003 07004 /* Context activation is done by the caller. 
*/ 07005 static GLuint gen_yuv_shader(struct arbfp_blit_priv *priv, const struct wined3d_gl_info *gl_info, 07006 enum complex_fixup yuv_fixup, GLenum textype) 07007 { 07008 GLenum shader; 07009 struct wined3d_shader_buffer buffer; 07010 char luminance_component; 07011 GLint pos; 07012 07013 /* Shader header */ 07014 if (!shader_buffer_init(&buffer)) 07015 { 07016 ERR("Failed to initialize shader buffer.\n"); 07017 return 0; 07018 } 07019 07020 ENTER_GL(); 07021 GL_EXTCALL(glGenProgramsARB(1, &shader)); 07022 checkGLcall("GL_EXTCALL(glGenProgramsARB(1, &shader))"); 07023 GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, shader)); 07024 checkGLcall("glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, shader)"); 07025 LEAVE_GL(); 07026 if(!shader) { 07027 shader_buffer_free(&buffer); 07028 return 0; 07029 } 07030 07031 /* The YUY2 and UYVY formats contain two pixels packed into a 32 bit macropixel, 07032 * giving effectively 16 bit per pixel. The color consists of a luminance(Y) and 07033 * two chroma(U and V) values. Each macropixel has two luminance values, one for 07034 * each single pixel it contains, and one U and one V value shared between both 07035 * pixels. 07036 * 07037 * The data is loaded into an A8L8 texture. With YUY2, the luminance component 07038 * contains the luminance and alpha the chroma. With UYVY it is vice versa. Thus 07039 * take the format into account when generating the read swizzles 07040 * 07041 * Reading the Y value is straightforward - just sample the texture. The hardware 07042 * takes care of filtering in the horizontal and vertical direction. 07043 * 07044 * Reading the U and V values is harder. We have to avoid filtering horizontally, 07045 * because that would mix the U and V values of one pixel or two adjacent pixels. 07046 * Thus floor the texture coordinate and add 0.5 to get an unfiltered read, 07047 * regardless of the filtering setting. 
Vertical filtering works automatically 07048 * though - the U and V values of two rows are mixed nicely. 07049 * 07050 * Appart of avoiding filtering issues, the code has to know which value it just 07051 * read, and where it can find the other one. To determine this, it checks if 07052 * it sampled an even or odd pixel, and shifts the 2nd read accordingly. 07053 * 07054 * Handling horizontal filtering of U and V values requires reading a 2nd pair 07055 * of pixels, extracting U and V and mixing them. This is not implemented yet. 07056 * 07057 * An alternative implementation idea is to load the texture as A8R8G8B8 texture, 07058 * with width / 2. This way one read gives all 3 values, finding U and V is easy 07059 * in an unfiltered situation. Finding the luminance on the other hand requires 07060 * finding out if it is an odd or even pixel. The real drawback of this approach 07061 * is filtering. This would have to be emulated completely in the shader, reading 07062 * up two 2 packed pixels in up to 2 rows and interpolating both horizontally and 07063 * vertically. 
Beyond that it would require adjustments to the texture handling 07064 * code to deal with the width scaling 07065 */ 07066 shader_addline(&buffer, "!!ARBfp1.0\n"); 07067 shader_addline(&buffer, "TEMP luminance;\n"); 07068 shader_addline(&buffer, "TEMP temp;\n"); 07069 shader_addline(&buffer, "TEMP chroma;\n"); 07070 shader_addline(&buffer, "TEMP texcrd;\n"); 07071 shader_addline(&buffer, "TEMP texcrd2;\n"); 07072 shader_addline(&buffer, "PARAM coef = {1.0, 0.5, 2.0, 0.25};\n"); 07073 shader_addline(&buffer, "PARAM yuv_coef = {1.403, 0.344, 0.714, 1.770};\n"); 07074 shader_addline(&buffer, "PARAM size = program.local[0];\n"); 07075 07076 switch (yuv_fixup) 07077 { 07078 case COMPLEX_FIXUP_UYVY: 07079 case COMPLEX_FIXUP_YUY2: 07080 if (!gen_planar_yuv_read(&buffer, yuv_fixup, textype, &luminance_component)) 07081 { 07082 shader_buffer_free(&buffer); 07083 return 0; 07084 } 07085 break; 07086 07087 case COMPLEX_FIXUP_YV12: 07088 if (!gen_yv12_read(&buffer, textype, &luminance_component)) 07089 { 07090 shader_buffer_free(&buffer); 07091 return 0; 07092 } 07093 break; 07094 07095 default: 07096 FIXME("Unsupported YUV fixup %#x\n", yuv_fixup); 07097 shader_buffer_free(&buffer); 07098 return 0; 07099 } 07100 07101 /* Calculate the final result. Formula is taken from 07102 * http://www.fourcc.org/fccyvrgb.php. 
Note that the chroma 07103 * ranges from -0.5 to 0.5 07104 */ 07105 shader_addline(&buffer, "SUB chroma.xy, chroma, coef.y;\n"); 07106 07107 shader_addline(&buffer, "MAD result.color.x, chroma.x, yuv_coef.x, luminance.%c;\n", luminance_component); 07108 shader_addline(&buffer, "MAD temp.x, -chroma.y, yuv_coef.y, luminance.%c;\n", luminance_component); 07109 shader_addline(&buffer, "MAD result.color.y, -chroma.x, yuv_coef.z, temp.x;\n"); 07110 shader_addline(&buffer, "MAD result.color.z, chroma.y, yuv_coef.w, luminance.%c;\n", luminance_component); 07111 shader_addline(&buffer, "END\n"); 07112 07113 ENTER_GL(); 07114 GL_EXTCALL(glProgramStringARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, 07115 strlen(buffer.buffer), buffer.buffer)); 07116 checkGLcall("glProgramStringARB()"); 07117 07118 glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &pos); 07119 if (pos != -1) 07120 { 07121 FIXME("Fragment program error at position %d: %s\n\n", pos, 07122 debugstr_a((const char *)glGetString(GL_PROGRAM_ERROR_STRING_ARB))); 07123 shader_arb_dump_program_source(buffer.buffer); 07124 } 07125 else 07126 { 07127 GLint native; 07128 07129 GL_EXTCALL(glGetProgramivARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_UNDER_NATIVE_LIMITS_ARB, &native)); 07130 checkGLcall("glGetProgramivARB()"); 07131 if (!native) WARN("Program exceeds native resource limits.\n"); 07132 } 07133 07134 shader_buffer_free(&buffer); 07135 LEAVE_GL(); 07136 07137 switch (yuv_fixup) 07138 { 07139 case COMPLEX_FIXUP_YUY2: 07140 if (textype == GL_TEXTURE_RECTANGLE_ARB) priv->yuy2_rect_shader = shader; 07141 else priv->yuy2_2d_shader = shader; 07142 break; 07143 07144 case COMPLEX_FIXUP_UYVY: 07145 if (textype == GL_TEXTURE_RECTANGLE_ARB) priv->uyvy_rect_shader = shader; 07146 else priv->uyvy_2d_shader = shader; 07147 break; 07148 07149 case COMPLEX_FIXUP_YV12: 07150 if (textype == GL_TEXTURE_RECTANGLE_ARB) priv->yv12_rect_shader = shader; 07151 else priv->yv12_2d_shader = shader; 07152 break; 07153 default: 07154 
ERR("Unsupported complex fixup: %d\n", yuv_fixup); 07155 } 07156 07157 return shader; 07158 } 07159 07160 /* Context activation is done by the caller. */ 07161 static HRESULT arbfp_blit_set(void *blit_priv, struct wined3d_context *context, const struct wined3d_surface *surface) 07162 { 07163 GLenum shader; 07164 float size[4] = {(float) surface->pow2Width, (float) surface->pow2Height, 1.0f, 1.0f}; 07165 struct arbfp_blit_priv *priv = blit_priv; 07166 enum complex_fixup fixup; 07167 GLenum textype = surface->texture_target; 07168 const struct wined3d_gl_info *gl_info = context->gl_info; 07169 07170 if (surface->flags & SFLAG_CONVERTED) 07171 { 07172 ENTER_GL(); 07173 glEnable(textype); 07174 checkGLcall("glEnable(textype)"); 07175 LEAVE_GL(); 07176 return WINED3D_OK; 07177 } 07178 07179 if (!is_complex_fixup(surface->resource.format->color_fixup)) 07180 { 07181 TRACE("Fixup:\n"); 07182 dump_color_fixup_desc(surface->resource.format->color_fixup); 07183 /* Don't bother setting up a shader for unconverted formats */ 07184 ENTER_GL(); 07185 glEnable(textype); 07186 checkGLcall("glEnable(textype)"); 07187 LEAVE_GL(); 07188 return WINED3D_OK; 07189 } 07190 07191 fixup = get_complex_fixup(surface->resource.format->color_fixup); 07192 07193 switch(fixup) 07194 { 07195 case COMPLEX_FIXUP_YUY2: 07196 shader = textype == GL_TEXTURE_RECTANGLE_ARB ? priv->yuy2_rect_shader : priv->yuy2_2d_shader; 07197 break; 07198 07199 case COMPLEX_FIXUP_UYVY: 07200 shader = textype == GL_TEXTURE_RECTANGLE_ARB ? priv->uyvy_rect_shader : priv->uyvy_2d_shader; 07201 break; 07202 07203 case COMPLEX_FIXUP_YV12: 07204 shader = textype == GL_TEXTURE_RECTANGLE_ARB ? priv->yv12_rect_shader : priv->yv12_2d_shader; 07205 break; 07206 07207 case COMPLEX_FIXUP_P8: 07208 shader = textype == GL_TEXTURE_RECTANGLE_ARB ? 
priv->p8_rect_shader : priv->p8_2d_shader; 07209 if (!shader) shader = gen_p8_shader(priv, gl_info, textype); 07210 07211 upload_palette(surface, context); 07212 break; 07213 07214 default: 07215 FIXME("Unsupported complex fixup %#x, not setting a shader\n", fixup); 07216 ENTER_GL(); 07217 glEnable(textype); 07218 checkGLcall("glEnable(textype)"); 07219 LEAVE_GL(); 07220 return E_NOTIMPL; 07221 } 07222 07223 if (!shader) shader = gen_yuv_shader(priv, gl_info, fixup, textype); 07224 07225 ENTER_GL(); 07226 glEnable(GL_FRAGMENT_PROGRAM_ARB); 07227 checkGLcall("glEnable(GL_FRAGMENT_PROGRAM_ARB)"); 07228 GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, shader)); 07229 checkGLcall("glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, shader)"); 07230 GL_EXTCALL(glProgramLocalParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, 0, size)); 07231 checkGLcall("glProgramLocalParameter4fvARB"); 07232 LEAVE_GL(); 07233 07234 return WINED3D_OK; 07235 } 07236 07237 /* Context activation is done by the caller. */ 07238 static void arbfp_blit_unset(const struct wined3d_gl_info *gl_info) 07239 { 07240 ENTER_GL(); 07241 glDisable(GL_FRAGMENT_PROGRAM_ARB); 07242 checkGLcall("glDisable(GL_FRAGMENT_PROGRAM_ARB)"); 07243 glDisable(GL_TEXTURE_2D); 07244 checkGLcall("glDisable(GL_TEXTURE_2D)"); 07245 if (gl_info->supported[ARB_TEXTURE_CUBE_MAP]) 07246 { 07247 glDisable(GL_TEXTURE_CUBE_MAP_ARB); 07248 checkGLcall("glDisable(GL_TEXTURE_CUBE_MAP_ARB)"); 07249 } 07250 if (gl_info->supported[ARB_TEXTURE_RECTANGLE]) 07251 { 07252 glDisable(GL_TEXTURE_RECTANGLE_ARB); 07253 checkGLcall("glDisable(GL_TEXTURE_RECTANGLE_ARB)"); 07254 } 07255 LEAVE_GL(); 07256 } 07257 07258 static BOOL arbfp_blit_supported(const struct wined3d_gl_info *gl_info, enum wined3d_blit_op blit_op, 07259 const RECT *src_rect, DWORD src_usage, enum wined3d_pool src_pool, const struct wined3d_format *src_format, 07260 const RECT *dst_rect, DWORD dst_usage, enum wined3d_pool dst_pool, const struct wined3d_format *dst_format) 07261 { 07262 enum 
complex_fixup src_fixup; 07263 07264 if (!gl_info->supported[ARB_FRAGMENT_PROGRAM]) 07265 return FALSE; 07266 07267 if (blit_op != WINED3D_BLIT_OP_COLOR_BLIT) 07268 { 07269 TRACE("Unsupported blit_op=%d\n", blit_op); 07270 return FALSE; 07271 } 07272 07273 if (src_pool == WINED3D_POOL_SYSTEM_MEM || dst_pool == WINED3D_POOL_SYSTEM_MEM) 07274 return FALSE; 07275 07276 src_fixup = get_complex_fixup(src_format->color_fixup); 07277 if (TRACE_ON(d3d_shader) && TRACE_ON(d3d)) 07278 { 07279 TRACE("Checking support for fixup:\n"); 07280 dump_color_fixup_desc(src_format->color_fixup); 07281 } 07282 07283 if (!is_identity_fixup(dst_format->color_fixup)) 07284 { 07285 TRACE("Destination fixups are not supported\n"); 07286 return FALSE; 07287 } 07288 07289 if (is_identity_fixup(src_format->color_fixup)) 07290 { 07291 TRACE("[OK]\n"); 07292 return TRUE; 07293 } 07294 07295 /* We only support YUV conversions. */ 07296 if (!is_complex_fixup(src_format->color_fixup)) 07297 { 07298 TRACE("[FAILED]\n"); 07299 return FALSE; 07300 } 07301 07302 switch(src_fixup) 07303 { 07304 case COMPLEX_FIXUP_YUY2: 07305 case COMPLEX_FIXUP_UYVY: 07306 case COMPLEX_FIXUP_YV12: 07307 case COMPLEX_FIXUP_P8: 07308 TRACE("[OK]\n"); 07309 return TRUE; 07310 07311 default: 07312 FIXME("Unsupported YUV fixup %#x\n", src_fixup); 07313 TRACE("[FAILED]\n"); 07314 return FALSE; 07315 } 07316 } 07317 07318 HRESULT arbfp_blit_surface(struct wined3d_device *device, DWORD filter, 07319 struct wined3d_surface *src_surface, const RECT *src_rect_in, 07320 struct wined3d_surface *dst_surface, const RECT *dst_rect_in) 07321 { 07322 struct wined3d_context *context; 07323 RECT src_rect = *src_rect_in; 07324 RECT dst_rect = *dst_rect_in; 07325 07326 /* Now load the surface */ 07327 if (wined3d_settings.offscreen_rendering_mode != ORM_FBO 07328 && (src_surface->flags & (SFLAG_INTEXTURE | SFLAG_INDRAWABLE)) == SFLAG_INDRAWABLE) 07329 { 07330 /* Without FBO blits transferring from the drawable to the texture is 07331 * 
expensive, because we have to flip the data in sysmem. Since we can 07332 * flip in the blitter, we don't actually need that flip anyway. So we 07333 * use the surface's texture as scratch texture, and flip the source 07334 * rectangle instead. */ 07335 surface_load_fb_texture(src_surface, FALSE); 07336 07337 src_rect.top = src_surface->resource.height - src_rect.top; 07338 src_rect.bottom = src_surface->resource.height - src_rect.bottom; 07339 } 07340 else 07341 surface_internal_preload(src_surface, SRGB_RGB); 07342 07343 /* Activate the destination context, set it up for blitting */ 07344 context = context_acquire(device, dst_surface); 07345 context_apply_blit_state(context, device); 07346 07347 if (!surface_is_offscreen(dst_surface)) 07348 surface_translate_drawable_coords(dst_surface, context->win_handle, &dst_rect); 07349 07350 arbfp_blit_set(device->blit_priv, context, src_surface); 07351 07352 ENTER_GL(); 07353 07354 /* Draw a textured quad */ 07355 draw_textured_quad(src_surface, context, &src_rect, &dst_rect, filter); 07356 07357 LEAVE_GL(); 07358 07359 /* Leave the opengl state valid for blitting */ 07360 arbfp_blit_unset(context->gl_info); 07361 07362 if (wined3d_settings.strict_draw_ordering 07363 || (dst_surface->container.type == WINED3D_CONTAINER_SWAPCHAIN 07364 && (dst_surface->container.u.swapchain->front_buffer == dst_surface))) 07365 wglFlush(); /* Flush to ensure ordering across contexts. */ 07366 07367 context_release(context); 07368 07369 surface_modify_location(dst_surface, dst_surface->draw_binding, TRUE); 07370 return WINED3D_OK; 07371 } 07372 07373 /* Do not call while under the GL lock. */ 07374 static HRESULT arbfp_blit_color_fill(struct wined3d_device *device, struct wined3d_surface *dst_surface, 07375 const RECT *dst_rect, const struct wined3d_color *color) 07376 { 07377 FIXME("Color filling not implemented by arbfp_blit\n"); 07378 return WINED3DERR_INVALIDCALL; 07379 } 07380 07381 /* Do not call while under the GL lock. 
*/ 07382 static HRESULT arbfp_blit_depth_fill(struct wined3d_device *device, 07383 struct wined3d_surface *surface, const RECT *rect, float depth) 07384 { 07385 FIXME("Depth filling not implemented by arbfp_blit.\n"); 07386 return WINED3DERR_INVALIDCALL; 07387 } 07388 07389 const struct blit_shader arbfp_blit = { 07390 arbfp_blit_alloc, 07391 arbfp_blit_free, 07392 arbfp_blit_set, 07393 arbfp_blit_unset, 07394 arbfp_blit_supported, 07395 arbfp_blit_color_fill, 07396 arbfp_blit_depth_fill, 07397 }; Generated on Fri May 25 2012 04:20:12 for ReactOS by
1.7.6.1
|