Home | Info | Community | Development | myReactOS | Contact Us
ReactOS Development > Doxygen > shader.c
Go to the documentation of this file.
00001 /* 00002 * Copyright 2002-2003 Jason Edmeades 00003 * Copyright 2002-2003 Raphael Junqueira 00004 * Copyright 2004 Christian Costa 00005 * Copyright 2005 Oliver Stieber 00006 * Copyright 2006 Ivan Gyurdiev 00007 * Copyright 2007-2008 Stefan Dösinger for CodeWeavers 00008 * Copyright 2009-2011 Henri Verbeet for CodeWeavers 00009 * 00010 * This library is free software; you can redistribute it and/or 00011 * modify it under the terms of the GNU Lesser General Public 00012 * License as published by the Free Software Foundation; either 00013 * version 2.1 of the License, or (at your option) any later version. 00014 * 00015 * This library is distributed in the hope that it will be useful, 00016 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00017 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00018 * Lesser General Public License for more details. 00019 * 00020 * You should have received a copy of the GNU Lesser General Public 00021 * License along with this library; if not, write to the Free Software 00022 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA 00023 */ 00024 00025 #include "config.h" 00026 00027 #include <math.h> 00028 #include <stdio.h> 00029 #include <string.h> 00030 00031 #include "wined3d_private.h" 00032 00033 WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader); 00034 WINE_DECLARE_DEBUG_CHANNEL(d3d); 00035 00036 static const char * const shader_opcode_names[] = 00037 { 00038 /* WINED3DSIH_ABS */ "abs", 00039 /* WINED3DSIH_ADD */ "add", 00040 /* WINED3DSIH_AND */ "and", 00041 /* WINED3DSIH_BEM */ "bem", 00042 /* WINED3DSIH_BREAK */ "break", 00043 /* WINED3DSIH_BREAKC */ "breakc", 00044 /* WINED3DSIH_BREAKP */ "breakp", 00045 /* WINED3DSIH_CALL */ "call", 00046 /* WINED3DSIH_CALLNZ */ "callnz", 00047 /* WINED3DSIH_CMP */ "cmp", 00048 /* WINED3DSIH_CND */ "cnd", 00049 /* WINED3DSIH_CRS */ "crs", 00050 /* WINED3DSIH_CUT */ "cut", 00051 /* WINED3DSIH_DCL */ "dcl", 00052 /* WINED3DSIH_DEF */ "def", 
00053 /* WINED3DSIH_DEFB */ "defb", 00054 /* WINED3DSIH_DEFI */ "defi", 00055 /* WINED3DSIH_DIV */ "div", 00056 /* WINED3DSIH_DP2ADD */ "dp2add", 00057 /* WINED3DSIH_DP3 */ "dp3", 00058 /* WINED3DSIH_DP4 */ "dp4", 00059 /* WINED3DSIH_DST */ "dst", 00060 /* WINED3DSIH_DSX */ "dsx", 00061 /* WINED3DSIH_DSY */ "dsy", 00062 /* WINED3DSIH_ELSE */ "else", 00063 /* WINED3DSIH_EMIT */ "emit", 00064 /* WINED3DSIH_ENDIF */ "endif", 00065 /* WINED3DSIH_ENDLOOP */ "endloop", 00066 /* WINED3DSIH_ENDREP */ "endrep", 00067 /* WINED3DSIH_EQ */ "eq", 00068 /* WINED3DSIH_EXP */ "exp", 00069 /* WINED3DSIH_EXPP */ "expp", 00070 /* WINED3DSIH_FRC */ "frc", 00071 /* WINED3DSIH_FTOI */ "ftoi", 00072 /* WINED3DSIH_GE */ "ge", 00073 /* WINED3DSIH_IADD */ "iadd", 00074 /* WINED3DSIH_IEQ */ "ieq", 00075 /* WINED3DSIH_IF */ "if", 00076 /* WINED3DSIH_IFC */ "ifc", 00077 /* WINED3DSIH_IGE */ "ige", 00078 /* WINED3DSIH_IMUL */ "imul", 00079 /* WINED3DSIH_ITOF */ "itof", 00080 /* WINED3DSIH_LABEL */ "label", 00081 /* WINED3DSIH_LD */ "ld", 00082 /* WINED3DSIH_LIT */ "lit", 00083 /* WINED3DSIH_LOG */ "log", 00084 /* WINED3DSIH_LOGP */ "logp", 00085 /* WINED3DSIH_LOOP */ "loop", 00086 /* WINED3DSIH_LRP */ "lrp", 00087 /* WINED3DSIH_LT */ "lt", 00088 /* WINED3DSIH_M3x2 */ "m3x2", 00089 /* WINED3DSIH_M3x3 */ "m3x3", 00090 /* WINED3DSIH_M3x4 */ "m3x4", 00091 /* WINED3DSIH_M4x3 */ "m4x3", 00092 /* WINED3DSIH_M4x4 */ "m4x4", 00093 /* WINED3DSIH_MAD */ "mad", 00094 /* WINED3DSIH_MAX */ "max", 00095 /* WINED3DSIH_MIN */ "min", 00096 /* WINED3DSIH_MOV */ "mov", 00097 /* WINED3DSIH_MOVA */ "mova", 00098 /* WINED3DSIH_MOVC */ "movc", 00099 /* WINED3DSIH_MUL */ "mul", 00100 /* WINED3DSIH_NOP */ "nop", 00101 /* WINED3DSIH_NRM */ "nrm", 00102 /* WINED3DSIH_PHASE */ "phase", 00103 /* WINED3DSIH_POW */ "pow", 00104 /* WINED3DSIH_RCP */ "rcp", 00105 /* WINED3DSIH_REP */ "rep", 00106 /* WINED3DSIH_RET */ "ret", 00107 /* WINED3DSIH_ROUND_NI */ "round_ni", 00108 /* WINED3DSIH_RSQ */ "rsq", 00109 /* WINED3DSIH_SAMPLE 
*/ "sample", 00110 /* WINED3DSIH_SAMPLE_GRAD */ "sample_d", 00111 /* WINED3DSIH_SAMPLE_LOD */ "sample_l", 00112 /* WINED3DSIH_SETP */ "setp", 00113 /* WINED3DSIH_SGE */ "sge", 00114 /* WINED3DSIH_SGN */ "sgn", 00115 /* WINED3DSIH_SINCOS */ "sincos", 00116 /* WINED3DSIH_SLT */ "slt", 00117 /* WINED3DSIH_SQRT */ "sqrt", 00118 /* WINED3DSIH_SUB */ "sub", 00119 /* WINED3DSIH_TEX */ "texld", 00120 /* WINED3DSIH_TEXBEM */ "texbem", 00121 /* WINED3DSIH_TEXBEML */ "texbeml", 00122 /* WINED3DSIH_TEXCOORD */ "texcrd", 00123 /* WINED3DSIH_TEXDEPTH */ "texdepth", 00124 /* WINED3DSIH_TEXDP3 */ "texdp3", 00125 /* WINED3DSIH_TEXDP3TEX */ "texdp3tex", 00126 /* WINED3DSIH_TEXKILL */ "texkill", 00127 /* WINED3DSIH_TEXLDD */ "texldd", 00128 /* WINED3DSIH_TEXLDL */ "texldl", 00129 /* WINED3DSIH_TEXM3x2DEPTH */ "texm3x2depth", 00130 /* WINED3DSIH_TEXM3x2PAD */ "texm3x2pad", 00131 /* WINED3DSIH_TEXM3x2TEX */ "texm3x2tex", 00132 /* WINED3DSIH_TEXM3x3 */ "texm3x3", 00133 /* WINED3DSIH_TEXM3x3DIFF */ "texm3x3diff", 00134 /* WINED3DSIH_TEXM3x3PAD */ "texm3x3pad", 00135 /* WINED3DSIH_TEXM3x3SPEC */ "texm3x3spec", 00136 /* WINED3DSIH_TEXM3x3TEX */ "texm3x3tex", 00137 /* WINED3DSIH_TEXM3x3VSPEC */ "texm3x3vspec", 00138 /* WINED3DSIH_TEXREG2AR */ "texreg2ar", 00139 /* WINED3DSIH_TEXREG2GB */ "texreg2gb", 00140 /* WINED3DSIH_TEXREG2RGB */ "texreg2rgb", 00141 /* WINED3DSIH_UDIV */ "udiv", 00142 /* WINED3DSIH_USHR */ "ushr", 00143 /* WINED3DSIH_UTOF */ "utof", 00144 /* WINED3DSIH_XOR */ "xor", 00145 }; 00146 00147 static const char * const semantic_names[] = 00148 { 00149 /* WINED3DDECLUSAGE_POSITION */ "SV_POSITION", 00150 /* WINED3DDECLUSAGE_BLENDWEIGHT */ "BLENDWEIGHT", 00151 /* WINED3DDECLUSAGE_BLENDINDICES */ "BLENDINDICES", 00152 /* WINED3DDECLUSAGE_NORMAL */ "NORMAL", 00153 /* WINED3DDECLUSAGE_PSIZE */ "PSIZE", 00154 /* WINED3DDECLUSAGE_TEXCOORD */ "TEXCOORD", 00155 /* WINED3DDECLUSAGE_TANGENT */ "TANGENT", 00156 /* WINED3DDECLUSAGE_BINORMAL */ "BINORMAL", 00157 /* 
WINED3DDECLUSAGE_TESSFACTOR */ "TESSFACTOR", 00158 /* WINED3DDECLUSAGE_POSITIONT */ "POSITIONT", 00159 /* WINED3DDECLUSAGE_COLOR */ "COLOR", 00160 /* WINED3DDECLUSAGE_FOG */ "FOG", 00161 /* WINED3DDECLUSAGE_DEPTH */ "DEPTH", 00162 /* WINED3DDECLUSAGE_SAMPLE */ "SAMPLE", 00163 }; 00164 00165 static const char *shader_semantic_name_from_usage(WINED3DDECLUSAGE usage) 00166 { 00167 if (usage >= sizeof(semantic_names) / sizeof(*semantic_names)) 00168 { 00169 FIXME("Unrecognized usage %#x.\n", usage); 00170 return "UNRECOGNIZED"; 00171 } 00172 00173 return semantic_names[usage]; 00174 } 00175 00176 static WINED3DDECLUSAGE shader_usage_from_semantic_name(const char *name) 00177 { 00178 unsigned int i; 00179 00180 for (i = 0; i < sizeof(semantic_names) / sizeof(*semantic_names); ++i) 00181 { 00182 if (!strcmp(name, semantic_names[i])) return i; 00183 } 00184 00185 return ~0U; 00186 } 00187 00188 BOOL shader_match_semantic(const char *semantic_name, WINED3DDECLUSAGE usage) 00189 { 00190 return !strcmp(semantic_name, shader_semantic_name_from_usage(usage)); 00191 } 00192 00193 static void shader_signature_from_semantic(struct wined3d_shader_signature_element *e, 00194 const struct wined3d_shader_semantic *s) 00195 { 00196 e->semantic_name = shader_semantic_name_from_usage(s->usage); 00197 e->semantic_idx = s->usage_idx; 00198 e->sysval_semantic = 0; 00199 e->component_type = 0; 00200 e->register_idx = s->reg.reg.idx; 00201 e->mask = s->reg.write_mask; 00202 } 00203 00204 static void shader_signature_from_usage(struct wined3d_shader_signature_element *e, 00205 WINED3DDECLUSAGE usage, UINT usage_idx, UINT reg_idx, DWORD write_mask) 00206 { 00207 e->semantic_name = shader_semantic_name_from_usage(usage); 00208 e->semantic_idx = usage_idx; 00209 e->sysval_semantic = 0; 00210 e->component_type = 0; 00211 e->register_idx = reg_idx; 00212 e->mask = write_mask; 00213 } 00214 00215 static const struct wined3d_shader_frontend *shader_select_frontend(DWORD version_token) 00216 { 00217 
switch (version_token >> 16) 00218 { 00219 case WINED3D_SM1_VS: 00220 case WINED3D_SM1_PS: 00221 return &sm1_shader_frontend; 00222 00223 case WINED3D_SM4_PS: 00224 case WINED3D_SM4_VS: 00225 case WINED3D_SM4_GS: 00226 return &sm4_shader_frontend; 00227 00228 default: 00229 FIXME("Unrecognised version token %#x\n", version_token); 00230 return NULL; 00231 } 00232 } 00233 00234 void shader_buffer_clear(struct wined3d_shader_buffer *buffer) 00235 { 00236 buffer->buffer[0] = '\0'; 00237 buffer->bsize = 0; 00238 buffer->lineNo = 0; 00239 buffer->newline = TRUE; 00240 } 00241 00242 BOOL shader_buffer_init(struct wined3d_shader_buffer *buffer) 00243 { 00244 buffer->buffer = HeapAlloc(GetProcessHeap(), 0, SHADER_PGMSIZE); 00245 if (!buffer->buffer) 00246 { 00247 ERR("Failed to allocate shader buffer memory.\n"); 00248 return FALSE; 00249 } 00250 00251 shader_buffer_clear(buffer); 00252 return TRUE; 00253 } 00254 00255 void shader_buffer_free(struct wined3d_shader_buffer *buffer) 00256 { 00257 HeapFree(GetProcessHeap(), 0, buffer->buffer); 00258 } 00259 00260 int shader_vaddline(struct wined3d_shader_buffer *buffer, const char *format, va_list args) 00261 { 00262 char *base = buffer->buffer + buffer->bsize; 00263 int rc; 00264 00265 rc = vsnprintf(base, SHADER_PGMSIZE - 1 - buffer->bsize, format, args); 00266 00267 if (rc < 0 /* C89 */ || (unsigned int)rc > SHADER_PGMSIZE - 1 - buffer->bsize /* C99 */) 00268 { 00269 ERR("The buffer allocated for the shader program string " 00270 "is too small at %d bytes.\n", SHADER_PGMSIZE); 00271 buffer->bsize = SHADER_PGMSIZE - 1; 00272 return -1; 00273 } 00274 00275 if (buffer->newline) 00276 { 00277 TRACE("GL HW (%u, %u) : %s", buffer->lineNo + 1, buffer->bsize, base); 00278 buffer->newline = FALSE; 00279 } 00280 else 00281 { 00282 TRACE("%s", base); 00283 } 00284 00285 buffer->bsize += rc; 00286 if (buffer->buffer[buffer->bsize-1] == '\n') 00287 { 00288 ++buffer->lineNo; 00289 buffer->newline = TRUE; 00290 } 00291 00292 return 0; 
00293 } 00294 00295 int shader_addline(struct wined3d_shader_buffer *buffer, const char *format, ...) 00296 { 00297 va_list args; 00298 int ret; 00299 00300 va_start(args, format); 00301 ret = shader_vaddline(buffer, format, args); 00302 va_end(args); 00303 00304 return ret; 00305 } 00306 00307 static void shader_init(struct wined3d_shader *shader, struct wined3d_device *device, 00308 void *parent, const struct wined3d_parent_ops *parent_ops) 00309 { 00310 shader->ref = 1; 00311 shader->device = device; 00312 shader->parent = parent; 00313 shader->parent_ops = parent_ops; 00314 list_init(&shader->linked_programs); 00315 list_add_head(&device->shaders, &shader->shader_list_entry); 00316 } 00317 00318 /* Convert floating point offset relative to a register file to an absolute 00319 * offset for float constants. */ 00320 static unsigned int shader_get_float_offset(enum wined3d_shader_register_type register_type, UINT register_idx) 00321 { 00322 switch (register_type) 00323 { 00324 case WINED3DSPR_CONST: return register_idx; 00325 case WINED3DSPR_CONST2: return 2048 + register_idx; 00326 case WINED3DSPR_CONST3: return 4096 + register_idx; 00327 case WINED3DSPR_CONST4: return 6144 + register_idx; 00328 default: 00329 FIXME("Unsupported register type: %u.\n", register_type); 00330 return register_idx; 00331 } 00332 } 00333 00334 static void shader_delete_constant_list(struct list *clist) 00335 { 00336 struct wined3d_shader_lconst *constant; 00337 struct list *ptr; 00338 00339 ptr = list_head(clist); 00340 while (ptr) 00341 { 00342 constant = LIST_ENTRY(ptr, struct wined3d_shader_lconst, entry); 00343 ptr = list_next(clist, ptr); 00344 HeapFree(GetProcessHeap(), 0, constant); 00345 } 00346 list_init(clist); 00347 } 00348 00349 static inline void set_bitmap_bit(DWORD *bitmap, DWORD bit) 00350 { 00351 DWORD idx, shift; 00352 idx = bit >> 5; 00353 shift = bit & 0x1f; 00354 bitmap[idx] |= (1 << shift); 00355 } 00356 00357 static void shader_record_register_usage(struct 
wined3d_shader *shader, struct wined3d_shader_reg_maps *reg_maps, 00358 const struct wined3d_shader_register *reg, enum wined3d_shader_type shader_type) 00359 { 00360 switch (reg->type) 00361 { 00362 case WINED3DSPR_TEXTURE: /* WINED3DSPR_ADDR */ 00363 if (shader_type == WINED3D_SHADER_TYPE_PIXEL) reg_maps->texcoord |= 1 << reg->idx; 00364 else reg_maps->address |= 1 << reg->idx; 00365 break; 00366 00367 case WINED3DSPR_TEMP: 00368 reg_maps->temporary |= 1 << reg->idx; 00369 break; 00370 00371 case WINED3DSPR_INPUT: 00372 if (shader_type == WINED3D_SHADER_TYPE_PIXEL) 00373 { 00374 if (reg->rel_addr) 00375 { 00376 /* If relative addressing is used, we must assume that all registers 00377 * are used. Even if it is a construct like v3[aL], we can't assume 00378 * that v0, v1 and v2 aren't read because aL can be negative */ 00379 unsigned int i; 00380 for (i = 0; i < MAX_REG_INPUT; ++i) 00381 { 00382 shader->u.ps.input_reg_used[i] = TRUE; 00383 } 00384 } 00385 else 00386 { 00387 shader->u.ps.input_reg_used[reg->idx] = TRUE; 00388 } 00389 } 00390 else reg_maps->input_registers |= 1 << reg->idx; 00391 break; 00392 00393 case WINED3DSPR_RASTOUT: 00394 if (reg->idx == 1) reg_maps->fog = 1; 00395 break; 00396 00397 case WINED3DSPR_MISCTYPE: 00398 if (shader_type == WINED3D_SHADER_TYPE_PIXEL) 00399 { 00400 if (!reg->idx) reg_maps->vpos = 1; 00401 else if (reg->idx == 1) reg_maps->usesfacing = 1; 00402 } 00403 break; 00404 00405 case WINED3DSPR_CONST: 00406 if (reg->rel_addr) 00407 { 00408 if (reg->idx < reg_maps->min_rel_offset) reg_maps->min_rel_offset = reg->idx; 00409 if (reg->idx > reg_maps->max_rel_offset) reg_maps->max_rel_offset = reg->idx; 00410 reg_maps->usesrelconstF = TRUE; 00411 } 00412 else 00413 { 00414 set_bitmap_bit(reg_maps->constf, reg->idx); 00415 } 00416 break; 00417 00418 case WINED3DSPR_CONSTINT: 00419 reg_maps->integer_constants |= (1 << reg->idx); 00420 break; 00421 00422 case WINED3DSPR_CONSTBOOL: 00423 reg_maps->boolean_constants |= (1 << reg->idx); 
00424 break; 00425 00426 case WINED3DSPR_COLOROUT: 00427 reg_maps->rt_mask |= (1 << reg->idx); 00428 break; 00429 00430 default: 00431 TRACE("Not recording register of type %#x and idx %u\n", reg->type, reg->idx); 00432 break; 00433 } 00434 } 00435 00436 static unsigned int get_instr_extra_regcount(enum WINED3D_SHADER_INSTRUCTION_HANDLER instr, unsigned int param) 00437 { 00438 switch (instr) 00439 { 00440 case WINED3DSIH_M4x4: 00441 case WINED3DSIH_M3x4: 00442 return param == 1 ? 3 : 0; 00443 00444 case WINED3DSIH_M4x3: 00445 case WINED3DSIH_M3x3: 00446 return param == 1 ? 2 : 0; 00447 00448 case WINED3DSIH_M3x2: 00449 return param == 1 ? 1 : 0; 00450 00451 default: 00452 return 0; 00453 } 00454 } 00455 00456 /* Note that this does not count the loop register as an address register. */ 00457 static HRESULT shader_get_registers_used(struct wined3d_shader *shader, const struct wined3d_shader_frontend *fe, 00458 struct wined3d_shader_reg_maps *reg_maps, struct wined3d_shader_signature_element *input_signature, 00459 struct wined3d_shader_signature_element *output_signature, const DWORD *byte_code, DWORD constf_size) 00460 { 00461 unsigned int cur_loop_depth = 0, max_loop_depth = 0; 00462 void *fe_data = shader->frontend_data; 00463 struct wined3d_shader_version shader_version; 00464 const DWORD *ptr = byte_code; 00465 00466 memset(reg_maps, 0, sizeof(*reg_maps)); 00467 reg_maps->min_rel_offset = ~0U; 00468 00469 fe->shader_read_header(fe_data, &ptr, &shader_version); 00470 reg_maps->shader_version = shader_version; 00471 00472 reg_maps->constf = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, 00473 sizeof(*reg_maps->constf) * ((constf_size + 31) / 32)); 00474 if (!reg_maps->constf) 00475 { 00476 ERR("Failed to allocate constant map memory.\n"); 00477 return E_OUTOFMEMORY; 00478 } 00479 00480 while (!fe->shader_is_end(fe_data, &ptr)) 00481 { 00482 struct wined3d_shader_instruction ins; 00483 const char *comment; 00484 UINT comment_size; 00485 UINT param_size; 00486 
00487 /* Skip comments. */ 00488 fe->shader_read_comment(&ptr, &comment, &comment_size); 00489 if (comment) continue; 00490 00491 /* Fetch opcode. */ 00492 fe->shader_read_opcode(fe_data, &ptr, &ins, ¶m_size); 00493 00494 /* Unhandled opcode, and its parameters. */ 00495 if (ins.handler_idx == WINED3DSIH_TABLE_SIZE) 00496 { 00497 TRACE("Skipping unrecognized instruction.\n"); 00498 ptr += param_size; 00499 continue; 00500 } 00501 00502 /* Handle declarations. */ 00503 if (ins.handler_idx == WINED3DSIH_DCL) 00504 { 00505 struct wined3d_shader_semantic semantic; 00506 00507 fe->shader_read_semantic(&ptr, &semantic); 00508 00509 switch (semantic.reg.reg.type) 00510 { 00511 /* Mark input registers used. */ 00512 case WINED3DSPR_INPUT: 00513 reg_maps->input_registers |= 1 << semantic.reg.reg.idx; 00514 shader_signature_from_semantic(&input_signature[semantic.reg.reg.idx], &semantic); 00515 break; 00516 00517 /* Vertex shader: mark 3.0 output registers used, save token. */ 00518 case WINED3DSPR_OUTPUT: 00519 reg_maps->output_registers |= 1 << semantic.reg.reg.idx; 00520 shader_signature_from_semantic(&output_signature[semantic.reg.reg.idx], &semantic); 00521 if (semantic.usage == WINED3DDECLUSAGE_FOG) reg_maps->fog = 1; 00522 break; 00523 00524 /* Save sampler usage token. 
*/ 00525 case WINED3DSPR_SAMPLER: 00526 reg_maps->sampler_type[semantic.reg.reg.idx] = semantic.sampler_type; 00527 break; 00528 00529 default: 00530 TRACE("Not recording DCL register type %#x.\n", semantic.reg.reg.type); 00531 break; 00532 } 00533 } 00534 else if (ins.handler_idx == WINED3DSIH_DEF) 00535 { 00536 struct wined3d_shader_src_param rel_addr; 00537 struct wined3d_shader_dst_param dst; 00538 00539 struct wined3d_shader_lconst *lconst = HeapAlloc(GetProcessHeap(), 0, sizeof(*lconst)); 00540 if (!lconst) return E_OUTOFMEMORY; 00541 00542 fe->shader_read_dst_param(fe_data, &ptr, &dst, &rel_addr); 00543 lconst->idx = dst.reg.idx; 00544 00545 memcpy(lconst->value, ptr, 4 * sizeof(DWORD)); 00546 ptr += 4; 00547 00548 /* In pixel shader 1.X shaders, the constants are clamped between [-1;1] */ 00549 if (shader_version.major == 1 && shader_version.type == WINED3D_SHADER_TYPE_PIXEL) 00550 { 00551 float *value = (float *)lconst->value; 00552 if (value[0] < -1.0f) value[0] = -1.0f; 00553 else if (value[0] > 1.0f) value[0] = 1.0f; 00554 if (value[1] < -1.0f) value[1] = -1.0f; 00555 else if (value[1] > 1.0f) value[1] = 1.0f; 00556 if (value[2] < -1.0f) value[2] = -1.0f; 00557 else if (value[2] > 1.0f) value[2] = 1.0f; 00558 if (value[3] < -1.0f) value[3] = -1.0f; 00559 else if (value[3] > 1.0f) value[3] = 1.0f; 00560 } 00561 00562 list_add_head(&shader->constantsF, &lconst->entry); 00563 } 00564 else if (ins.handler_idx == WINED3DSIH_DEFI) 00565 { 00566 struct wined3d_shader_src_param rel_addr; 00567 struct wined3d_shader_dst_param dst; 00568 00569 struct wined3d_shader_lconst *lconst = HeapAlloc(GetProcessHeap(), 0, sizeof(*lconst)); 00570 if (!lconst) return E_OUTOFMEMORY; 00571 00572 fe->shader_read_dst_param(fe_data, &ptr, &dst, &rel_addr); 00573 lconst->idx = dst.reg.idx; 00574 00575 memcpy(lconst->value, ptr, 4 * sizeof(DWORD)); 00576 ptr += 4; 00577 00578 list_add_head(&shader->constantsI, &lconst->entry); 00579 reg_maps->local_int_consts |= (1 << dst.reg.idx); 
00580 } 00581 else if (ins.handler_idx == WINED3DSIH_DEFB) 00582 { 00583 struct wined3d_shader_src_param rel_addr; 00584 struct wined3d_shader_dst_param dst; 00585 00586 struct wined3d_shader_lconst *lconst = HeapAlloc(GetProcessHeap(), 0, sizeof(*lconst)); 00587 if (!lconst) return E_OUTOFMEMORY; 00588 00589 fe->shader_read_dst_param(fe_data, &ptr, &dst, &rel_addr); 00590 lconst->idx = dst.reg.idx; 00591 00592 memcpy(lconst->value, ptr, sizeof(DWORD)); 00593 ++ptr; 00594 00595 list_add_head(&shader->constantsB, &lconst->entry); 00596 reg_maps->local_bool_consts |= (1 << dst.reg.idx); 00597 } 00598 /* For subroutine prototypes. */ 00599 else if (ins.handler_idx == WINED3DSIH_LABEL) 00600 { 00601 struct wined3d_shader_src_param src, rel_addr; 00602 00603 fe->shader_read_src_param(fe_data, &ptr, &src, &rel_addr); 00604 reg_maps->labels |= 1 << src.reg.idx; 00605 } 00606 /* Set texture, address, temporary registers. */ 00607 else 00608 { 00609 BOOL color0_mov = FALSE; 00610 unsigned int i, limit; 00611 00612 /* This will loop over all the registers and try to 00613 * make a bitmask of the ones we're interested in. 00614 * 00615 * Relative addressing tokens are ignored, but that's 00616 * okay, since we'll catch any address registers when 00617 * they are initialized (required by spec). */ 00618 for (i = 0; i < ins.dst_count; ++i) 00619 { 00620 struct wined3d_shader_src_param dst_rel_addr; 00621 struct wined3d_shader_dst_param dst_param; 00622 00623 fe->shader_read_dst_param(fe_data, &ptr, &dst_param, &dst_rel_addr); 00624 00625 shader_record_register_usage(shader, reg_maps, &dst_param.reg, shader_version.type); 00626 00627 /* WINED3DSPR_TEXCRDOUT is the same as WINED3DSPR_OUTPUT. _OUTPUT can be > MAX_REG_TEXCRD and 00628 * is used in >= 3.0 shaders. 
Filter 3.0 shaders to prevent overflows, and also filter pixel 00629 * shaders because TECRDOUT isn't used in them, but future register types might cause issues */ 00630 if (shader_version.type == WINED3D_SHADER_TYPE_VERTEX && shader_version.major < 3) 00631 { 00632 UINT idx = dst_param.reg.idx; 00633 00634 switch (dst_param.reg.type) 00635 { 00636 case WINED3DSPR_RASTOUT: 00637 switch (idx) 00638 { 00639 case 0: /* oPos */ 00640 reg_maps->output_registers |= 1 << 10; 00641 shader_signature_from_usage(&output_signature[10], 00642 WINED3DDECLUSAGE_POSITION, 0, 10, WINED3DSP_WRITEMASK_ALL); 00643 break; 00644 00645 case 1: /* oFog */ 00646 reg_maps->output_registers |= 1 << 11; 00647 shader_signature_from_usage(&output_signature[11], 00648 WINED3DDECLUSAGE_FOG, 0, 11, WINED3DSP_WRITEMASK_0); 00649 break; 00650 00651 case 2: /* oPts */ 00652 reg_maps->output_registers |= 1 << 11; 00653 shader_signature_from_usage(&output_signature[11], 00654 WINED3DDECLUSAGE_PSIZE, 0, 11, WINED3DSP_WRITEMASK_1); 00655 break; 00656 } 00657 break; 00658 00659 case WINED3DSPR_ATTROUT: 00660 if (idx < 2) 00661 { 00662 idx += 8; 00663 if (reg_maps->output_registers & (1 << idx)) 00664 { 00665 output_signature[idx].mask |= dst_param.write_mask; 00666 } 00667 else 00668 { 00669 reg_maps->output_registers |= 1 << idx; 00670 shader_signature_from_usage(&output_signature[idx], 00671 WINED3DDECLUSAGE_COLOR, idx - 8, idx, dst_param.write_mask); 00672 } 00673 } 00674 break; 00675 00676 case WINED3DSPR_TEXCRDOUT: 00677 00678 reg_maps->texcoord_mask[idx] |= dst_param.write_mask; 00679 if (reg_maps->output_registers & (1 << idx)) 00680 { 00681 output_signature[idx].mask |= dst_param.write_mask; 00682 } 00683 else 00684 { 00685 reg_maps->output_registers |= 1 << idx; 00686 shader_signature_from_usage(&output_signature[idx], 00687 WINED3DDECLUSAGE_TEXCOORD, idx, idx, dst_param.write_mask); 00688 } 00689 break; 00690 00691 default: 00692 break; 00693 } 00694 } 00695 00696 if (shader_version.type == 
WINED3D_SHADER_TYPE_PIXEL) 00697 { 00698 if (dst_param.reg.type == WINED3DSPR_COLOROUT && !dst_param.reg.idx) 00699 { 00700 /* Many 2.0 and 3.0 pixel shaders end with a MOV from a temp register to 00701 * COLOROUT 0. If we know this in advance, the ARB shader backend can skip 00702 * the mov and perform the sRGB write correction from the source register. 00703 * 00704 * However, if the mov is only partial, we can't do this, and if the write 00705 * comes from an instruction other than MOV it is hard to do as well. If 00706 * COLOROUT 0 is overwritten partially later, the marker is dropped again. */ 00707 shader->u.ps.color0_mov = FALSE; 00708 if (ins.handler_idx == WINED3DSIH_MOV 00709 && dst_param.write_mask == WINED3DSP_WRITEMASK_ALL) 00710 { 00711 /* Used later when the source register is read. */ 00712 color0_mov = TRUE; 00713 } 00714 } 00715 /* Also drop the MOV marker if the source register is overwritten prior to the shader 00716 * end 00717 */ 00718 else if (dst_param.reg.type == WINED3DSPR_TEMP 00719 && dst_param.reg.idx == shader->u.ps.color0_reg) 00720 { 00721 shader->u.ps.color0_mov = FALSE; 00722 } 00723 } 00724 00725 /* Declare 1.x samplers implicitly, based on the destination reg. number. */ 00726 if (shader_version.major == 1 00727 && (ins.handler_idx == WINED3DSIH_TEX 00728 || ins.handler_idx == WINED3DSIH_TEXBEM 00729 || ins.handler_idx == WINED3DSIH_TEXBEML 00730 || ins.handler_idx == WINED3DSIH_TEXDP3TEX 00731 || ins.handler_idx == WINED3DSIH_TEXM3x2TEX 00732 || ins.handler_idx == WINED3DSIH_TEXM3x3SPEC 00733 || ins.handler_idx == WINED3DSIH_TEXM3x3TEX 00734 || ins.handler_idx == WINED3DSIH_TEXM3x3VSPEC 00735 || ins.handler_idx == WINED3DSIH_TEXREG2AR 00736 || ins.handler_idx == WINED3DSIH_TEXREG2GB 00737 || ins.handler_idx == WINED3DSIH_TEXREG2RGB)) 00738 { 00739 /* Fake sampler usage, only set reserved bit and type. 
*/ 00740 DWORD sampler_code = dst_param.reg.idx; 00741 00742 TRACE("Setting fake 2D sampler for 1.x pixelshader.\n"); 00743 reg_maps->sampler_type[sampler_code] = WINED3DSTT_2D; 00744 00745 /* texbem is only valid with < 1.4 pixel shaders */ 00746 if (ins.handler_idx == WINED3DSIH_TEXBEM 00747 || ins.handler_idx == WINED3DSIH_TEXBEML) 00748 { 00749 reg_maps->bumpmat |= 1 << dst_param.reg.idx; 00750 if (ins.handler_idx == WINED3DSIH_TEXBEML) 00751 { 00752 reg_maps->luminanceparams |= 1 << dst_param.reg.idx; 00753 } 00754 } 00755 } 00756 else if (ins.handler_idx == WINED3DSIH_BEM) 00757 { 00758 reg_maps->bumpmat |= 1 << dst_param.reg.idx; 00759 } 00760 } 00761 00762 if (ins.handler_idx == WINED3DSIH_NRM) reg_maps->usesnrm = 1; 00763 else if (ins.handler_idx == WINED3DSIH_DSY) reg_maps->usesdsy = 1; 00764 else if (ins.handler_idx == WINED3DSIH_DSX) reg_maps->usesdsx = 1; 00765 else if (ins.handler_idx == WINED3DSIH_TEXLDD) reg_maps->usestexldd = 1; 00766 else if (ins.handler_idx == WINED3DSIH_TEXLDL) reg_maps->usestexldl = 1; 00767 else if (ins.handler_idx == WINED3DSIH_MOVA) reg_maps->usesmova = 1; 00768 else if (ins.handler_idx == WINED3DSIH_IFC) reg_maps->usesifc = 1; 00769 else if (ins.handler_idx == WINED3DSIH_CALL) reg_maps->usescall = 1; 00770 else if (ins.handler_idx == WINED3DSIH_POW) reg_maps->usespow = 1; 00771 else if (ins.handler_idx == WINED3DSIH_LOOP 00772 || ins.handler_idx == WINED3DSIH_REP) 00773 { 00774 ++cur_loop_depth; 00775 if (cur_loop_depth > max_loop_depth) 00776 max_loop_depth = cur_loop_depth; 00777 } 00778 else if (ins.handler_idx == WINED3DSIH_ENDLOOP 00779 || ins.handler_idx == WINED3DSIH_ENDREP) 00780 --cur_loop_depth; 00781 00782 limit = ins.src_count + (ins.predicate ? 
1 : 0); 00783 for (i = 0; i < limit; ++i) 00784 { 00785 struct wined3d_shader_src_param src_param, src_rel_addr; 00786 unsigned int count; 00787 00788 fe->shader_read_src_param(fe_data, &ptr, &src_param, &src_rel_addr); 00789 count = get_instr_extra_regcount(ins.handler_idx, i); 00790 00791 shader_record_register_usage(shader, reg_maps, &src_param.reg, shader_version.type); 00792 while (count) 00793 { 00794 ++src_param.reg.idx; 00795 shader_record_register_usage(shader, reg_maps, &src_param.reg, shader_version.type); 00796 --count; 00797 } 00798 00799 if (color0_mov) 00800 { 00801 if (src_param.reg.type == WINED3DSPR_TEMP 00802 && src_param.swizzle == WINED3DSP_NOSWIZZLE) 00803 { 00804 shader->u.ps.color0_mov = TRUE; 00805 shader->u.ps.color0_reg = src_param.reg.idx; 00806 } 00807 } 00808 } 00809 } 00810 } 00811 reg_maps->loop_depth = max_loop_depth; 00812 00813 /* PS before 2.0 don't have explicit color outputs. Instead the value of 00814 * R0 is written to the render target. */ 00815 if (shader_version.major < 2 && shader_version.type == WINED3D_SHADER_TYPE_PIXEL) 00816 reg_maps->rt_mask |= (1 << 0); 00817 00818 shader->functionLength = ((const char *)ptr - (const char *)byte_code); 00819 00820 return WINED3D_OK; 00821 } 00822 00823 unsigned int shader_find_free_input_register(const struct wined3d_shader_reg_maps *reg_maps, unsigned int max) 00824 { 00825 DWORD map = 1 << max; 00826 map |= map - 1; 00827 map &= reg_maps->shader_version.major < 3 ? 
~reg_maps->texcoord : ~reg_maps->input_registers; 00828 00829 return wined3d_log2i(map); 00830 } 00831 00832 static void shader_dump_decl_usage(const struct wined3d_shader_semantic *semantic, 00833 const struct wined3d_shader_version *shader_version) 00834 { 00835 TRACE("dcl"); 00836 00837 if (semantic->reg.reg.type == WINED3DSPR_SAMPLER) 00838 { 00839 switch (semantic->sampler_type) 00840 { 00841 case WINED3DSTT_2D: TRACE("_2d"); break; 00842 case WINED3DSTT_CUBE: TRACE("_cube"); break; 00843 case WINED3DSTT_VOLUME: TRACE("_volume"); break; 00844 default: TRACE("_unknown_ttype(0x%08x)", semantic->sampler_type); 00845 } 00846 } 00847 else 00848 { 00849 /* Pixel shaders 3.0 don't have usage semantics. */ 00850 if (shader_version->major < 3 && shader_version->type == WINED3D_SHADER_TYPE_PIXEL) return; 00851 else TRACE("_"); 00852 00853 switch (semantic->usage) 00854 { 00855 case WINED3DDECLUSAGE_POSITION: 00856 TRACE("position%u", semantic->usage_idx); 00857 break; 00858 00859 case WINED3DDECLUSAGE_BLENDINDICES: 00860 TRACE("blend"); 00861 break; 00862 00863 case WINED3DDECLUSAGE_BLENDWEIGHT: 00864 TRACE("weight"); 00865 break; 00866 00867 case WINED3DDECLUSAGE_NORMAL: 00868 TRACE("normal%u", semantic->usage_idx); 00869 break; 00870 00871 case WINED3DDECLUSAGE_PSIZE: 00872 TRACE("psize"); 00873 break; 00874 00875 case WINED3DDECLUSAGE_COLOR: 00876 if (!semantic->usage_idx) TRACE("color"); 00877 else TRACE("specular%u", (semantic->usage_idx - 1)); 00878 break; 00879 00880 case WINED3DDECLUSAGE_TEXCOORD: 00881 TRACE("texture%u", semantic->usage_idx); 00882 break; 00883 00884 case WINED3DDECLUSAGE_TANGENT: 00885 TRACE("tangent"); 00886 break; 00887 00888 case WINED3DDECLUSAGE_BINORMAL: 00889 TRACE("binormal"); 00890 break; 00891 00892 case WINED3DDECLUSAGE_TESSFACTOR: 00893 TRACE("tessfactor"); 00894 break; 00895 00896 case WINED3DDECLUSAGE_POSITIONT: 00897 TRACE("positionT%u", semantic->usage_idx); 00898 break; 00899 00900 case WINED3DDECLUSAGE_FOG: 00901 TRACE("fog"); 
00902 break; 00903 00904 case WINED3DDECLUSAGE_DEPTH: 00905 TRACE("depth"); 00906 break; 00907 00908 case WINED3DDECLUSAGE_SAMPLE: 00909 TRACE("sample"); 00910 break; 00911 00912 default: 00913 FIXME("unknown_semantics(0x%08x)", semantic->usage); 00914 } 00915 } 00916 } 00917 00918 static void shader_dump_register(const struct wined3d_shader_register *reg, 00919 const struct wined3d_shader_version *shader_version) 00920 { 00921 static const char * const rastout_reg_names[] = {"oPos", "oFog", "oPts"}; 00922 static const char * const misctype_reg_names[] = {"vPos", "vFace"}; 00923 UINT offset = reg->idx; 00924 00925 switch (reg->type) 00926 { 00927 case WINED3DSPR_TEMP: 00928 TRACE("r"); 00929 break; 00930 00931 case WINED3DSPR_INPUT: 00932 TRACE("v"); 00933 break; 00934 00935 case WINED3DSPR_CONST: 00936 case WINED3DSPR_CONST2: 00937 case WINED3DSPR_CONST3: 00938 case WINED3DSPR_CONST4: 00939 TRACE("c"); 00940 offset = shader_get_float_offset(reg->type, reg->idx); 00941 break; 00942 00943 case WINED3DSPR_TEXTURE: /* vs: case WINED3DSPR_ADDR */ 00944 TRACE("%c", shader_version->type == WINED3D_SHADER_TYPE_PIXEL ? 't' : 'a'); 00945 break; 00946 00947 case WINED3DSPR_RASTOUT: 00948 TRACE("%s", rastout_reg_names[reg->idx]); 00949 break; 00950 00951 case WINED3DSPR_COLOROUT: 00952 TRACE("oC"); 00953 break; 00954 00955 case WINED3DSPR_DEPTHOUT: 00956 TRACE("oDepth"); 00957 break; 00958 00959 case WINED3DSPR_ATTROUT: 00960 TRACE("oD"); 00961 break; 00962 00963 case WINED3DSPR_TEXCRDOUT: 00964 /* Vertex shaders >= 3.0 use general purpose output registers 00965 * (WINED3DSPR_OUTPUT), which can include an address token. 
*/ 00966 if (shader_version->major >= 3) TRACE("o"); 00967 else TRACE("oT"); 00968 break; 00969 00970 case WINED3DSPR_CONSTINT: 00971 TRACE("i"); 00972 break; 00973 00974 case WINED3DSPR_CONSTBOOL: 00975 TRACE("b"); 00976 break; 00977 00978 case WINED3DSPR_LABEL: 00979 TRACE("l"); 00980 break; 00981 00982 case WINED3DSPR_LOOP: 00983 TRACE("aL"); 00984 break; 00985 00986 case WINED3DSPR_SAMPLER: 00987 TRACE("s"); 00988 break; 00989 00990 case WINED3DSPR_MISCTYPE: 00991 if (reg->idx > 1) FIXME("Unhandled misctype register %u.\n", reg->idx); 00992 else TRACE("%s", misctype_reg_names[reg->idx]); 00993 break; 00994 00995 case WINED3DSPR_PREDICATE: 00996 TRACE("p"); 00997 break; 00998 00999 case WINED3DSPR_IMMCONST: 01000 TRACE("l"); 01001 break; 01002 01003 case WINED3DSPR_CONSTBUFFER: 01004 TRACE("cb"); 01005 break; 01006 01007 case WINED3DSPR_NULL: 01008 TRACE("null"); 01009 break; 01010 01011 case WINED3DSPR_RESOURCE: 01012 TRACE("t"); 01013 break; 01014 01015 default: 01016 TRACE("unhandled_rtype(%#x)", reg->type); 01017 break; 01018 } 01019 01020 if (reg->type == WINED3DSPR_IMMCONST) 01021 { 01022 TRACE("("); 01023 switch (reg->immconst_type) 01024 { 01025 case WINED3D_IMMCONST_SCALAR: 01026 TRACE("%.8e", *(const float *)reg->immconst_data); 01027 break; 01028 01029 case WINED3D_IMMCONST_VEC4: 01030 TRACE("%.8e, %.8e, %.8e, %.8e", 01031 *(const float *)®->immconst_data[0], *(const float *)®->immconst_data[1], 01032 *(const float *)®->immconst_data[2], *(const float *)®->immconst_data[3]); 01033 break; 01034 01035 default: 01036 TRACE("<unhandled immconst_type %#x>", reg->immconst_type); 01037 break; 01038 } 01039 TRACE(")"); 01040 } 01041 else if (reg->type != WINED3DSPR_RASTOUT 01042 && reg->type != WINED3DSPR_MISCTYPE 01043 && reg->type != WINED3DSPR_NULL) 01044 { 01045 if (reg->array_idx != ~0U) 01046 { 01047 TRACE("%u[%u", offset, reg->array_idx); 01048 if (reg->rel_addr) 01049 { 01050 TRACE(" + "); 01051 shader_dump_src_param(reg->rel_addr, shader_version); 
01052 } 01053 TRACE("]"); 01054 } 01055 else 01056 { 01057 if (reg->rel_addr) 01058 { 01059 TRACE("["); 01060 shader_dump_src_param(reg->rel_addr, shader_version); 01061 TRACE(" + "); 01062 } 01063 TRACE("%u", offset); 01064 if (reg->rel_addr) TRACE("]"); 01065 } 01066 } 01067 } 01068 01069 void shader_dump_dst_param(const struct wined3d_shader_dst_param *param, 01070 const struct wined3d_shader_version *shader_version) 01071 { 01072 DWORD write_mask = param->write_mask; 01073 01074 shader_dump_register(&param->reg, shader_version); 01075 01076 if (write_mask && write_mask != WINED3DSP_WRITEMASK_ALL) 01077 { 01078 static const char *write_mask_chars = "xyzw"; 01079 01080 TRACE("."); 01081 if (write_mask & WINED3DSP_WRITEMASK_0) TRACE("%c", write_mask_chars[0]); 01082 if (write_mask & WINED3DSP_WRITEMASK_1) TRACE("%c", write_mask_chars[1]); 01083 if (write_mask & WINED3DSP_WRITEMASK_2) TRACE("%c", write_mask_chars[2]); 01084 if (write_mask & WINED3DSP_WRITEMASK_3) TRACE("%c", write_mask_chars[3]); 01085 } 01086 } 01087 01088 void shader_dump_src_param(const struct wined3d_shader_src_param *param, 01089 const struct wined3d_shader_version *shader_version) 01090 { 01091 enum wined3d_shader_src_modifier src_modifier = param->modifiers; 01092 DWORD swizzle = param->swizzle; 01093 01094 if (src_modifier == WINED3DSPSM_NEG 01095 || src_modifier == WINED3DSPSM_BIASNEG 01096 || src_modifier == WINED3DSPSM_SIGNNEG 01097 || src_modifier == WINED3DSPSM_X2NEG 01098 || src_modifier == WINED3DSPSM_ABSNEG) 01099 TRACE("-"); 01100 else if (src_modifier == WINED3DSPSM_COMP) 01101 TRACE("1-"); 01102 else if (src_modifier == WINED3DSPSM_NOT) 01103 TRACE("!"); 01104 01105 if (src_modifier == WINED3DSPSM_ABS || src_modifier == WINED3DSPSM_ABSNEG) 01106 TRACE("abs("); 01107 01108 shader_dump_register(&param->reg, shader_version); 01109 01110 if (src_modifier) 01111 { 01112 switch (src_modifier) 01113 { 01114 case WINED3DSPSM_NONE: break; 01115 case WINED3DSPSM_NEG: break; 01116 case 
WINED3DSPSM_NOT: break; 01117 case WINED3DSPSM_BIAS: TRACE("_bias"); break; 01118 case WINED3DSPSM_BIASNEG: TRACE("_bias"); break; 01119 case WINED3DSPSM_SIGN: TRACE("_bx2"); break; 01120 case WINED3DSPSM_SIGNNEG: TRACE("_bx2"); break; 01121 case WINED3DSPSM_COMP: break; 01122 case WINED3DSPSM_X2: TRACE("_x2"); break; 01123 case WINED3DSPSM_X2NEG: TRACE("_x2"); break; 01124 case WINED3DSPSM_DZ: TRACE("_dz"); break; 01125 case WINED3DSPSM_DW: TRACE("_dw"); break; 01126 case WINED3DSPSM_ABSNEG: TRACE(")"); break; 01127 case WINED3DSPSM_ABS: TRACE(")"); break; 01128 default: TRACE("_unknown_modifier(%#x)", src_modifier); 01129 } 01130 } 01131 01132 if (swizzle != WINED3DSP_NOSWIZZLE) 01133 { 01134 static const char *swizzle_chars = "xyzw"; 01135 DWORD swizzle_x = swizzle & 0x03; 01136 DWORD swizzle_y = (swizzle >> 2) & 0x03; 01137 DWORD swizzle_z = (swizzle >> 4) & 0x03; 01138 DWORD swizzle_w = (swizzle >> 6) & 0x03; 01139 01140 if (swizzle_x == swizzle_y 01141 && swizzle_x == swizzle_z 01142 && swizzle_x == swizzle_w) 01143 { 01144 TRACE(".%c", swizzle_chars[swizzle_x]); 01145 } 01146 else 01147 { 01148 TRACE(".%c%c%c%c", swizzle_chars[swizzle_x], swizzle_chars[swizzle_y], 01149 swizzle_chars[swizzle_z], swizzle_chars[swizzle_w]); 01150 } 01151 } 01152 } 01153 01154 /* Shared code in order to generate the bulk of the shader string. 01155 * NOTE: A description of how to parse tokens can be found on MSDN. 
*/ 01156 void shader_generate_main(const struct wined3d_shader *shader, struct wined3d_shader_buffer *buffer, 01157 const struct wined3d_shader_reg_maps *reg_maps, const DWORD *byte_code, void *backend_ctx) 01158 { 01159 struct wined3d_device *device = shader->device; 01160 const struct wined3d_shader_frontend *fe = shader->frontend; 01161 void *fe_data = shader->frontend_data; 01162 struct wined3d_shader_src_param dst_rel_addr[2]; 01163 struct wined3d_shader_src_param src_rel_addr[4]; 01164 struct wined3d_shader_dst_param dst_param[2]; 01165 struct wined3d_shader_src_param src_param[4]; 01166 struct wined3d_shader_version shader_version; 01167 struct wined3d_shader_loop_state loop_state; 01168 struct wined3d_shader_instruction ins; 01169 struct wined3d_shader_tex_mx tex_mx; 01170 struct wined3d_shader_context ctx; 01171 const DWORD *ptr = byte_code; 01172 DWORD i; 01173 01174 /* Initialize current parsing state. */ 01175 tex_mx.current_row = 0; 01176 loop_state.current_depth = 0; 01177 loop_state.current_reg = 0; 01178 01179 ctx.shader = shader; 01180 ctx.gl_info = &device->adapter->gl_info; 01181 ctx.reg_maps = reg_maps; 01182 ctx.buffer = buffer; 01183 ctx.tex_mx = &tex_mx; 01184 ctx.loop_state = &loop_state; 01185 ctx.backend_data = backend_ctx; 01186 01187 ins.ctx = &ctx; 01188 ins.dst = dst_param; 01189 ins.src = src_param; 01190 01191 fe->shader_read_header(fe_data, &ptr, &shader_version); 01192 01193 while (!fe->shader_is_end(fe_data, &ptr)) 01194 { 01195 const char *comment; 01196 UINT comment_size; 01197 UINT param_size; 01198 01199 /* Skip comment tokens. */ 01200 fe->shader_read_comment(&ptr, &comment, &comment_size); 01201 if (comment) continue; 01202 01203 /* Read opcode. */ 01204 fe->shader_read_opcode(fe_data, &ptr, &ins, ¶m_size); 01205 01206 /* Unknown opcode and its parameters. 
*/ 01207 if (ins.handler_idx == WINED3DSIH_TABLE_SIZE) 01208 { 01209 TRACE("Skipping unrecognized instruction.\n"); 01210 ptr += param_size; 01211 continue; 01212 } 01213 01214 /* Nothing to do. */ 01215 if (ins.handler_idx == WINED3DSIH_DCL 01216 || ins.handler_idx == WINED3DSIH_NOP 01217 || ins.handler_idx == WINED3DSIH_DEF 01218 || ins.handler_idx == WINED3DSIH_DEFI 01219 || ins.handler_idx == WINED3DSIH_DEFB 01220 || ins.handler_idx == WINED3DSIH_PHASE) 01221 { 01222 ptr += param_size; 01223 continue; 01224 } 01225 01226 /* Destination tokens */ 01227 for (i = 0; i < ins.dst_count; ++i) 01228 { 01229 fe->shader_read_dst_param(fe_data, &ptr, &dst_param[i], &dst_rel_addr[i]); 01230 } 01231 01232 /* Predication token */ 01233 if (ins.predicate) 01234 { 01235 FIXME("Predicates not implemented.\n"); 01236 ins.predicate = *ptr++; 01237 } 01238 01239 /* Other source tokens */ 01240 for (i = 0; i < ins.src_count; ++i) 01241 { 01242 fe->shader_read_src_param(fe_data, &ptr, &src_param[i], &src_rel_addr[i]); 01243 } 01244 01245 /* Call appropriate function for output target */ 01246 device->shader_backend->shader_handle_instruction(&ins); 01247 } 01248 } 01249 01250 static void shader_dump_ins_modifiers(const struct wined3d_shader_dst_param *dst) 01251 { 01252 DWORD mmask = dst->modifiers; 01253 01254 switch (dst->shift) 01255 { 01256 case 0: break; 01257 case 13: TRACE("_d8"); break; 01258 case 14: TRACE("_d4"); break; 01259 case 15: TRACE("_d2"); break; 01260 case 1: TRACE("_x2"); break; 01261 case 2: TRACE("_x4"); break; 01262 case 3: TRACE("_x8"); break; 01263 default: TRACE("_unhandled_shift(%d)", dst->shift); break; 01264 } 01265 01266 if (mmask & WINED3DSPDM_SATURATE) TRACE("_sat"); 01267 if (mmask & WINED3DSPDM_PARTIALPRECISION) TRACE("_pp"); 01268 if (mmask & WINED3DSPDM_MSAMPCENTROID) TRACE("_centroid"); 01269 01270 mmask &= ~(WINED3DSPDM_SATURATE | WINED3DSPDM_PARTIALPRECISION | WINED3DSPDM_MSAMPCENTROID); 01271 if (mmask) FIXME("_unrecognized_modifier(%#x)", 
mmask); 01272 } 01273 01274 static void shader_trace_init(const struct wined3d_shader_frontend *fe, void *fe_data, const DWORD *byte_code) 01275 { 01276 struct wined3d_shader_version shader_version; 01277 const DWORD *ptr = byte_code; 01278 const char *type_prefix; 01279 DWORD i; 01280 01281 TRACE("Parsing %p.\n", byte_code); 01282 01283 fe->shader_read_header(fe_data, &ptr, &shader_version); 01284 01285 switch (shader_version.type) 01286 { 01287 case WINED3D_SHADER_TYPE_VERTEX: 01288 type_prefix = "vs"; 01289 break; 01290 01291 case WINED3D_SHADER_TYPE_GEOMETRY: 01292 type_prefix = "gs"; 01293 break; 01294 01295 case WINED3D_SHADER_TYPE_PIXEL: 01296 type_prefix = "ps"; 01297 break; 01298 01299 default: 01300 FIXME("Unhandled shader type %#x.\n", shader_version.type); 01301 type_prefix = "unknown"; 01302 break; 01303 } 01304 01305 TRACE("%s_%u_%u\n", type_prefix, shader_version.major, shader_version.minor); 01306 01307 while (!fe->shader_is_end(fe_data, &ptr)) 01308 { 01309 struct wined3d_shader_instruction ins; 01310 const char *comment; 01311 UINT comment_size; 01312 UINT param_size; 01313 01314 /* comment */ 01315 fe->shader_read_comment(&ptr, &comment, &comment_size); 01316 if (comment) 01317 { 01318 if (comment_size > 4 && *(const DWORD *)comment == WINEMAKEFOURCC('T', 'E', 'X', 'T')) 01319 { 01320 const char *end = comment + comment_size; 01321 const char *ptr = comment + 4; 01322 const char *line = ptr; 01323 01324 TRACE("// TEXT\n"); 01325 while (ptr != end) 01326 { 01327 if (*ptr == '\n') 01328 { 01329 UINT len = ptr - line; 01330 if (len && *(ptr - 1) == '\r') --len; 01331 TRACE("// %s\n", debugstr_an(line, len)); 01332 line = ++ptr; 01333 } 01334 else ++ptr; 01335 } 01336 if (line != ptr) TRACE("// %s\n", debugstr_an(line, ptr - line)); 01337 } 01338 else TRACE("// %s\n", debugstr_an(comment, comment_size)); 01339 continue; 01340 } 01341 01342 fe->shader_read_opcode(fe_data, &ptr, &ins, ¶m_size); 01343 if (ins.handler_idx == WINED3DSIH_TABLE_SIZE) 01344 
{ 01345 TRACE("Skipping unrecognized instruction.\n"); 01346 ptr += param_size; 01347 continue; 01348 } 01349 01350 if (ins.handler_idx == WINED3DSIH_DCL) 01351 { 01352 struct wined3d_shader_semantic semantic; 01353 01354 fe->shader_read_semantic(&ptr, &semantic); 01355 01356 shader_dump_decl_usage(&semantic, &shader_version); 01357 shader_dump_ins_modifiers(&semantic.reg); 01358 TRACE(" "); 01359 shader_dump_dst_param(&semantic.reg, &shader_version); 01360 } 01361 else if (ins.handler_idx == WINED3DSIH_DEF) 01362 { 01363 struct wined3d_shader_src_param rel_addr; 01364 struct wined3d_shader_dst_param dst; 01365 01366 fe->shader_read_dst_param(fe_data, &ptr, &dst, &rel_addr); 01367 01368 TRACE("def c%u = %f, %f, %f, %f", shader_get_float_offset(dst.reg.type, dst.reg.idx), 01369 *(const float *)(ptr), 01370 *(const float *)(ptr + 1), 01371 *(const float *)(ptr + 2), 01372 *(const float *)(ptr + 3)); 01373 ptr += 4; 01374 } 01375 else if (ins.handler_idx == WINED3DSIH_DEFI) 01376 { 01377 struct wined3d_shader_src_param rel_addr; 01378 struct wined3d_shader_dst_param dst; 01379 01380 fe->shader_read_dst_param(fe_data, &ptr, &dst, &rel_addr); 01381 01382 TRACE("defi i%u = %d, %d, %d, %d", dst.reg.idx, 01383 *(ptr), 01384 *(ptr + 1), 01385 *(ptr + 2), 01386 *(ptr + 3)); 01387 ptr += 4; 01388 } 01389 else if (ins.handler_idx == WINED3DSIH_DEFB) 01390 { 01391 struct wined3d_shader_src_param rel_addr; 01392 struct wined3d_shader_dst_param dst; 01393 01394 fe->shader_read_dst_param(fe_data, &ptr, &dst, &rel_addr); 01395 01396 TRACE("defb b%u = %s", dst.reg.idx, *ptr ? 
"true" : "false"); 01397 ++ptr; 01398 } 01399 else 01400 { 01401 struct wined3d_shader_src_param dst_rel_addr[2]; 01402 struct wined3d_shader_src_param src_rel_addr; 01403 struct wined3d_shader_dst_param dst_param[2]; 01404 struct wined3d_shader_src_param src_param; 01405 01406 for (i = 0; i < ins.dst_count; ++i) 01407 { 01408 fe->shader_read_dst_param(fe_data, &ptr, &dst_param[i], &dst_rel_addr[i]); 01409 } 01410 01411 /* Print out predication source token first - it follows 01412 * the destination token. */ 01413 if (ins.predicate) 01414 { 01415 fe->shader_read_src_param(fe_data, &ptr, &src_param, &src_rel_addr); 01416 TRACE("("); 01417 shader_dump_src_param(&src_param, &shader_version); 01418 TRACE(") "); 01419 } 01420 01421 /* PixWin marks instructions with the coissue flag with a '+' */ 01422 if (ins.coissue) TRACE("+"); 01423 01424 TRACE("%s", shader_opcode_names[ins.handler_idx]); 01425 01426 if (ins.handler_idx == WINED3DSIH_IFC 01427 || ins.handler_idx == WINED3DSIH_BREAKC) 01428 { 01429 switch (ins.flags) 01430 { 01431 case WINED3D_SHADER_REL_OP_GT: TRACE("_gt"); break; 01432 case WINED3D_SHADER_REL_OP_EQ: TRACE("_eq"); break; 01433 case WINED3D_SHADER_REL_OP_GE: TRACE("_ge"); break; 01434 case WINED3D_SHADER_REL_OP_LT: TRACE("_lt"); break; 01435 case WINED3D_SHADER_REL_OP_NE: TRACE("_ne"); break; 01436 case WINED3D_SHADER_REL_OP_LE: TRACE("_le"); break; 01437 default: TRACE("_(%u)", ins.flags); 01438 } 01439 } 01440 else if (ins.handler_idx == WINED3DSIH_TEX 01441 && shader_version.major >= 2 01442 && (ins.flags & WINED3DSI_TEXLD_PROJECT)) 01443 { 01444 TRACE("p"); 01445 } 01446 01447 /* We already read the destination tokens, print them. */ 01448 for (i = 0; i < ins.dst_count; ++i) 01449 { 01450 shader_dump_ins_modifiers(&dst_param[i]); 01451 TRACE(!i ? 
" " : ", "); 01452 shader_dump_dst_param(&dst_param[i], &shader_version); 01453 } 01454 01455 /* Other source tokens */ 01456 for (i = ins.dst_count; i < (ins.dst_count + ins.src_count); ++i) 01457 { 01458 fe->shader_read_src_param(fe_data, &ptr, &src_param, &src_rel_addr); 01459 TRACE(!i ? " " : ", "); 01460 shader_dump_src_param(&src_param, &shader_version); 01461 } 01462 } 01463 TRACE("\n"); 01464 } 01465 } 01466 01467 static void shader_cleanup(struct wined3d_shader *shader) 01468 { 01469 shader->device->shader_backend->shader_destroy(shader); 01470 HeapFree(GetProcessHeap(), 0, shader->reg_maps.constf); 01471 HeapFree(GetProcessHeap(), 0, shader->function); 01472 shader_delete_constant_list(&shader->constantsF); 01473 shader_delete_constant_list(&shader->constantsB); 01474 shader_delete_constant_list(&shader->constantsI); 01475 list_remove(&shader->shader_list_entry); 01476 01477 if (shader->frontend && shader->frontend_data) 01478 shader->frontend->shader_free(shader->frontend_data); 01479 } 01480 01481 static void shader_none_handle_instruction(const struct wined3d_shader_instruction *ins) {} 01482 static void shader_none_select(const struct wined3d_context *context, BOOL usePS, BOOL useVS) {} 01483 static void shader_none_select_depth_blt(void *shader_priv, const struct wined3d_gl_info *gl_info, 01484 enum tex_types tex_type, const SIZE *ds_mask_size) {} 01485 static void shader_none_deselect_depth_blt(void *shader_priv, const struct wined3d_gl_info *gl_info) {} 01486 static void shader_none_update_float_vertex_constants(struct wined3d_device *device, UINT start, UINT count) {} 01487 static void shader_none_update_float_pixel_constants(struct wined3d_device *device, UINT start, UINT count) {} 01488 static void shader_none_load_constants(const struct wined3d_context *context, char usePS, char useVS) {} 01489 static void shader_none_load_np2fixup_constants(void *shader_priv, 01490 const struct wined3d_gl_info *gl_info, const struct wined3d_state *state) {} 
01491 static void shader_none_destroy(struct wined3d_shader *shader) {} 01492 static HRESULT shader_none_alloc(struct wined3d_device *device) {return WINED3D_OK;} 01493 static void shader_none_free(struct wined3d_device *device) {} 01494 static void shader_none_context_destroyed(void *shader_priv, const struct wined3d_context *context) {} 01495 01496 static void shader_none_get_caps(const struct wined3d_gl_info *gl_info, struct shader_caps *caps) 01497 { 01498 /* Set the shader caps to 0 for the none shader backend */ 01499 caps->VertexShaderVersion = 0; 01500 caps->MaxVertexShaderConst = 0; 01501 caps->PixelShaderVersion = 0; 01502 caps->PixelShader1xMaxValue = 0.0f; 01503 caps->MaxPixelShaderConst = 0; 01504 caps->VSClipping = FALSE; 01505 } 01506 01507 static BOOL shader_none_color_fixup_supported(struct color_fixup_desc fixup) 01508 { 01509 if (TRACE_ON(d3d_shader) && TRACE_ON(d3d)) 01510 { 01511 TRACE("Checking support for fixup:\n"); 01512 dump_color_fixup_desc(fixup); 01513 } 01514 01515 /* Faked to make some apps happy. 
*/ 01516 if (!is_complex_fixup(fixup)) 01517 { 01518 TRACE("[OK]\n"); 01519 return TRUE; 01520 } 01521 01522 TRACE("[FAILED]\n"); 01523 return FALSE; 01524 } 01525 01526 const struct wined3d_shader_backend_ops none_shader_backend = 01527 { 01528 shader_none_handle_instruction, 01529 shader_none_select, 01530 shader_none_select_depth_blt, 01531 shader_none_deselect_depth_blt, 01532 shader_none_update_float_vertex_constants, 01533 shader_none_update_float_pixel_constants, 01534 shader_none_load_constants, 01535 shader_none_load_np2fixup_constants, 01536 shader_none_destroy, 01537 shader_none_alloc, 01538 shader_none_free, 01539 shader_none_context_destroyed, 01540 shader_none_get_caps, 01541 shader_none_color_fixup_supported, 01542 }; 01543 01544 static HRESULT shader_set_function(struct wined3d_shader *shader, const DWORD *byte_code, 01545 const struct wined3d_shader_signature *output_signature, DWORD float_const_count, 01546 enum wined3d_shader_type type, unsigned int max_version) 01547 { 01548 struct wined3d_shader_reg_maps *reg_maps = &shader->reg_maps; 01549 const struct wined3d_shader_frontend *fe; 01550 HRESULT hr; 01551 unsigned int backend_version; 01552 01553 TRACE("shader %p, byte_code %p, output_signature %p, float_const_count %u.\n", 01554 shader, byte_code, output_signature, float_const_count); 01555 01556 fe = shader_select_frontend(*byte_code); 01557 if (!fe) 01558 { 01559 FIXME("Unable to find frontend for shader.\n"); 01560 return WINED3DERR_INVALIDCALL; 01561 } 01562 shader->frontend = fe; 01563 shader->frontend_data = fe->shader_init(byte_code, output_signature); 01564 if (!shader->frontend_data) 01565 { 01566 FIXME("Failed to initialize frontend.\n"); 01567 return WINED3DERR_INVALIDCALL; 01568 } 01569 01570 /* First pass: trace shader. */ 01571 if (TRACE_ON(d3d_shader)) 01572 shader_trace_init(fe, shader->frontend_data, byte_code); 01573 01574 /* Initialize immediate constant lists. 
*/ 01575 list_init(&shader->constantsF); 01576 list_init(&shader->constantsB); 01577 list_init(&shader->constantsI); 01578 01579 /* Second pass: figure out which registers are used, what the semantics are, etc. */ 01580 hr = shader_get_registers_used(shader, fe, 01581 reg_maps, shader->input_signature, shader->output_signature, 01582 byte_code, float_const_count); 01583 if (FAILED(hr)) return hr; 01584 01585 if (reg_maps->shader_version.type != type) 01586 { 01587 WARN("Wrong shader type %d.\n", reg_maps->shader_version.type); 01588 return WINED3DERR_INVALIDCALL; 01589 } 01590 if (reg_maps->shader_version.major > max_version) 01591 { 01592 WARN("Shader version %d not supported by this D3D API version.\n", reg_maps->shader_version.major); 01593 return WINED3DERR_INVALIDCALL; 01594 } 01595 switch (type) 01596 { 01597 case WINED3D_SHADER_TYPE_VERTEX: 01598 backend_version = shader->device->vshader_version; 01599 break; 01600 case WINED3D_SHADER_TYPE_PIXEL: 01601 backend_version = shader->device->pshader_version; 01602 break; 01603 default: 01604 FIXME("No backend version-checking for this shader type\n"); 01605 backend_version = 0; 01606 } 01607 if (reg_maps->shader_version.major > backend_version) 01608 { 01609 WARN("Shader version %d.%d not supported by your GPU with the current shader backend.\n", 01610 reg_maps->shader_version.major, reg_maps->shader_version.minor); 01611 return WINED3DERR_INVALIDCALL; 01612 } 01613 01614 shader->function = HeapAlloc(GetProcessHeap(), 0, shader->functionLength); 01615 if (!shader->function) 01616 return E_OUTOFMEMORY; 01617 memcpy(shader->function, byte_code, shader->functionLength); 01618 01619 return WINED3D_OK; 01620 } 01621 01622 ULONG CDECL wined3d_shader_incref(struct wined3d_shader *shader) 01623 { 01624 ULONG refcount = InterlockedIncrement(&shader->ref); 01625 01626 TRACE("%p increasing refcount to %u.\n", shader, refcount); 01627 01628 return refcount; 01629 } 01630 01631 /* Do not call while under the GL lock. 
*/ 01632 ULONG CDECL wined3d_shader_decref(struct wined3d_shader *shader) 01633 { 01634 ULONG refcount = InterlockedDecrement(&shader->ref); 01635 01636 TRACE("%p decreasing refcount to %u.\n", shader, refcount); 01637 01638 if (!refcount) 01639 { 01640 shader_cleanup(shader); 01641 shader->parent_ops->wined3d_object_destroyed(shader->parent); 01642 HeapFree(GetProcessHeap(), 0, shader); 01643 } 01644 01645 return refcount; 01646 } 01647 01648 void * CDECL wined3d_shader_get_parent(const struct wined3d_shader *shader) 01649 { 01650 TRACE("shader %p.\n", shader); 01651 01652 return shader->parent; 01653 } 01654 01655 HRESULT CDECL wined3d_shader_get_byte_code(const struct wined3d_shader *shader, 01656 void *byte_code, UINT *byte_code_size) 01657 { 01658 TRACE("shader %p, byte_code %p, byte_code_size %p.\n", shader, byte_code, byte_code_size); 01659 01660 if (!byte_code) 01661 { 01662 *byte_code_size = shader->functionLength; 01663 return WINED3D_OK; 01664 } 01665 01666 if (*byte_code_size < shader->functionLength) 01667 { 01668 /* MSDN claims (for d3d8 at least) that if *byte_code_size is smaller 01669 * than the required size we should write the required size and 01670 * return D3DERR_MOREDATA. That's not actually true. */ 01671 return WINED3DERR_INVALIDCALL; 01672 } 01673 01674 memcpy(byte_code, shader->function, shader->functionLength); 01675 01676 return WINED3D_OK; 01677 } 01678 01679 /* Set local constants for d3d8 shaders. 
*/ 01680 HRESULT CDECL wined3d_shader_set_local_constants_float(struct wined3d_shader *shader, 01681 UINT start_idx, const float *src_data, UINT count) 01682 { 01683 UINT end_idx = start_idx + count; 01684 UINT i; 01685 01686 TRACE("shader %p, start_idx %u, src_data %p, count %u.\n", shader, start_idx, src_data, count); 01687 01688 if (end_idx > shader->limits.constant_float) 01689 { 01690 WARN("end_idx %u > float constants limit %u.\n", 01691 end_idx, shader->limits.constant_float); 01692 end_idx = shader->limits.constant_float; 01693 } 01694 01695 for (i = start_idx; i < end_idx; ++i) 01696 { 01697 struct wined3d_shader_lconst *lconst = HeapAlloc(GetProcessHeap(), 0, sizeof(*lconst)); 01698 if (!lconst) 01699 return E_OUTOFMEMORY; 01700 01701 lconst->idx = i; 01702 memcpy(lconst->value, src_data + (i - start_idx) * 4 /* 4 components */, 4 * sizeof(float)); 01703 list_add_head(&shader->constantsF, &lconst->entry); 01704 } 01705 01706 return WINED3D_OK; 01707 } 01708 01709 void find_vs_compile_args(const struct wined3d_state *state, 01710 const struct wined3d_shader *shader, struct vs_compile_args *args) 01711 { 01712 args->fog_src = state->render_states[WINED3D_RS_FOGTABLEMODE] 01713 == WINED3D_FOG_NONE ? 
VS_FOG_COORD : VS_FOG_Z; 01714 args->clip_enabled = state->render_states[WINED3D_RS_CLIPPING] 01715 && state->render_states[WINED3D_RS_CLIPPLANEENABLE]; 01716 args->swizzle_map = shader->device->strided_streams.swizzle_map; 01717 } 01718 01719 static BOOL match_usage(BYTE usage1, BYTE usage_idx1, BYTE usage2, BYTE usage_idx2) 01720 { 01721 if (usage_idx1 != usage_idx2) return FALSE; 01722 if (usage1 == usage2) return TRUE; 01723 if (usage1 == WINED3DDECLUSAGE_POSITION && usage2 == WINED3DDECLUSAGE_POSITIONT) return TRUE; 01724 if (usage2 == WINED3DDECLUSAGE_POSITION && usage1 == WINED3DDECLUSAGE_POSITIONT) return TRUE; 01725 01726 return FALSE; 01727 } 01728 01729 BOOL vshader_get_input(const struct wined3d_shader *shader, 01730 BYTE usage_req, BYTE usage_idx_req, unsigned int *regnum) 01731 { 01732 WORD map = shader->reg_maps.input_registers; 01733 unsigned int i; 01734 01735 for (i = 0; map; map >>= 1, ++i) 01736 { 01737 if (!(map & 1)) continue; 01738 01739 if (match_usage(shader->u.vs.attributes[i].usage, 01740 shader->u.vs.attributes[i].usage_idx, usage_req, usage_idx_req)) 01741 { 01742 *regnum = i; 01743 return TRUE; 01744 } 01745 } 01746 return FALSE; 01747 } 01748 01749 static void vertexshader_set_limits(struct wined3d_shader *shader) 01750 { 01751 DWORD shader_version = WINED3D_SHADER_VERSION(shader->reg_maps.shader_version.major, 01752 shader->reg_maps.shader_version.minor); 01753 struct wined3d_device *device = shader->device; 01754 01755 shader->limits.texcoord = 0; 01756 shader->limits.attributes = 16; 01757 shader->limits.packed_input = 0; 01758 01759 switch (shader_version) 01760 { 01761 case WINED3D_SHADER_VERSION(1, 0): 01762 case WINED3D_SHADER_VERSION(1, 1): 01763 shader->limits.temporary = 12; 01764 shader->limits.constant_bool = 0; 01765 shader->limits.constant_int = 0; 01766 shader->limits.address = 1; 01767 shader->limits.packed_output = 12; 01768 shader->limits.sampler = 0; 01769 shader->limits.label = 0; 01770 /* TODO: vs_1_1 has a 
minimum of 96 constants. What happens when 01771 * a vs_1_1 shader is used on a vs_3_0 capable card that has 256 01772 * constants? */ 01773 shader->limits.constant_float = min(256, device->d3d_vshader_constantF); 01774 break; 01775 01776 case WINED3D_SHADER_VERSION(2, 0): 01777 case WINED3D_SHADER_VERSION(2, 1): 01778 shader->limits.temporary = 12; 01779 shader->limits.constant_bool = 16; 01780 shader->limits.constant_int = 16; 01781 shader->limits.address = 1; 01782 shader->limits.packed_output = 12; 01783 shader->limits.sampler = 0; 01784 shader->limits.label = 16; 01785 shader->limits.constant_float = min(256, device->d3d_vshader_constantF); 01786 break; 01787 01788 case WINED3D_SHADER_VERSION(4, 0): 01789 FIXME("Using 3.0 limits for 4.0 shader.\n"); 01790 /* Fall through. */ 01791 01792 case WINED3D_SHADER_VERSION(3, 0): 01793 shader->limits.temporary = 32; 01794 shader->limits.constant_bool = 32; 01795 shader->limits.constant_int = 32; 01796 shader->limits.address = 1; 01797 shader->limits.packed_output = 12; 01798 shader->limits.sampler = 4; 01799 shader->limits.label = 16; /* FIXME: 2048 */ 01800 /* DX10 cards on Windows advertise a d3d9 constant limit of 256 01801 * even though they are capable of supporting much more (GL 01802 * drivers advertise 1024). d3d9.dll and d3d8.dll clamp the 01803 * wined3d-advertised maximum. Clamp the constant limit for <= 3.0 01804 * shaders to 256. 
*/ 01805 shader->limits.constant_float = min(256, device->d3d_vshader_constantF); 01806 break; 01807 01808 default: 01809 shader->limits.temporary = 12; 01810 shader->limits.constant_bool = 16; 01811 shader->limits.constant_int = 16; 01812 shader->limits.address = 1; 01813 shader->limits.packed_output = 12; 01814 shader->limits.sampler = 0; 01815 shader->limits.label = 16; 01816 shader->limits.constant_float = min(256, device->d3d_vshader_constantF); 01817 FIXME("Unrecognized vertex shader version \"%u.%u\".\n", 01818 shader->reg_maps.shader_version.major, 01819 shader->reg_maps.shader_version.minor); 01820 } 01821 } 01822 01823 static HRESULT vertexshader_init(struct wined3d_shader *shader, struct wined3d_device *device, 01824 const DWORD *byte_code, const struct wined3d_shader_signature *output_signature, 01825 void *parent, const struct wined3d_parent_ops *parent_ops, unsigned int max_version) 01826 { 01827 struct wined3d_shader_reg_maps *reg_maps = &shader->reg_maps; 01828 unsigned int i; 01829 HRESULT hr; 01830 WORD map; 01831 01832 if (!byte_code) return WINED3DERR_INVALIDCALL; 01833 01834 shader_init(shader, device, parent, parent_ops); 01835 hr = shader_set_function(shader, byte_code, output_signature, device->d3d_vshader_constantF, 01836 WINED3D_SHADER_TYPE_VERTEX, max_version); 01837 if (FAILED(hr)) 01838 { 01839 WARN("Failed to set function, hr %#x.\n", hr); 01840 shader_cleanup(shader); 01841 return hr; 01842 } 01843 01844 map = reg_maps->input_registers; 01845 for (i = 0; map; map >>= 1, ++i) 01846 { 01847 if (!(map & 1) || !shader->input_signature[i].semantic_name) 01848 continue; 01849 01850 shader->u.vs.attributes[i].usage = 01851 shader_usage_from_semantic_name(shader->input_signature[i].semantic_name); 01852 shader->u.vs.attributes[i].usage_idx = shader->input_signature[i].semantic_idx; 01853 } 01854 01855 if (output_signature) 01856 { 01857 for (i = 0; i < output_signature->element_count; ++i) 01858 { 01859 struct wined3d_shader_signature_element 
*e = &output_signature->elements[i]; 01860 reg_maps->output_registers |= 1 << e->register_idx; 01861 shader->output_signature[e->register_idx] = *e; 01862 } 01863 } 01864 01865 vertexshader_set_limits(shader); 01866 01867 shader->load_local_constsF = reg_maps->usesrelconstF 01868 && !list_empty(&shader->constantsF); 01869 01870 return WINED3D_OK; 01871 } 01872 01873 static HRESULT geometryshader_init(struct wined3d_shader *shader, struct wined3d_device *device, 01874 const DWORD *byte_code, const struct wined3d_shader_signature *output_signature, 01875 void *parent, const struct wined3d_parent_ops *parent_ops, unsigned int max_version) 01876 { 01877 HRESULT hr; 01878 01879 shader_init(shader, device, parent, parent_ops); 01880 hr = shader_set_function(shader, byte_code, output_signature, 0, 01881 WINED3D_SHADER_TYPE_GEOMETRY, max_version); 01882 if (FAILED(hr)) 01883 { 01884 WARN("Failed to set function, hr %#x.\n", hr); 01885 shader_cleanup(shader); 01886 return hr; 01887 } 01888 01889 shader->load_local_constsF = FALSE; 01890 01891 return WINED3D_OK; 01892 } 01893 01894 void find_ps_compile_args(const struct wined3d_state *state, 01895 const struct wined3d_shader *shader, struct ps_compile_args *args) 01896 { 01897 struct wined3d_device *device = shader->device; 01898 const struct wined3d_texture *texture; 01899 UINT i; 01900 01901 memset(args, 0, sizeof(*args)); /* FIXME: Make sure all bits are set. 
*/ 01902 if (state->render_states[WINED3D_RS_SRGBWRITEENABLE]) 01903 { 01904 const struct wined3d_surface *rt = state->fb->render_targets[0]; 01905 if (rt->resource.format->flags & WINED3DFMT_FLAG_SRGB_WRITE) args->srgb_correction = 1; 01906 } 01907 01908 if (shader->reg_maps.shader_version.major == 1 01909 && shader->reg_maps.shader_version.minor <= 3) 01910 { 01911 for (i = 0; i < 4; ++i) 01912 { 01913 DWORD flags = state->texture_states[i][WINED3D_TSS_TEXTURE_TRANSFORM_FLAGS]; 01914 01915 if (flags & WINED3D_TTFF_PROJECTED) 01916 { 01917 enum wined3d_sampler_texture_type sampler_type = shader->reg_maps.sampler_type[i]; 01918 DWORD tex_transform = flags & ~WINED3D_TTFF_PROJECTED; 01919 DWORD max_valid = WINED3D_TTFF_COUNT4; 01920 01921 if (!state->vertex_shader) 01922 { 01923 unsigned int j; 01924 unsigned int index = state->texture_states[i][WINED3D_TSS_TEXCOORD_INDEX]; 01925 for (j = 0; j < state->vertex_declaration->element_count; ++j) 01926 { 01927 struct wined3d_vertex_declaration_element *element = 01928 &state->vertex_declaration->elements[j]; 01929 01930 if (element->usage == WINED3DDECLUSAGE_TEXCOORD 01931 && element->usage_idx == index) 01932 { 01933 max_valid = element->format->component_count; 01934 break; 01935 } 01936 } 01937 } 01938 01939 if (!tex_transform || tex_transform > max_valid) 01940 { 01941 WARN("Fixing up projected texture transform flags from %#x to %#x.\n", 01942 tex_transform, max_valid); 01943 tex_transform = max_valid; 01944 } 01945 01946 if ((sampler_type == WINED3DSTT_1D && tex_transform > WINED3D_TTFF_COUNT1) 01947 || (sampler_type == WINED3DSTT_2D && tex_transform > WINED3D_TTFF_COUNT2) 01948 || (sampler_type == WINED3DSTT_VOLUME && tex_transform > WINED3D_TTFF_COUNT3)) 01949 tex_transform |= WINED3D_PSARGS_PROJECTED; 01950 else 01951 WARN("Application requested projected texture with unsuitable texture coordinates.\n"); 01952 01953 args->tex_transform |= tex_transform << i * WINED3D_PSARGS_TEXTRANSFORM_SHIFT; 01954 } 01955 } 
01956 } 01957 01958 for (i = 0; i < MAX_FRAGMENT_SAMPLERS; ++i) 01959 { 01960 if (!shader->reg_maps.sampler_type[i]) 01961 continue; 01962 01963 texture = state->textures[i]; 01964 if (!texture) 01965 { 01966 args->color_fixup[i] = COLOR_FIXUP_IDENTITY; 01967 continue; 01968 } 01969 args->color_fixup[i] = texture->resource.format->color_fixup; 01970 01971 if (texture->resource.format->flags & WINED3DFMT_FLAG_SHADOW) 01972 args->shadow |= 1 << i; 01973 01974 /* Flag samplers that need NP2 texcoord fixup. */ 01975 if (!(texture->flags & WINED3D_TEXTURE_POW2_MAT_IDENT)) 01976 args->np2_fixup |= (1 << i); 01977 } 01978 if (shader->reg_maps.shader_version.major >= 3) 01979 { 01980 if (device->strided_streams.position_transformed) 01981 { 01982 args->vp_mode = pretransformed; 01983 } 01984 else if (use_vs(state)) 01985 { 01986 args->vp_mode = vertexshader; 01987 } 01988 else 01989 { 01990 args->vp_mode = fixedfunction; 01991 } 01992 args->fog = FOG_OFF; 01993 } 01994 else 01995 { 01996 args->vp_mode = vertexshader; 01997 if (state->render_states[WINED3D_RS_FOGENABLE]) 01998 { 01999 switch (state->render_states[WINED3D_RS_FOGTABLEMODE]) 02000 { 02001 case WINED3D_FOG_NONE: 02002 if (device->strided_streams.position_transformed || use_vs(state)) 02003 { 02004 args->fog = FOG_LINEAR; 02005 break; 02006 } 02007 02008 switch (state->render_states[WINED3D_RS_FOGVERTEXMODE]) 02009 { 02010 case WINED3D_FOG_NONE: /* Fall through. 
*/ 02011 case WINED3D_FOG_LINEAR: args->fog = FOG_LINEAR; break; 02012 case WINED3D_FOG_EXP: args->fog = FOG_EXP; break; 02013 case WINED3D_FOG_EXP2: args->fog = FOG_EXP2; break; 02014 } 02015 break; 02016 02017 case WINED3D_FOG_LINEAR: args->fog = FOG_LINEAR; break; 02018 case WINED3D_FOG_EXP: args->fog = FOG_EXP; break; 02019 case WINED3D_FOG_EXP2: args->fog = FOG_EXP2; break; 02020 } 02021 } 02022 else 02023 { 02024 args->fog = FOG_OFF; 02025 } 02026 } 02027 } 02028 02029 static void pixelshader_set_limits(struct wined3d_shader *shader) 02030 { 02031 DWORD shader_version = WINED3D_SHADER_VERSION(shader->reg_maps.shader_version.major, 02032 shader->reg_maps.shader_version.minor); 02033 02034 shader->limits.attributes = 0; 02035 shader->limits.address = 0; 02036 shader->limits.packed_output = 0; 02037 02038 switch (shader_version) 02039 { 02040 case WINED3D_SHADER_VERSION(1, 0): 02041 case WINED3D_SHADER_VERSION(1, 1): 02042 case WINED3D_SHADER_VERSION(1, 2): 02043 case WINED3D_SHADER_VERSION(1, 3): 02044 shader->limits.temporary = 2; 02045 shader->limits.constant_float = 8; 02046 shader->limits.constant_int = 0; 02047 shader->limits.constant_bool = 0; 02048 shader->limits.texcoord = 4; 02049 shader->limits.sampler = 4; 02050 shader->limits.packed_input = 0; 02051 shader->limits.label = 0; 02052 break; 02053 02054 case WINED3D_SHADER_VERSION(1, 4): 02055 shader->limits.temporary = 6; 02056 shader->limits.constant_float = 8; 02057 shader->limits.constant_int = 0; 02058 shader->limits.constant_bool = 0; 02059 shader->limits.texcoord = 6; 02060 shader->limits.sampler = 6; 02061 shader->limits.packed_input = 0; 02062 shader->limits.label = 0; 02063 break; 02064 02065 /* FIXME: Temporaries must match D3DPSHADERCAPS2_0.NumTemps. 
*/ 02066 case WINED3D_SHADER_VERSION(2, 0): 02067 shader->limits.temporary = 32; 02068 shader->limits.constant_float = 32; 02069 shader->limits.constant_int = 16; 02070 shader->limits.constant_bool = 16; 02071 shader->limits.texcoord = 8; 02072 shader->limits.sampler = 16; 02073 shader->limits.packed_input = 0; 02074 break; 02075 02076 case WINED3D_SHADER_VERSION(2, 1): 02077 shader->limits.temporary = 32; 02078 shader->limits.constant_float = 32; 02079 shader->limits.constant_int = 16; 02080 shader->limits.constant_bool = 16; 02081 shader->limits.texcoord = 8; 02082 shader->limits.sampler = 16; 02083 shader->limits.packed_input = 0; 02084 shader->limits.label = 16; 02085 break; 02086 02087 case WINED3D_SHADER_VERSION(4, 0): 02088 FIXME("Using 3.0 limits for 4.0 shader.\n"); 02089 /* Fall through. */ 02090 02091 case WINED3D_SHADER_VERSION(3, 0): 02092 shader->limits.temporary = 32; 02093 shader->limits.constant_float = 224; 02094 shader->limits.constant_int = 16; 02095 shader->limits.constant_bool = 16; 02096 shader->limits.texcoord = 0; 02097 shader->limits.sampler = 16; 02098 shader->limits.packed_input = 12; 02099 shader->limits.label = 16; /* FIXME: 2048 */ 02100 break; 02101 02102 default: 02103 shader->limits.temporary = 32; 02104 shader->limits.constant_float = 32; 02105 shader->limits.constant_int = 16; 02106 shader->limits.constant_bool = 16; 02107 shader->limits.texcoord = 8; 02108 shader->limits.sampler = 16; 02109 shader->limits.packed_input = 0; 02110 shader->limits.label = 0; 02111 FIXME("Unrecognized pixel shader version %u.%u\n", 02112 shader->reg_maps.shader_version.major, 02113 shader->reg_maps.shader_version.minor); 02114 } 02115 } 02116 02117 static HRESULT pixelshader_init(struct wined3d_shader *shader, struct wined3d_device *device, 02118 const DWORD *byte_code, const struct wined3d_shader_signature *output_signature, 02119 void *parent, const struct wined3d_parent_ops *parent_ops, unsigned int max_version) 02120 { 02121 const struct 
wined3d_gl_info *gl_info = &device->adapter->gl_info; 02122 unsigned int i, highest_reg_used = 0, num_regs_used = 0; 02123 HRESULT hr; 02124 02125 if (!byte_code) return WINED3DERR_INVALIDCALL; 02126 02127 shader_init(shader, device, parent, parent_ops); 02128 hr = shader_set_function(shader, byte_code, output_signature, device->d3d_pshader_constantF, 02129 WINED3D_SHADER_TYPE_PIXEL, max_version); 02130 if (FAILED(hr)) 02131 { 02132 WARN("Failed to set function, hr %#x.\n", hr); 02133 shader_cleanup(shader); 02134 return hr; 02135 } 02136 02137 pixelshader_set_limits(shader); 02138 02139 for (i = 0; i < MAX_REG_INPUT; ++i) 02140 { 02141 if (shader->u.ps.input_reg_used[i]) 02142 { 02143 ++num_regs_used; 02144 highest_reg_used = i; 02145 } 02146 } 02147 02148 /* Don't do any register mapping magic if it is not needed, or if we can't 02149 * achieve anything anyway */ 02150 if (highest_reg_used < (gl_info->limits.glsl_varyings / 4) 02151 || num_regs_used > (gl_info->limits.glsl_varyings / 4)) 02152 { 02153 if (num_regs_used > (gl_info->limits.glsl_varyings / 4)) 02154 { 02155 /* This happens with relative addressing. 
The input mapper function 02156 * warns about this if the higher registers are declared too, so 02157 * don't write a FIXME here */ 02158 WARN("More varying registers used than supported\n"); 02159 } 02160 02161 for (i = 0; i < MAX_REG_INPUT; ++i) 02162 { 02163 shader->u.ps.input_reg_map[i] = i; 02164 } 02165 02166 shader->u.ps.declared_in_count = highest_reg_used + 1; 02167 } 02168 else 02169 { 02170 shader->u.ps.declared_in_count = 0; 02171 for (i = 0; i < MAX_REG_INPUT; ++i) 02172 { 02173 if (shader->u.ps.input_reg_used[i]) 02174 shader->u.ps.input_reg_map[i] = shader->u.ps.declared_in_count++; 02175 else shader->u.ps.input_reg_map[i] = ~0U; 02176 } 02177 } 02178 02179 shader->load_local_constsF = FALSE; 02180 02181 return WINED3D_OK; 02182 } 02183 02184 void pixelshader_update_samplers(struct wined3d_shader_reg_maps *reg_maps, struct wined3d_texture * const *textures) 02185 { 02186 enum wined3d_sampler_texture_type *sampler_type = reg_maps->sampler_type; 02187 unsigned int i; 02188 02189 if (reg_maps->shader_version.major != 1) return; 02190 02191 for (i = 0; i < max(MAX_FRAGMENT_SAMPLERS, MAX_VERTEX_SAMPLERS); ++i) 02192 { 02193 /* We don't sample from this sampler. */ 02194 if (!sampler_type[i]) continue; 02195 02196 if (!textures[i]) 02197 { 02198 WARN("No texture bound to sampler %u, using 2D.\n", i); 02199 sampler_type[i] = WINED3DSTT_2D; 02200 continue; 02201 } 02202 02203 switch (textures[i]->target) 02204 { 02205 case GL_TEXTURE_RECTANGLE_ARB: 02206 case GL_TEXTURE_2D: 02207 /* We have to select between texture rectangles and 2D 02208 * textures later because 2.0 and 3.0 shaders only have 02209 * WINED3DSTT_2D as well. 
*/ 02210 sampler_type[i] = WINED3DSTT_2D; 02211 break; 02212 02213 case GL_TEXTURE_3D: 02214 sampler_type[i] = WINED3DSTT_VOLUME; 02215 break; 02216 02217 case GL_TEXTURE_CUBE_MAP_ARB: 02218 sampler_type[i] = WINED3DSTT_CUBE; 02219 break; 02220 02221 default: 02222 FIXME("Unrecognized texture type %#x, using 2D.\n", textures[i]->target); 02223 sampler_type[i] = WINED3DSTT_2D; 02224 } 02225 } 02226 } 02227 02228 HRESULT CDECL wined3d_shader_create_gs(struct wined3d_device *device, const DWORD *byte_code, 02229 const struct wined3d_shader_signature *output_signature, void *parent, 02230 const struct wined3d_parent_ops *parent_ops, struct wined3d_shader **shader, unsigned int max_version) 02231 { 02232 struct wined3d_shader *object; 02233 HRESULT hr; 02234 02235 TRACE("device %p, byte_code %p, output_signature %p, parent %p, parent_ops %p, shader %p.\n", 02236 device, byte_code, output_signature, parent, parent_ops, shader); 02237 02238 object = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*object)); 02239 if (!object) 02240 { 02241 ERR("Failed to allocate shader memory.\n"); 02242 return E_OUTOFMEMORY; 02243 } 02244 02245 hr = geometryshader_init(object, device, byte_code, output_signature, parent, parent_ops, max_version); 02246 if (FAILED(hr)) 02247 { 02248 WARN("Failed to initialize geometry shader, hr %#x.\n", hr); 02249 HeapFree(GetProcessHeap(), 0, object); 02250 return hr; 02251 } 02252 02253 TRACE("Created geometry shader %p.\n", object); 02254 *shader = object; 02255 02256 return WINED3D_OK; 02257 } 02258 02259 HRESULT CDECL wined3d_shader_create_ps(struct wined3d_device *device, const DWORD *byte_code, 02260 const struct wined3d_shader_signature *output_signature, void *parent, 02261 const struct wined3d_parent_ops *parent_ops, struct wined3d_shader **shader, unsigned int max_version) 02262 { 02263 struct wined3d_shader *object; 02264 HRESULT hr; 02265 02266 TRACE("device %p, byte_code %p, output_signature %p, parent %p, parent_ops %p, shader 
%p.\n", 02267 device, byte_code, output_signature, parent, parent_ops, shader); 02268 02269 if (device->ps_selected_mode == SHADER_NONE) 02270 return WINED3DERR_INVALIDCALL; 02271 02272 object = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*object)); 02273 if (!object) 02274 { 02275 ERR("Failed to allocate shader memory.\n"); 02276 return E_OUTOFMEMORY; 02277 } 02278 02279 hr = pixelshader_init(object, device, byte_code, output_signature, parent, parent_ops, max_version); 02280 if (FAILED(hr)) 02281 { 02282 WARN("Failed to initialize pixel shader, hr %#x.\n", hr); 02283 HeapFree(GetProcessHeap(), 0, object); 02284 return hr; 02285 } 02286 02287 TRACE("Created pixel shader %p.\n", object); 02288 *shader = object; 02289 02290 return WINED3D_OK; 02291 } 02292 02293 HRESULT CDECL wined3d_shader_create_vs(struct wined3d_device *device, const DWORD *byte_code, 02294 const struct wined3d_shader_signature *output_signature, void *parent, 02295 const struct wined3d_parent_ops *parent_ops, struct wined3d_shader **shader, unsigned int max_version) 02296 { 02297 struct wined3d_shader *object; 02298 HRESULT hr; 02299 02300 TRACE("device %p, byte_code %p, output_signature %p, parent %p, parent_ops %p, shader %p.\n", 02301 device, byte_code, output_signature, parent, parent_ops, shader); 02302 02303 if (device->vs_selected_mode == SHADER_NONE) 02304 return WINED3DERR_INVALIDCALL; 02305 02306 object = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*object)); 02307 if (!object) 02308 { 02309 ERR("Failed to allocate shader memory.\n"); 02310 return E_OUTOFMEMORY; 02311 } 02312 02313 hr = vertexshader_init(object, device, byte_code, output_signature, parent, parent_ops, max_version); 02314 if (FAILED(hr)) 02315 { 02316 WARN("Failed to initialize vertex shader, hr %#x.\n", hr); 02317 HeapFree(GetProcessHeap(), 0, object); 02318 return hr; 02319 } 02320 02321 TRACE("Created vertex shader %p.\n", object); 02322 *shader = object; 02323 02324 return WINED3D_OK; 02325 } 
Generated on Wed May 23 2012 04:20:01 for ReactOS by Doxygen 1.7.6.1