ReactOS  0.4.12-dev-90-g2e2e63e
shader.c
Go to the documentation of this file.
1 /*
2  * Copyright 2002-2003 Jason Edmeades
3  * Copyright 2002-2003 Raphael Junqueira
4  * Copyright 2004 Christian Costa
5  * Copyright 2005 Oliver Stieber
6  * Copyright 2006 Ivan Gyurdiev
7  * Copyright 2007-2008, 2013 Stefan Dösinger for CodeWeavers
8  * Copyright 2009-2011 Henri Verbeet for CodeWeavers
9  *
10  * This library is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * This library is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with this library; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
23  */
24 
25 #include "config.h"
26 #include "wine/port.h"
27 
28 #include <stdio.h>
29 #include <string.h>
30 
31 #include "wined3d_private.h"
32 
33 WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader);
34 
/* Constant vectors exported for sRGB colour handling. The values look like the
 * standard sRGB transfer-function constants (1/2.4, 1.055, 0.055, 12.92 and
 * the 0.0031308 cut-off) - NOTE(review): confirm against the shader backends
 * that consume them. */
35 /* Component order: pow, mul_high, sub_high, mul_low. */
36 const float wined3d_srgb_const0[] = {0.41666f, 1.055f, 0.055f, 12.92f};
37 /* First component: cmp. The remaining three components are zero. */
38 const float wined3d_srgb_const1[] = {0.0031308f, 0.0f, 0.0f, 0.0f};
39 
/* Printable mnemonic for every shader instruction handler.
 *
 * The table is indexed directly by the handler index: the lookup further down
 * in this file returns shader_opcode_names[handler_idx] after checking the
 * index against ARRAY_SIZE(shader_opcode_names). The comment in front of each
 * string names the WINED3DSIH_* constant that slot is written for, so the
 * order of the entries is significant. */
40 static const char * const shader_opcode_names[] =
41 {
42  /* WINED3DSIH_ABS */ "abs",
43  /* WINED3DSIH_ADD */ "add",
44  /* WINED3DSIH_AND */ "and",
45  /* WINED3DSIH_ATOMIC_AND */ "atomic_and",
46  /* WINED3DSIH_ATOMIC_CMP_STORE */ "atomic_cmp_store",
47  /* WINED3DSIH_ATOMIC_IADD */ "atomic_iadd",
48  /* WINED3DSIH_ATOMIC_IMAX */ "atomic_imax",
49  /* WINED3DSIH_ATOMIC_IMIN */ "atomic_imin",
50  /* WINED3DSIH_ATOMIC_OR */ "atomic_or",
51  /* WINED3DSIH_ATOMIC_UMAX */ "atomic_umax",
52  /* WINED3DSIH_ATOMIC_UMIN */ "atomic_umin",
53  /* WINED3DSIH_ATOMIC_XOR */ "atomic_xor",
54  /* WINED3DSIH_BEM */ "bem",
55  /* WINED3DSIH_BFI */ "bfi",
56  /* WINED3DSIH_BFREV */ "bfrev",
57  /* WINED3DSIH_BREAK */ "break",
58  /* WINED3DSIH_BREAKC */ "breakc",
59  /* WINED3DSIH_BREAKP */ "breakp",
60  /* WINED3DSIH_BUFINFO */ "bufinfo",
61  /* WINED3DSIH_CALL */ "call",
62  /* WINED3DSIH_CALLNZ */ "callnz",
63  /* WINED3DSIH_CASE */ "case",
64  /* WINED3DSIH_CMP */ "cmp",
65  /* WINED3DSIH_CND */ "cnd",
66  /* WINED3DSIH_CONTINUE */ "continue",
67  /* WINED3DSIH_CONTINUEP */ "continuec",
68  /* WINED3DSIH_COUNTBITS */ "countbits",
69  /* WINED3DSIH_CRS */ "crs",
70  /* WINED3DSIH_CUT */ "cut",
71  /* WINED3DSIH_CUT_STREAM */ "cut_stream",
72  /* WINED3DSIH_DCL */ "dcl",
73  /* WINED3DSIH_DCL_CONSTANT_BUFFER */ "dcl_constantBuffer",
74  /* WINED3DSIH_DCL_FUNCTION_BODY */ "dcl_function_body",
75  /* WINED3DSIH_DCL_FUNCTION_TABLE */ "dcl_function_table",
76  /* WINED3DSIH_DCL_GLOBAL_FLAGS */ "dcl_globalFlags",
77  /* WINED3DSIH_DCL_GS_INSTANCES */ "dcl_gs_instances",
78  /* WINED3DSIH_DCL_HS_FORK_PHASE_INSTANCE_COUNT */ "dcl_hs_fork_phase_instance_count",
79  /* WINED3DSIH_DCL_HS_JOIN_PHASE_INSTANCE_COUNT */ "dcl_hs_join_phase_instance_count",
80  /* WINED3DSIH_DCL_HS_MAX_TESSFACTOR */ "dcl_hs_max_tessfactor",
81  /* WINED3DSIH_DCL_IMMEDIATE_CONSTANT_BUFFER */ "dcl_immediateConstantBuffer",
82  /* WINED3DSIH_DCL_INDEX_RANGE */ "dcl_index_range",
83  /* WINED3DSIH_DCL_INDEXABLE_TEMP */ "dcl_indexableTemp",
84  /* WINED3DSIH_DCL_INPUT */ "dcl_input",
85  /* WINED3DSIH_DCL_INPUT_CONTROL_POINT_COUNT */ "dcl_input_control_point_count",
86  /* WINED3DSIH_DCL_INPUT_PRIMITIVE */ "dcl_inputPrimitive",
87  /* WINED3DSIH_DCL_INPUT_PS */ "dcl_input_ps",
88  /* WINED3DSIH_DCL_INPUT_PS_SGV */ "dcl_input_ps_sgv",
89  /* WINED3DSIH_DCL_INPUT_PS_SIV */ "dcl_input_ps_siv",
90  /* WINED3DSIH_DCL_INPUT_SGV */ "dcl_input_sgv",
91  /* WINED3DSIH_DCL_INPUT_SIV */ "dcl_input_siv",
92  /* WINED3DSIH_DCL_INTERFACE */ "dcl_interface",
93  /* WINED3DSIH_DCL_OUTPUT */ "dcl_output",
94  /* WINED3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT */ "dcl_output_control_point_count",
95  /* WINED3DSIH_DCL_OUTPUT_SIV */ "dcl_output_siv",
96  /* WINED3DSIH_DCL_OUTPUT_TOPOLOGY */ "dcl_outputTopology",
97  /* WINED3DSIH_DCL_RESOURCE_RAW */ "dcl_resource_raw",
98  /* WINED3DSIH_DCL_RESOURCE_STRUCTURED */ "dcl_resource_structured",
99  /* WINED3DSIH_DCL_SAMPLER */ "dcl_sampler",
100  /* WINED3DSIH_DCL_STREAM */ "dcl_stream",
101  /* WINED3DSIH_DCL_TEMPS */ "dcl_temps",
102  /* WINED3DSIH_DCL_TESSELLATOR_DOMAIN */ "dcl_tessellator_domain",
103  /* WINED3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE */ "dcl_tessellator_output_primitive",
104  /* WINED3DSIH_DCL_TESSELLATOR_PARTITIONING */ "dcl_tessellator_partitioning",
105  /* WINED3DSIH_DCL_TGSM_RAW */ "dcl_tgsm_raw",
106  /* WINED3DSIH_DCL_TGSM_STRUCTURED */ "dcl_tgsm_structured",
107  /* WINED3DSIH_DCL_THREAD_GROUP */ "dcl_thread_group",
108  /* WINED3DSIH_DCL_UAV_RAW */ "dcl_uav_raw",
109  /* WINED3DSIH_DCL_UAV_STRUCTURED */ "dcl_uav_structured",
110  /* WINED3DSIH_DCL_UAV_TYPED */ "dcl_uav_typed",
111  /* WINED3DSIH_DCL_VERTICES_OUT */ "dcl_maxOutputVertexCount",
112  /* WINED3DSIH_DEF */ "def",
113  /* WINED3DSIH_DEFAULT */ "default",
114  /* WINED3DSIH_DEFB */ "defb",
115  /* WINED3DSIH_DEFI */ "defi",
116  /* WINED3DSIH_DIV */ "div",
117  /* WINED3DSIH_DP2 */ "dp2",
118  /* WINED3DSIH_DP2ADD */ "dp2add",
119  /* WINED3DSIH_DP3 */ "dp3",
120  /* WINED3DSIH_DP4 */ "dp4",
121  /* WINED3DSIH_DST */ "dst",
122  /* WINED3DSIH_DSX */ "dsx",
123  /* WINED3DSIH_DSX_COARSE */ "deriv_rtx_coarse",
124  /* WINED3DSIH_DSX_FINE */ "deriv_rtx_fine",
125  /* WINED3DSIH_DSY */ "dsy",
126  /* WINED3DSIH_DSY_COARSE */ "deriv_rty_coarse",
127  /* WINED3DSIH_DSY_FINE */ "deriv_rty_fine",
128  /* WINED3DSIH_EVAL_SAMPLE_INDEX */ "eval_sample_index",
129  /* WINED3DSIH_ELSE */ "else",
130  /* WINED3DSIH_EMIT */ "emit",
131  /* WINED3DSIH_EMIT_STREAM */ "emit_stream",
132  /* WINED3DSIH_ENDIF */ "endif",
133  /* WINED3DSIH_ENDLOOP */ "endloop",
134  /* WINED3DSIH_ENDREP */ "endrep",
135  /* WINED3DSIH_ENDSWITCH */ "endswitch",
136  /* WINED3DSIH_EQ */ "eq",
137  /* WINED3DSIH_EXP */ "exp",
138  /* WINED3DSIH_EXPP */ "expp",
139  /* WINED3DSIH_F16TOF32 */ "f16tof32",
140  /* WINED3DSIH_F32TOF16 */ "f32tof16",
141  /* WINED3DSIH_FCALL */ "fcall",
142  /* WINED3DSIH_FIRSTBIT_HI */ "firstbit_hi",
143  /* WINED3DSIH_FIRSTBIT_LO */ "firstbit_lo",
144  /* WINED3DSIH_FIRSTBIT_SHI */ "firstbit_shi",
145  /* WINED3DSIH_FRC */ "frc",
146  /* WINED3DSIH_FTOI */ "ftoi",
147  /* WINED3DSIH_FTOU */ "ftou",
148  /* WINED3DSIH_GATHER4 */ "gather4",
149  /* WINED3DSIH_GATHER4_C */ "gather4_c",
150  /* WINED3DSIH_GATHER4_PO */ "gather4_po",
151  /* WINED3DSIH_GATHER4_PO_C */ "gather4_po_c",
152  /* WINED3DSIH_GE */ "ge",
153  /* WINED3DSIH_HS_CONTROL_POINT_PHASE */ "hs_control_point_phase",
154  /* WINED3DSIH_HS_DECLS */ "hs_decls",
155  /* WINED3DSIH_HS_FORK_PHASE */ "hs_fork_phase",
156  /* WINED3DSIH_HS_JOIN_PHASE */ "hs_join_phase",
157  /* WINED3DSIH_IADD */ "iadd",
158  /* WINED3DSIH_IBFE */ "ibfe",
159  /* WINED3DSIH_IEQ */ "ieq",
160  /* WINED3DSIH_IF */ "if",
161  /* WINED3DSIH_IFC */ "ifc",
162  /* WINED3DSIH_IGE */ "ige",
163  /* WINED3DSIH_ILT */ "ilt",
164  /* WINED3DSIH_IMAD */ "imad",
165  /* WINED3DSIH_IMAX */ "imax",
166  /* WINED3DSIH_IMIN */ "imin",
167  /* WINED3DSIH_IMM_ATOMIC_ALLOC */ "imm_atomic_alloc",
168  /* WINED3DSIH_IMM_ATOMIC_AND */ "imm_atomic_and",
169  /* WINED3DSIH_IMM_ATOMIC_CMP_EXCH */ "imm_atomic_cmp_exch",
170  /* WINED3DSIH_IMM_ATOMIC_CONSUME */ "imm_atomic_consume",
171  /* WINED3DSIH_IMM_ATOMIC_EXCH */ "imm_atomic_exch",
172  /* WINED3DSIH_IMM_ATOMIC_IADD */ "imm_atomic_iadd",
173  /* WINED3DSIH_IMM_ATOMIC_IMAX */ "imm_atomic_imax",
174  /* WINED3DSIH_IMM_ATOMIC_IMIN */ "imm_atomic_imin",
175  /* WINED3DSIH_IMM_ATOMIC_OR */ "imm_atomic_or",
176  /* WINED3DSIH_IMM_ATOMIC_UMAX */ "imm_atomic_umax",
177  /* WINED3DSIH_IMM_ATOMIC_UMIN */ "imm_atomic_umin",
178  /* WINED3DSIH_IMM_ATOMIC_XOR */ "imm_atomic_xor",
179  /* WINED3DSIH_IMUL */ "imul",
180  /* WINED3DSIH_INE */ "ine",
181  /* WINED3DSIH_INEG */ "ineg",
182  /* WINED3DSIH_ISHL */ "ishl",
183  /* WINED3DSIH_ISHR */ "ishr",
184  /* WINED3DSIH_ITOF */ "itof",
185  /* WINED3DSIH_LABEL */ "label",
186  /* WINED3DSIH_LD */ "ld",
187  /* WINED3DSIH_LD2DMS */ "ld2dms",
188  /* WINED3DSIH_LD_RAW */ "ld_raw",
189  /* WINED3DSIH_LD_STRUCTURED */ "ld_structured",
190  /* WINED3DSIH_LD_UAV_TYPED */ "ld_uav_typed",
191  /* WINED3DSIH_LIT */ "lit",
192  /* WINED3DSIH_LOD */ "lod",
193  /* WINED3DSIH_LOG */ "log",
194  /* WINED3DSIH_LOGP */ "logp",
195  /* WINED3DSIH_LOOP */ "loop",
196  /* WINED3DSIH_LRP */ "lrp",
197  /* WINED3DSIH_LT */ "lt",
198  /* WINED3DSIH_M3x2 */ "m3x2",
199  /* WINED3DSIH_M3x3 */ "m3x3",
200  /* WINED3DSIH_M3x4 */ "m3x4",
201  /* WINED3DSIH_M4x3 */ "m4x3",
202  /* WINED3DSIH_M4x4 */ "m4x4",
203  /* WINED3DSIH_MAD */ "mad",
204  /* WINED3DSIH_MAX */ "max",
205  /* WINED3DSIH_MIN */ "min",
206  /* WINED3DSIH_MOV */ "mov",
207  /* WINED3DSIH_MOVA */ "mova",
208  /* WINED3DSIH_MOVC */ "movc",
209  /* WINED3DSIH_MUL */ "mul",
210  /* WINED3DSIH_NE */ "ne",
211  /* WINED3DSIH_NOP */ "nop",
212  /* WINED3DSIH_NOT */ "not",
213  /* WINED3DSIH_NRM */ "nrm",
214  /* WINED3DSIH_OR */ "or",
215  /* WINED3DSIH_PHASE */ "phase",
216  /* WINED3DSIH_POW */ "pow",
217  /* WINED3DSIH_RCP */ "rcp",
218  /* WINED3DSIH_REP */ "rep",
219  /* WINED3DSIH_RESINFO */ "resinfo",
220  /* WINED3DSIH_RET */ "ret",
221  /* WINED3DSIH_RETP */ "retp",
222  /* WINED3DSIH_ROUND_NE */ "round_ne",
223  /* WINED3DSIH_ROUND_NI */ "round_ni",
224  /* WINED3DSIH_ROUND_PI */ "round_pi",
225  /* WINED3DSIH_ROUND_Z */ "round_z",
226  /* WINED3DSIH_RSQ */ "rsq",
227  /* WINED3DSIH_SAMPLE */ "sample",
228  /* WINED3DSIH_SAMPLE_B */ "sample_b",
229  /* WINED3DSIH_SAMPLE_C */ "sample_c",
230  /* WINED3DSIH_SAMPLE_C_LZ */ "sample_c_lz",
231  /* WINED3DSIH_SAMPLE_GRAD */ "sample_d",
232  /* WINED3DSIH_SAMPLE_INFO */ "sample_info",
233  /* WINED3DSIH_SAMPLE_LOD */ "sample_l",
234  /* WINED3DSIH_SAMPLE_POS */ "sample_pos",
235  /* WINED3DSIH_SETP */ "setp",
236  /* WINED3DSIH_SGE */ "sge",
237  /* WINED3DSIH_SGN */ "sgn",
238  /* WINED3DSIH_SINCOS */ "sincos",
239  /* WINED3DSIH_SLT */ "slt",
240  /* WINED3DSIH_SQRT */ "sqrt",
241  /* WINED3DSIH_STORE_RAW */ "store_raw",
242  /* WINED3DSIH_STORE_STRUCTURED */ "store_structured",
243  /* WINED3DSIH_STORE_UAV_TYPED */ "store_uav_typed",
244  /* WINED3DSIH_SUB */ "sub",
245  /* WINED3DSIH_SWAPC */ "swapc",
246  /* WINED3DSIH_SWITCH */ "switch",
247  /* WINED3DSIH_SYNC */ "sync",
248  /* WINED3DSIH_TEX */ "texld",
249  /* WINED3DSIH_TEXBEM */ "texbem",
250  /* WINED3DSIH_TEXBEML */ "texbeml",
251  /* WINED3DSIH_TEXCOORD */ "texcrd",
252  /* WINED3DSIH_TEXDEPTH */ "texdepth",
253  /* WINED3DSIH_TEXDP3 */ "texdp3",
254  /* WINED3DSIH_TEXDP3TEX */ "texdp3tex",
255  /* WINED3DSIH_TEXKILL */ "texkill",
256  /* WINED3DSIH_TEXLDD */ "texldd",
257  /* WINED3DSIH_TEXLDL */ "texldl",
258  /* WINED3DSIH_TEXM3x2DEPTH */ "texm3x2depth",
259  /* WINED3DSIH_TEXM3x2PAD */ "texm3x2pad",
260  /* WINED3DSIH_TEXM3x2TEX */ "texm3x2tex",
261  /* WINED3DSIH_TEXM3x3 */ "texm3x3",
262  /* WINED3DSIH_TEXM3x3DIFF */ "texm3x3diff",
263  /* WINED3DSIH_TEXM3x3PAD */ "texm3x3pad",
264  /* WINED3DSIH_TEXM3x3SPEC */ "texm3x3spec",
265  /* WINED3DSIH_TEXM3x3TEX */ "texm3x3tex",
266  /* WINED3DSIH_TEXM3x3VSPEC */ "texm3x3vspec",
267  /* WINED3DSIH_TEXREG2AR */ "texreg2ar",
268  /* WINED3DSIH_TEXREG2GB */ "texreg2gb",
269  /* WINED3DSIH_TEXREG2RGB */ "texreg2rgb",
270  /* WINED3DSIH_UBFE */ "ubfe",
271  /* WINED3DSIH_UDIV */ "udiv",
272  /* WINED3DSIH_UGE */ "uge",
273  /* WINED3DSIH_ULT */ "ult",
274  /* WINED3DSIH_UMAX */ "umax",
275  /* WINED3DSIH_UMIN */ "umin",
276  /* WINED3DSIH_UMUL */ "umul",
277  /* WINED3DSIH_USHR */ "ushr",
278  /* WINED3DSIH_UTOF */ "utof",
279  /* WINED3DSIH_XOR */ "xor",
280 };
281 
/* Semantic name string for each declaration usage.
 *
 * The table is used in both directions further down in this file: indexed by
 * a usage value to obtain the name (after a bounds check against
 * ARRAY_SIZE(semantic_names)), and searched linearly with strcmp() to turn a
 * name back into its index. The comment in front of each string names the
 * WINED3D_DECL_USAGE_* constant that slot is written for, so the order of
 * the entries is significant. */
282 static const char * const semantic_names[] =
283 {
284  /* WINED3D_DECL_USAGE_POSITION */ "SV_POSITION",
285  /* WINED3D_DECL_USAGE_BLEND_WEIGHT */ "BLENDWEIGHT",
286  /* WINED3D_DECL_USAGE_BLEND_INDICES */ "BLENDINDICES",
287  /* WINED3D_DECL_USAGE_NORMAL */ "NORMAL",
288  /* WINED3D_DECL_USAGE_PSIZE */ "PSIZE",
289  /* WINED3D_DECL_USAGE_TEXCOORD */ "TEXCOORD",
290  /* WINED3D_DECL_USAGE_TANGENT */ "TANGENT",
291  /* WINED3D_DECL_USAGE_BINORMAL */ "BINORMAL",
292  /* WINED3D_DECL_USAGE_TESS_FACTOR */ "TESSFACTOR",
293  /* WINED3D_DECL_USAGE_POSITIONT */ "POSITIONT",
294  /* WINED3D_DECL_USAGE_COLOR */ "COLOR",
295  /* WINED3D_DECL_USAGE_FOG */ "FOG",
296  /* WINED3D_DECL_USAGE_DEPTH */ "DEPTH",
297  /* WINED3D_DECL_USAGE_SAMPLE */ "SAMPLE",
298 };
299 
300 static const struct
301 {
303  const char *sysval_name;
304 }
306 {
307  {WINED3D_SIV_POSITION, "position"},
308  {WINED3D_SIV_CLIP_DISTANCE, "clip_distance"},
309  {WINED3D_SIV_CULL_DISTANCE, "cull_distance"},
310  {WINED3D_SIV_RENDER_TARGET_ARRAY_INDEX, "render_target_array_index"},
311  {WINED3D_SIV_VIEWPORT_ARRAY_INDEX, "viewport_array_index"},
312  {WINED3D_SIV_VERTEX_ID, "vertex_id"},
313  {WINED3D_SIV_INSTANCE_ID, "instance_id"},
314  {WINED3D_SIV_PRIMITIVE_ID, "primitive_id"},
315  {WINED3D_SIV_IS_FRONT_FACE, "is_front_face"},
316  {WINED3D_SIV_SAMPLE_INDEX, "sample_index"},
317  {WINED3D_SIV_QUAD_U0_TESS_FACTOR, "finalQuadUeq0EdgeTessFactor"},
318  {WINED3D_SIV_QUAD_V0_TESS_FACTOR, "finalQuadVeq0EdgeTessFactor"},
319  {WINED3D_SIV_QUAD_U1_TESS_FACTOR, "finalQuadUeq1EdgeTessFactor"},
320  {WINED3D_SIV_QUAD_V1_TESS_FACTOR, "finalQuadVeq1EdgeTessFactor"},
321  {WINED3D_SIV_QUAD_U_INNER_TESS_FACTOR, "finalQuadUInsideTessFactor"},
322  {WINED3D_SIV_QUAD_V_INNER_TESS_FACTOR, "finalQuadVInsideTessFactor"},
323  {WINED3D_SIV_TRIANGLE_U_TESS_FACTOR, "finalTriUeq0EdgeTessFactor"},
324  {WINED3D_SIV_TRIANGLE_V_TESS_FACTOR, "finalTriVeq0EdgeTessFactor"},
325  {WINED3D_SIV_TRIANGLE_W_TESS_FACTOR, "finalTriWeq0EdgeTessFactor"},
326  {WINED3D_SIV_TRIANGLE_INNER_TESS_FACTOR, "finalTriInsideTessFactor"},
327  {WINED3D_SIV_LINE_DETAIL_TESS_FACTOR, "finalLineDetailTessFactor"},
328  {WINED3D_SIV_LINE_DENSITY_TESS_FACTOR, "finalLineDensityTessFactor"},
329 };
330 
332  const struct wined3d_shader_src_param *param, const struct wined3d_shader_version *shader_version);
333 
335 {
336  if (handler_idx >= ARRAY_SIZE(shader_opcode_names))
337  return wine_dbg_sprintf("UNRECOGNIZED(%#x)", handler_idx);
338 
339  return shader_opcode_names[handler_idx];
340 }
341 
343 {
344  if (usage >= ARRAY_SIZE(semantic_names))
345  {
346  FIXME("Unrecognized usage %#x.\n", usage);
347  return "UNRECOGNIZED";
348  }
349 
350  return semantic_names[usage];
351 }
352 
354 {
355  unsigned int i;
356 
357  for (i = 0; i < ARRAY_SIZE(semantic_names); ++i)
358  {
359  if (!strcmp(name, semantic_names[i]))
360  return i;
361  }
362 
363  return ~0U;
364 }
365 
367 {
368  switch (usage)
369  {
371  return WINED3D_SV_POSITION;
372  default:
373  return 0;
374  }
375 }
376 
377 BOOL shader_match_semantic(const char *semantic_name, enum wined3d_decl_usage usage)
378 {
379  return !strcmp(semantic_name, shader_semantic_name_from_usage(usage));
380 }
381 
383  const struct wined3d_shader_semantic *s)
384 {
386  e->semantic_idx = s->usage_idx;
387  e->stream_idx = 0;
390  e->register_idx = s->reg.reg.idx[0].offset;
391  e->mask = s->reg.write_mask;
392 }
393 
395  enum wined3d_decl_usage usage, UINT usage_idx, UINT reg_idx, DWORD write_mask)
396 {
398  e->semantic_idx = usage_idx;
399  e->stream_idx = 0;
402  e->register_idx = reg_idx;
403  e->mask = write_mask;
404 }
405 
407 {
408  switch (format)
409  {
411  return &sm1_shader_frontend;
412 
414  return &sm4_shader_frontend;
415 
416  default:
417  WARN("Invalid byte code format %#x specified.\n", format);
418  return NULL;
419  }
420 }
421 
423 {
426 
427  FIXME("Could not get shader type for byte code format %#x.\n", desc->format);
429 }
430 
432 {
433  buffer->buffer[0] = '\0';
434  buffer->content_size = 0;
435 }
436 
438 {
439  buffer->buffer_size = 32;
440  if (!(buffer->buffer = heap_alloc(buffer->buffer_size)))
441  {
442  ERR("Failed to allocate shader buffer memory.\n");
443  return FALSE;
444  }
445 
446  string_buffer_clear(buffer);
447  return TRUE;
448 }
449 
451 {
452  heap_free(buffer->buffer);
453 }
454 
456 {
457  char *new_buffer;
458  unsigned int new_buffer_size = buffer->buffer_size * 2;
459 
460  while (rc > 0 && (unsigned int)rc >= new_buffer_size - buffer->content_size)
461  new_buffer_size *= 2;
462  if (!(new_buffer = heap_realloc(buffer->buffer, new_buffer_size)))
463  {
464  ERR("Failed to grow buffer.\n");
465  buffer->buffer[buffer->content_size] = '\0';
466  return FALSE;
467  }
468  buffer->buffer = new_buffer;
469  buffer->buffer_size = new_buffer_size;
470  return TRUE;
471 }
472 
473 int shader_vaddline(struct wined3d_string_buffer *buffer, const char *format, va_list args)
474 {
475  unsigned int rem;
476  int rc;
477 
478  rem = buffer->buffer_size - buffer->content_size;
479  rc = vsnprintf(&buffer->buffer[buffer->content_size], rem, format, args);
480  if (rc < 0 /* C89 */ || (unsigned int)rc >= rem /* C99 */)
481  return rc;
482 
483  buffer->content_size += rc;
484  return 0;
485 }
486 
/* Append formatted text to a string buffer, growing the buffer as needed.
 *
 * Each attempt formats the arguments afresh with shader_vaddline(); when that
 * reports that the text did not fit, the buffer is enlarged with
 * string_buffer_resize() and the attempt is repeated.
 *
 * Returns 0 once the text has been appended, or -1 when the buffer could not
 * be resized. */
int shader_addline(struct wined3d_string_buffer *buffer, const char *format, ...)
{
    va_list ap;
    int rc;

    do
    {
        /* The va_list has to be restarted for every formatting attempt. */
        va_start(ap, format);
        rc = shader_vaddline(buffer, format, ap);
        va_end(ap);

        if (rc == 0)
            return 0;
    } while (string_buffer_resize(buffer, rc));

    return -1;
}
503 
505 {
507 
508  if (list_empty(&list->list))
509  {
510  buffer = heap_alloc(sizeof(*buffer));
511  if (!buffer || !string_buffer_init(buffer))
512  {
513  ERR("Couldn't allocate buffer for temporary string.\n");
514  heap_free(buffer);
515  return NULL;
516  }
517  }
518  else
519  {
520  buffer = LIST_ENTRY(list_head(&list->list), struct wined3d_string_buffer, entry);
521  list_remove(&buffer->entry);
522  }
523  string_buffer_clear(buffer);
524  return buffer;
525 }
526 
/* Replace the contents of a string buffer with formatted text, without
 * growing it.
 *
 * A NULL buffer is accepted and treated as a successful no-op. Otherwise the
 * buffer is cleared and the text is formatted with shader_vaddline(), whose
 * result is returned: 0 on success, non-zero when the text did not fit. */
static int string_buffer_vsprintf(struct wined3d_string_buffer *buffer, const char *format, va_list args)
{
    int rc = 0;

    if (buffer)
    {
        string_buffer_clear(buffer);
        rc = shader_vaddline(buffer, format, args);
    }

    return rc;
}
534 
/* Replace the contents of a string buffer with formatted text, growing the
 * buffer as needed.
 *
 * Formatting is attempted with string_buffer_vsprintf(); whenever that
 * reports that the text did not fit, the buffer is enlarged with
 * string_buffer_resize() and formatting is attempted again. The function
 * gives up silently if the resize fails. */
void string_buffer_sprintf(struct wined3d_string_buffer *buffer, const char *format, ...)
{
    va_list ap;
    int rc;

    do
    {
        /* The va_list has to be restarted for every formatting attempt. */
        va_start(ap, format);
        rc = string_buffer_vsprintf(buffer, format, ap);
        va_end(ap);
    } while (rc && string_buffer_resize(buffer, rc));
}
551 
553 {
554  if (!buffer)
555  return;
556  list_add_head(&list->list, &buffer->entry);
557 }
558 
560 {
561  list_init(&list->list);
562 }
563 
565 {
566  struct wined3d_string_buffer *buffer, *buffer_next;
567 
568  LIST_FOR_EACH_ENTRY_SAFE(buffer, buffer_next, &list->list, struct wined3d_string_buffer, entry)
569  {
570  string_buffer_free(buffer);
571  heap_free(buffer);
572  }
573  list_init(&list->list);
574 }
575 
576 /* Convert floating point offset relative to a register file to an absolute
577  * offset for float constants. */
578 static unsigned int shader_get_float_offset(enum wined3d_shader_register_type register_type, UINT register_idx)
579 {
580  switch (register_type)
581  {
582  case WINED3DSPR_CONST: return register_idx;
583  case WINED3DSPR_CONST2: return 2048 + register_idx;
584  case WINED3DSPR_CONST3: return 4096 + register_idx;
585  case WINED3DSPR_CONST4: return 6144 + register_idx;
586  default:
587  FIXME("Unsupported register type: %u.\n", register_type);
588  return register_idx;
589  }
590 }
591 
592 static void shader_delete_constant_list(struct list *clist)
593 {
594  struct wined3d_shader_lconst *constant, *constant_next;
595 
596  LIST_FOR_EACH_ENTRY_SAFE(constant, constant_next, clist, struct wined3d_shader_lconst, entry)
597  heap_free(constant);
598  list_init(clist);
599 }
600 
602 {
603  static const struct limits_entry
604  {
605  unsigned int min_version;
606  unsigned int max_version;
607  struct wined3d_shader_limits limits;
608  }
609  vs_limits[] =
610  {
611  /* min_version, max_version, sampler, constant_int, constant_float, constant_bool, packed_output, packed_input */
612  {WINED3D_SHADER_VERSION(1, 0), WINED3D_SHADER_VERSION(1, 1), { 0, 0, 256, 0, 12, 0}},
613  {WINED3D_SHADER_VERSION(2, 0), WINED3D_SHADER_VERSION(2, 1), { 0, 16, 256, 16, 12, 0}},
614  /* DX10 cards on Windows advertise a D3D9 constant limit of 256
615  * even though they are capable of supporting much more (GL
616  * drivers advertise 1024). d3d9.dll and d3d8.dll clamp the
617  * wined3d-advertised maximum. Clamp the constant limit for <= 3.0
618  * shaders to 256. */
619  {WINED3D_SHADER_VERSION(3, 0), WINED3D_SHADER_VERSION(3, 0), { 4, 16, 256, 16, 12, 0}},
620  {WINED3D_SHADER_VERSION(4, 0), WINED3D_SHADER_VERSION(4, 0), {16, 0, 0, 0, 16, 0}},
621  {WINED3D_SHADER_VERSION(4, 1), WINED3D_SHADER_VERSION(5, 0), {16, 0, 0, 0, 32, 0}},
622  {0}
623  },
624  hs_limits[] =
625  {
626  /* min_version, max_version, sampler, constant_int, constant_float, constant_bool, packed_output, packed_input */
627  {WINED3D_SHADER_VERSION(5, 0), WINED3D_SHADER_VERSION(5, 0), {16, 0, 0, 0, 32, 32}},
628  },
629  ds_limits[] =
630  {
631  /* min_version, max_version, sampler, constant_int, constant_float, constant_bool, packed_output, packed_input */
632  {WINED3D_SHADER_VERSION(5, 0), WINED3D_SHADER_VERSION(5, 0), {16, 0, 0, 0, 32, 32}},
633  },
634  gs_limits[] =
635  {
636  /* min_version, max_version, sampler, constant_int, constant_float, constant_bool, packed_output, packed_input */
637  {WINED3D_SHADER_VERSION(4, 0), WINED3D_SHADER_VERSION(4, 0), {16, 0, 0, 0, 32, 16}},
638  {WINED3D_SHADER_VERSION(4, 1), WINED3D_SHADER_VERSION(5, 0), {16, 0, 0, 0, 32, 32}},
639  {0}
640  },
641  ps_limits[] =
642  {
643  /* min_version, max_version, sampler, constant_int, constant_float, constant_bool, packed_output, packed_input */
644  {WINED3D_SHADER_VERSION(1, 0), WINED3D_SHADER_VERSION(1, 3), { 4, 0, 8, 0, 0, 0}},
645  {WINED3D_SHADER_VERSION(1, 4), WINED3D_SHADER_VERSION(1, 4), { 6, 0, 8, 0, 0, 0}},
646  {WINED3D_SHADER_VERSION(2, 0), WINED3D_SHADER_VERSION(2, 0), {16, 0, 32, 0, 0, 0}},
647  {WINED3D_SHADER_VERSION(2, 1), WINED3D_SHADER_VERSION(2, 1), {16, 16, 32, 16, 0, 0}},
648  {WINED3D_SHADER_VERSION(3, 0), WINED3D_SHADER_VERSION(3, 0), {16, 16, 224, 16, 0, 10}},
649  {WINED3D_SHADER_VERSION(4, 0), WINED3D_SHADER_VERSION(5, 0), {16, 0, 0, 0, 0, 32}},
650  {0}
651  },
652  cs_limits[] =
653  {
654  /* min_version, max_version, sampler, constant_int, constant_float, constant_bool, packed_output, packed_input */
655  {WINED3D_SHADER_VERSION(5, 0), WINED3D_SHADER_VERSION(5, 0), {16, 0, 0, 0, 0, 0}},
656  };
657  const struct limits_entry *limits_array;
658  DWORD shader_version = WINED3D_SHADER_VERSION(shader->reg_maps.shader_version.major,
659  shader->reg_maps.shader_version.minor);
660  int i = 0;
661 
662  switch (shader->reg_maps.shader_version.type)
663  {
664  default:
665  FIXME("Unexpected shader type %u found.\n", shader->reg_maps.shader_version.type);
666  /* Fall-through. */
668  limits_array = vs_limits;
669  break;
671  limits_array = hs_limits;
672  break;
674  limits_array = ds_limits;
675  break;
677  limits_array = gs_limits;
678  break;
680  limits_array = ps_limits;
681  break;
683  limits_array = cs_limits;
684  break;
685  }
686 
687  while (limits_array[i].min_version && limits_array[i].min_version <= shader_version)
688  {
689  if (shader_version <= limits_array[i].max_version)
690  {
691  shader->limits = &limits_array[i].limits;
692  break;
693  }
694  ++i;
695  }
696  if (!shader->limits)
697  {
698  FIXME("Unexpected shader version \"%u.%u\".\n",
699  shader->reg_maps.shader_version.major,
700  shader->reg_maps.shader_version.minor);
701  shader->limits = &limits_array[max(0, i - 1)].limits;
702  }
703 }
704 
706  const struct wined3d_shader_register *reg, enum wined3d_shader_type shader_type, unsigned int constf_size)
707 {
708  switch (reg->type)
709  {
710  case WINED3DSPR_TEXTURE: /* WINED3DSPR_ADDR */
711  if (shader_type == WINED3D_SHADER_TYPE_PIXEL)
712  reg_maps->texcoord |= 1u << reg->idx[0].offset;
713  else
714  reg_maps->address |= 1u << reg->idx[0].offset;
715  break;
716 
717  case WINED3DSPR_TEMP:
718  reg_maps->temporary |= 1u << reg->idx[0].offset;
719  break;
720 
721  case WINED3DSPR_INPUT:
722  if (reg->idx[0].rel_addr)
723  reg_maps->input_rel_addressing = 1;
724  if (shader_type == WINED3D_SHADER_TYPE_PIXEL)
725  {
726  /* If relative addressing is used, we must assume that all
727  * registers are used. Even if it is a construct like v3[aL],
728  * we can't assume that v0, v1 and v2 aren't read because aL
729  * can be negative. */
730  if (reg->idx[0].rel_addr)
731  shader->u.ps.input_reg_used = ~0u;
732  else
733  shader->u.ps.input_reg_used |= 1u << reg->idx[0].offset;
734  }
735  else
736  {
737  reg_maps->input_registers |= 1u << reg->idx[0].offset;
738  }
739  break;
740 
741  case WINED3DSPR_RASTOUT:
742  if (reg->idx[0].offset == 1)
743  reg_maps->fog = 1;
744  if (reg->idx[0].offset == 2)
745  reg_maps->point_size = 1;
746  break;
747 
748  case WINED3DSPR_MISCTYPE:
749  if (shader_type == WINED3D_SHADER_TYPE_PIXEL)
750  {
751  if (!reg->idx[0].offset)
752  reg_maps->vpos = 1;
753  else if (reg->idx[0].offset == 1)
754  reg_maps->usesfacing = 1;
755  }
756  break;
757 
758  case WINED3DSPR_CONST:
759  if (reg->idx[0].rel_addr)
760  {
761  if (reg->idx[0].offset < reg_maps->min_rel_offset)
762  reg_maps->min_rel_offset = reg->idx[0].offset;
763  if (reg->idx[0].offset > reg_maps->max_rel_offset)
764  reg_maps->max_rel_offset = reg->idx[0].offset;
765  reg_maps->usesrelconstF = TRUE;
766  }
767  else
768  {
769  if (reg->idx[0].offset >= min(shader->limits->constant_float, constf_size))
770  {
771  WARN("Shader using float constant %u which is not supported.\n", reg->idx[0].offset);
772  return FALSE;
773  }
774  else
775  {
776  wined3d_insert_bits(reg_maps->constf, reg->idx[0].offset, 1, 0x1);
777  }
778  }
779  break;
780 
781  case WINED3DSPR_CONSTINT:
782  if (reg->idx[0].offset >= shader->limits->constant_int)
783  {
784  WARN("Shader using integer constant %u which is not supported.\n", reg->idx[0].offset);
785  return FALSE;
786  }
787  else
788  {
789  reg_maps->integer_constants |= (1u << reg->idx[0].offset);
790  }
791  break;
792 
794  if (reg->idx[0].offset >= shader->limits->constant_bool)
795  {
796  WARN("Shader using bool constant %u which is not supported.\n", reg->idx[0].offset);
797  return FALSE;
798  }
799  else
800  {
801  reg_maps->boolean_constants |= (1u << reg->idx[0].offset);
802  }
803  break;
804 
805  case WINED3DSPR_COLOROUT:
806  reg_maps->rt_mask |= (1u << reg->idx[0].offset);
807  break;
808 
810  reg_maps->vocp = 1;
811  break;
812 
814  reg_maps->sample_mask = 1;
815  break;
816 
817  default:
818  TRACE("Not recording register of type %#x and [%#x][%#x].\n",
819  reg->type, reg->idx[0].offset, reg->idx[1].offset);
820  break;
821  }
822  return TRUE;
823 }
824 
825 static void shader_record_sample(struct wined3d_shader_reg_maps *reg_maps,
826  unsigned int resource_idx, unsigned int sampler_idx, unsigned int bind_idx)
827 {
828  struct wined3d_shader_sampler_map_entry *entries, *entry;
830  unsigned int i;
831 
832  map = &reg_maps->sampler_map;
833  entries = map->entries;
834  for (i = 0; i < map->count; ++i)
835  {
836  if (entries[i].resource_idx == resource_idx && entries[i].sampler_idx == sampler_idx)
837  return;
838  }
839 
840  if (!map->size)
841  {
842  if (!(entries = heap_calloc(4, sizeof(*entries))))
843  {
844  ERR("Failed to allocate sampler map entries.\n");
845  return;
846  }
847  map->size = 4;
848  map->entries = entries;
849  }
850  else if (map->count == map->size)
851  {
852  size_t new_size = map->size * 2;
853 
854  if (sizeof(*entries) * new_size <= sizeof(*entries) * map->size
855  || !(entries = heap_realloc(entries, sizeof(*entries) * new_size)))
856  {
857  ERR("Failed to resize sampler map entries.\n");
858  return;
859  }
860  map->size = new_size;
861  map->entries = entries;
862  }
863 
864  entry = &entries[map->count++];
865  entry->resource_idx = resource_idx;
866  entry->sampler_idx = sampler_idx;
867  entry->bind_idx = bind_idx;
868 }
869 
870 static unsigned int get_instr_extra_regcount(enum WINED3D_SHADER_INSTRUCTION_HANDLER instr, unsigned int param)
871 {
872  switch (instr)
873  {
874  case WINED3DSIH_M4x4:
875  case WINED3DSIH_M3x4:
876  return param == 1 ? 3 : 0;
877 
878  case WINED3DSIH_M4x3:
879  case WINED3DSIH_M3x3:
880  return param == 1 ? 2 : 0;
881 
882  case WINED3DSIH_M3x2:
883  return param == 1 ? 1 : 0;
884 
885  default:
886  return 0;
887  }
888 }
889 
891  unsigned int register_idx, unsigned int size, unsigned int stride)
892 {
893  struct wined3d_shader_tgsm *tgsm;
894 
895  if (register_idx >= MAX_TGSM_REGISTERS)
896  {
897  ERR("Invalid TGSM register index %u.\n", register_idx);
898  return S_OK;
899  }
900  if (reg_maps->shader_version.type != WINED3D_SHADER_TYPE_COMPUTE)
901  {
902  FIXME("TGSM declarations are allowed only in compute shaders.\n");
903  return S_OK;
904  }
905 
906  if (!wined3d_array_reserve((void **)&reg_maps->tgsm, &reg_maps->tgsm_capacity,
907  register_idx + 1, sizeof(*reg_maps->tgsm)))
908  return E_OUTOFMEMORY;
909 
910  reg_maps->tgsm_count = max(register_idx + 1, reg_maps->tgsm_count);
911  tgsm = &reg_maps->tgsm[register_idx];
912  tgsm->size = size;
913  tgsm->stride = stride;
914  return S_OK;
915 }
916 
918  struct wined3d_shader_phase **current_phase, const struct wined3d_shader_instruction *ins,
919  const DWORD *current_instruction_ptr, const DWORD *previous_instruction_ptr)
920 {
921  struct wined3d_shader_phase *phase;
922 
923  if ((phase = *current_phase))
924  {
925  phase->end = previous_instruction_ptr;
926  *current_phase = NULL;
927  }
928 
929  if (shader->reg_maps.shader_version.type != WINED3D_SHADER_TYPE_HULL)
930  {
931  ERR("Unexpected shader type %#x.\n", shader->reg_maps.shader_version.type);
932  return E_FAIL;
933  }
934 
935  switch (ins->handler_idx)
936  {
938  if (shader->u.hs.phases.control_point)
939  {
940  FIXME("Multiple control point phases.\n");
941  heap_free(shader->u.hs.phases.control_point);
942  }
943  if (!(shader->u.hs.phases.control_point = heap_alloc_zero(sizeof(*shader->u.hs.phases.control_point))))
944  return E_OUTOFMEMORY;
945  phase = shader->u.hs.phases.control_point;
946  break;
948  if (!wined3d_array_reserve((void **)&shader->u.hs.phases.fork,
949  &shader->u.hs.phases.fork_size, shader->u.hs.phases.fork_count + 1,
950  sizeof(*shader->u.hs.phases.fork)))
951  return E_OUTOFMEMORY;
952  phase = &shader->u.hs.phases.fork[shader->u.hs.phases.fork_count++];
953  break;
955  if (!wined3d_array_reserve((void **)&shader->u.hs.phases.join,
956  &shader->u.hs.phases.join_size, shader->u.hs.phases.join_count + 1,
957  sizeof(*shader->u.hs.phases.join)))
958  return E_OUTOFMEMORY;
959  phase = &shader->u.hs.phases.join[shader->u.hs.phases.join_count++];
960  break;
961  default:
962  ERR("Unexpected opcode %s.\n", debug_d3dshaderinstructionhandler(ins->handler_idx));
963  return E_FAIL;
964  }
965 
966  phase->start = current_instruction_ptr;
967  *current_phase = phase;
968 
969  return WINED3D_OK;
970 }
971 
973  const struct wined3d_shader_signature_element *e, unsigned int *mask)
974 {
975  /* Clip and cull distances are packed in 4 component registers. 0 and 1 are
976  * the only allowed semantic indices.
977  */
978  if (e->semantic_idx >= MAX_CLIP_DISTANCES / 4)
979  {
980  *mask = 0;
981  WARN("Invalid clip/cull distance index %u.\n", e->semantic_idx);
982  return WINED3DERR_INVALIDCALL;
983  }
984 
985  *mask = (e->mask & WINED3DSP_WRITEMASK_ALL) << (4 * e->semantic_idx);
986  return WINED3D_OK;
987 }
988 
989 static void wined3d_insert_interpolation_mode(DWORD *packed_interpolation_mode,
990  unsigned int register_idx, enum wined3d_shader_interpolation_mode mode)
991 {
993  FIXME("Unexpected interpolation mode %#x.\n", mode);
994 
995  wined3d_insert_bits(packed_interpolation_mode,
996  register_idx * WINED3D_PACKED_INTERPOLATION_BIT_COUNT, WINED3D_PACKED_INTERPOLATION_BIT_COUNT, mode);
997 }
998 
1000 {
1001  const struct wined3d_shader_signature *output_signature = &shader->output_signature;
1002  struct wined3d_shader_reg_maps *reg_maps = &shader->reg_maps;
1003  unsigned int i;
1004  HRESULT hr;
1005 
1006  for (i = 0; i < output_signature->element_count; ++i)
1007  {
1008  const struct wined3d_shader_signature_element *e = &output_signature->elements[i];
1009  unsigned int mask;
1010 
1011  reg_maps->output_registers |= 1u << e->register_idx;
1013  {
1015  return hr;
1016  reg_maps->clip_distance_mask |= mask;
1017  }
1019  {
1021  return hr;
1022  reg_maps->cull_distance_mask |= mask;
1023  }
1025  {
1026  reg_maps->viewport_array = 1;
1027  }
1028  }
1029 
1030  return WINED3D_OK;
1031 }
1032 
1033 /* Note that this does not count the loop register as an address register. */
1035 {
1036  struct wined3d_shader_signature_element input_signature_elements[max(MAX_ATTRIBS, MAX_REG_INPUT)];
1037  struct wined3d_shader_signature_element output_signature_elements[MAX_REG_OUTPUT];
1038  struct wined3d_shader_signature *output_signature = &shader->output_signature;
1039  struct wined3d_shader_signature *input_signature = &shader->input_signature;
1040  struct wined3d_shader_reg_maps *reg_maps = &shader->reg_maps;
1041  const struct wined3d_shader_frontend *fe = shader->frontend;
1042  unsigned int cur_loop_depth = 0, max_loop_depth = 0;
1043  struct wined3d_shader_version shader_version;
1044  struct wined3d_shader_phase *phase = NULL;
1045  const DWORD *ptr, *prev_ins, *current_ins;
1046  void *fe_data = shader->frontend_data;
1047  unsigned int i;
1048  HRESULT hr;
1049 
1050  memset(reg_maps, 0, sizeof(*reg_maps));
1051  memset(input_signature_elements, 0, sizeof(input_signature_elements));
1052  memset(output_signature_elements, 0, sizeof(output_signature_elements));
1053  reg_maps->min_rel_offset = ~0U;
1054  list_init(&reg_maps->indexable_temps);
1055 
1056  fe->shader_read_header(fe_data, &ptr, &shader_version);
1057  prev_ins = current_ins = ptr;
1058  reg_maps->shader_version = shader_version;
1059 
1060  shader_set_limits(shader);
1061 
1062  if (!(reg_maps->constf = heap_calloc(((min(shader->limits->constant_float, constf_size) + 31) / 32),
1063  sizeof(*reg_maps->constf))))
1064  {
1065  ERR("Failed to allocate constant map memory.\n");
1066  return E_OUTOFMEMORY;
1067  }
1068 
1069  while (!fe->shader_is_end(fe_data, &ptr))
1070  {
1071  struct wined3d_shader_instruction ins;
1072 
1073  current_ins = ptr;
1074  /* Fetch opcode. */
1075  fe->shader_read_instruction(fe_data, &ptr, &ins);
1076 
1077  /* Unhandled opcode, and its parameters. */
1079  {
1080  WARN("Encountered unrecognised or invalid instruction.\n");
1081  return WINED3DERR_INVALIDCALL;
1082  }
1083 
1084  /* Handle declarations. */
1085  if (ins.handler_idx == WINED3DSIH_DCL
1087  {
1088  struct wined3d_shader_semantic *semantic = &ins.declaration.semantic;
1089  unsigned int reg_idx = semantic->reg.reg.idx[0].offset;
1090 
1091  switch (semantic->reg.reg.type)
1092  {
1093  /* Mark input registers used. */
1094  case WINED3DSPR_INPUT:
1095  if (reg_idx >= MAX_REG_INPUT)
1096  {
1097  ERR("Invalid input register index %u.\n", reg_idx);
1098  break;
1099  }
1100  if (shader_version.type == WINED3D_SHADER_TYPE_PIXEL && shader_version.major == 3
1101  && semantic->usage == WINED3D_DECL_USAGE_POSITION && !semantic->usage_idx)
1102  return WINED3DERR_INVALIDCALL;
1103  reg_maps->input_registers |= 1u << reg_idx;
1104  shader_signature_from_semantic(&input_signature_elements[reg_idx], semantic);
1105  break;
1106 
1107  /* Vertex shader: mark 3.0 output registers used, save token. */
1108  case WINED3DSPR_OUTPUT:
1109  if (reg_idx >= MAX_REG_OUTPUT)
1110  {
1111  ERR("Invalid output register index %u.\n", reg_idx);
1112  break;
1113  }
1114  reg_maps->output_registers |= 1u << reg_idx;
1115  shader_signature_from_semantic(&output_signature_elements[reg_idx], semantic);
1116  if (semantic->usage == WINED3D_DECL_USAGE_FOG)
1117  reg_maps->fog = 1;
1118  if (semantic->usage == WINED3D_DECL_USAGE_PSIZE)
1119  reg_maps->point_size = 1;
1120  break;
1121 
1122  case WINED3DSPR_SAMPLER:
1123  shader_record_sample(reg_maps, reg_idx, reg_idx, reg_idx);
1124  case WINED3DSPR_RESOURCE:
1125  if (reg_idx >= ARRAY_SIZE(reg_maps->resource_info))
1126  {
1127  ERR("Invalid resource index %u.\n", reg_idx);
1128  break;
1129  }
1130  reg_maps->resource_info[reg_idx].type = semantic->resource_type;
1131  reg_maps->resource_info[reg_idx].data_type = semantic->resource_data_type;
1132  break;
1133 
1134  case WINED3DSPR_UAV:
1135  if (reg_idx >= ARRAY_SIZE(reg_maps->uav_resource_info))
1136  {
1137  ERR("Invalid UAV resource index %u.\n", reg_idx);
1138  break;
1139  }
1140  reg_maps->uav_resource_info[reg_idx].type = semantic->resource_type;
1141  reg_maps->uav_resource_info[reg_idx].data_type = semantic->resource_data_type;
1142  if (ins.flags)
1143  FIXME("Ignoring typed UAV flags %#x.\n", ins.flags);
1144  break;
1145 
1146  default:
1147  TRACE("Not recording DCL register type %#x.\n", semantic->reg.reg.type);
1148  break;
1149  }
1150  }
1152  {
1153  struct wined3d_shader_register *reg = &ins.declaration.src.reg;
1154  if (reg->idx[0].offset >= WINED3D_MAX_CBS)
1155  ERR("Invalid CB index %u.\n", reg->idx[0].offset);
1156  else
1157  reg_maps->cb_sizes[reg->idx[0].offset] = reg->idx[1].offset;
1158  }
1159  else if (ins.handler_idx == WINED3DSIH_DCL_GLOBAL_FLAGS)
1160  {
1162  {
1163  if (shader_version.type == WINED3D_SHADER_TYPE_PIXEL)
1164  shader->u.ps.force_early_depth_stencil = TRUE;
1165  else
1166  FIXME("Invalid instruction %#x for shader type %#x.\n",
1167  ins.handler_idx, shader_version.type);
1168  }
1169  else
1170  {
1171  WARN("Ignoring global flags %#x.\n", ins.flags);
1172  }
1173  }
1174  else if (ins.handler_idx == WINED3DSIH_DCL_GS_INSTANCES)
1175  {
1176  if (shader_version.type == WINED3D_SHADER_TYPE_GEOMETRY)
1177  shader->u.gs.instance_count = ins.declaration.count;
1178  else
1179  FIXME("Invalid instruction %#x for shader type %#x.\n",
1180  ins.handler_idx, shader_version.type);
1181  }
1184  {
1185  if (phase)
1186  phase->instance_count = ins.declaration.count;
1187  else
1188  FIXME("Instruction %s outside of shader phase.\n",
1190  }
1192  {
1193  if (reg_maps->icb)
1194  FIXME("Multiple immediate constant buffers.\n");
1195  reg_maps->icb = ins.declaration.icb;
1196  }
1198  {
1199  if (phase)
1200  {
1201  FIXME("Indexable temporary registers not supported.\n");
1202  }
1203  else
1204  {
1206 
1207  if (!(reg = heap_alloc(sizeof(*reg))))
1208  return E_OUTOFMEMORY;
1209 
1210  *reg = ins.declaration.indexable_temp;
1211  list_add_tail(&reg_maps->indexable_temps, &reg->entry);
1212  }
1213  }
1215  {
1216  if (shader_version.type == WINED3D_SHADER_TYPE_GEOMETRY)
1217  shader->u.gs.input_type = ins.declaration.primitive_type.type;
1218  else
1219  FIXME("Invalid instruction %#x for shader type %#x.\n",
1220  ins.handler_idx, shader_version.type);
1221  }
1222  else if (ins.handler_idx == WINED3DSIH_DCL_INPUT_PS)
1223  {
1224  unsigned int reg_idx = ins.declaration.dst.reg.idx[0].offset;
1225  if (reg_idx >= MAX_REG_INPUT)
1226  {
1227  ERR("Invalid register index %u.\n", reg_idx);
1228  break;
1229  }
1230  if (shader_version.type == WINED3D_SHADER_TYPE_PIXEL)
1231  wined3d_insert_interpolation_mode(shader->u.ps.interpolation_mode, reg_idx, ins.flags);
1232  else
1233  FIXME("Invalid instruction %#x for shader type %#x.\n",
1234  ins.handler_idx, shader_version.type);
1235  }
1236  else if (ins.handler_idx == WINED3DSIH_DCL_OUTPUT)
1237  {
1238  if (ins.declaration.dst.reg.type == WINED3DSPR_DEPTHOUT
1239  || ins.declaration.dst.reg.type == WINED3DSPR_DEPTHOUTGE
1240  || ins.declaration.dst.reg.type == WINED3DSPR_DEPTHOUTLE)
1241  {
1242  if (shader_version.type == WINED3D_SHADER_TYPE_PIXEL)
1243  shader->u.ps.depth_output = ins.declaration.dst.reg.type;
1244  else
1245  FIXME("Invalid instruction %#x for shader type %#x.\n",
1246  ins.handler_idx, shader_version.type);
1247  }
1248  }
1250  {
1251  if (shader_version.type == WINED3D_SHADER_TYPE_HULL)
1252  shader->u.hs.output_vertex_count = ins.declaration.count;
1253  else
1254  FIXME("Invalid instruction %#x for shader type %#x.\n", ins.handler_idx, shader_version.type);
1255  }
1257  {
1258  if (shader_version.type == WINED3D_SHADER_TYPE_GEOMETRY)
1259  shader->u.gs.output_type = ins.declaration.primitive_type.type;
1260  else
1261  FIXME("Invalid instruction %#x for shader type %#x.\n",
1262  ins.handler_idx, shader_version.type);
1263  }
1264  else if (ins.handler_idx == WINED3DSIH_DCL_RESOURCE_RAW)
1265  {
1266  unsigned int reg_idx = ins.declaration.dst.reg.idx[0].offset;
1267  if (reg_idx >= ARRAY_SIZE(reg_maps->resource_info))
1268  {
1269  ERR("Invalid resource index %u.\n", reg_idx);
1270  break;
1271  }
1272  reg_maps->resource_info[reg_idx].type = WINED3D_SHADER_RESOURCE_BUFFER;
1273  reg_maps->resource_info[reg_idx].data_type = WINED3D_DATA_UINT;
1274  reg_maps->resource_info[reg_idx].flags = WINED3D_VIEW_BUFFER_RAW;
1275  }
1277  {
1278  unsigned int reg_idx = ins.declaration.structured_resource.reg.reg.idx[0].offset;
1279  if (reg_idx >= ARRAY_SIZE(reg_maps->resource_info))
1280  {
1281  ERR("Invalid resource index %u.\n", reg_idx);
1282  break;
1283  }
1284  reg_maps->resource_info[reg_idx].type = WINED3D_SHADER_RESOURCE_BUFFER;
1285  reg_maps->resource_info[reg_idx].data_type = WINED3D_DATA_UINT;
1286  reg_maps->resource_info[reg_idx].flags = 0;
1287  reg_maps->resource_info[reg_idx].stride = ins.declaration.structured_resource.byte_stride / 4;
1288  }
1289  else if (ins.handler_idx == WINED3DSIH_DCL_SAMPLER)
1290  {
1292  reg_maps->sampler_comparison_mode |= (1u << ins.declaration.dst.reg.idx[0].offset);
1293  }
1294  else if (ins.handler_idx == WINED3DSIH_DCL_TEMPS)
1295  {
1296  if (phase)
1297  phase->temporary_count = ins.declaration.count;
1298  else
1299  reg_maps->temporary_count = ins.declaration.count;
1300  }
1302  {
1303  if (shader_version.type == WINED3D_SHADER_TYPE_DOMAIN)
1304  shader->u.ds.tessellator_domain = ins.declaration.tessellator_domain;
1305  else if (shader_version.type != WINED3D_SHADER_TYPE_HULL)
1306  FIXME("Invalid instruction %#x for shader type %#x.\n", ins.handler_idx, shader_version.type);
1307  }
1309  {
1310  if (shader_version.type == WINED3D_SHADER_TYPE_HULL)
1311  shader->u.hs.tessellator_output_primitive = ins.declaration.tessellator_output_primitive;
1312  else
1313  FIXME("Invalid instruction %#x for shader type %#x.\n", ins.handler_idx, shader_version.type);
1314  }
1316  {
1317  if (shader_version.type == WINED3D_SHADER_TYPE_HULL)
1318  shader->u.hs.tessellator_partitioning = ins.declaration.tessellator_partitioning;
1319  else
1320  FIXME("Invalid instruction %#x for shader type %#x.\n", ins.handler_idx, shader_version.type);
1321  }
1322  else if (ins.handler_idx == WINED3DSIH_DCL_TGSM_RAW)
1323  {
1324  if (FAILED(hr = shader_reg_maps_add_tgsm(reg_maps, ins.declaration.tgsm_raw.reg.reg.idx[0].offset,
1325  ins.declaration.tgsm_raw.byte_count / 4, 0)))
1326  return hr;
1327  }
1329  {
1330  unsigned int stride = ins.declaration.tgsm_structured.byte_stride / 4;
1331  unsigned int size = stride * ins.declaration.tgsm_structured.structure_count;
1332  if (FAILED(hr = shader_reg_maps_add_tgsm(reg_maps,
1333  ins.declaration.tgsm_structured.reg.reg.idx[0].offset, size, stride)))
1334  return hr;
1335  }
1336  else if (ins.handler_idx == WINED3DSIH_DCL_THREAD_GROUP)
1337  {
1338  if (shader_version.type == WINED3D_SHADER_TYPE_COMPUTE)
1339  {
1340  shader->u.cs.thread_group_size = ins.declaration.thread_group_size;
1341  }
1342  else
1343  {
1344  FIXME("Invalid instruction %#x for shader type %#x.\n",
1345  ins.handler_idx, shader_version.type);
1346  }
1347  }
1348  else if (ins.handler_idx == WINED3DSIH_DCL_UAV_RAW)
1349  {
1350  unsigned int reg_idx = ins.declaration.dst.reg.idx[0].offset;
1351  if (reg_idx >= ARRAY_SIZE(reg_maps->uav_resource_info))
1352  {
1353  ERR("Invalid UAV resource index %u.\n", reg_idx);
1354  break;
1355  }
1356  if (ins.flags)
1357  FIXME("Ignoring raw UAV flags %#x.\n", ins.flags);
1358  reg_maps->uav_resource_info[reg_idx].type = WINED3D_SHADER_RESOURCE_BUFFER;
1359  reg_maps->uav_resource_info[reg_idx].data_type = WINED3D_DATA_UINT;
1360  reg_maps->uav_resource_info[reg_idx].flags = WINED3D_VIEW_BUFFER_RAW;
1361  }
1363  {
1364  unsigned int reg_idx = ins.declaration.structured_resource.reg.reg.idx[0].offset;
1365  if (reg_idx >= ARRAY_SIZE(reg_maps->uav_resource_info))
1366  {
1367  ERR("Invalid UAV resource index %u.\n", reg_idx);
1368  break;
1369  }
1370  if (ins.flags)
1371  FIXME("Ignoring structured UAV flags %#x.\n", ins.flags);
1372  reg_maps->uav_resource_info[reg_idx].type = WINED3D_SHADER_RESOURCE_BUFFER;
1373  reg_maps->uav_resource_info[reg_idx].data_type = WINED3D_DATA_UINT;
1374  reg_maps->uav_resource_info[reg_idx].flags = 0;
1375  reg_maps->uav_resource_info[reg_idx].stride = ins.declaration.structured_resource.byte_stride / 4;
1376  }
1377  else if (ins.handler_idx == WINED3DSIH_DCL_VERTICES_OUT)
1378  {
1379  if (shader_version.type == WINED3D_SHADER_TYPE_GEOMETRY)
1380  shader->u.gs.vertices_out = ins.declaration.count;
1381  else
1382  FIXME("Invalid instruction %#x for shader type %#x.\n",
1383  ins.handler_idx, shader_version.type);
1384  }
1385  else if (ins.handler_idx == WINED3DSIH_DEF)
1386  {
1387  struct wined3d_shader_lconst *lconst;
1388  float *value;
1389 
1390  if (!(lconst = heap_alloc(sizeof(*lconst))))
1391  return E_OUTOFMEMORY;
1392 
1393  lconst->idx = ins.dst[0].reg.idx[0].offset;
1394  memcpy(lconst->value, ins.src[0].reg.u.immconst_data, 4 * sizeof(DWORD));
1395  value = (float *)lconst->value;
1396 
1397  /* In pixel shader 1.X shaders, the constants are clamped between [-1;1] */
1398  if (shader_version.major == 1 && shader_version.type == WINED3D_SHADER_TYPE_PIXEL)
1399  {
1400  if (value[0] < -1.0f) value[0] = -1.0f;
1401  else if (value[0] > 1.0f) value[0] = 1.0f;
1402  if (value[1] < -1.0f) value[1] = -1.0f;
1403  else if (value[1] > 1.0f) value[1] = 1.0f;
1404  if (value[2] < -1.0f) value[2] = -1.0f;
1405  else if (value[2] > 1.0f) value[2] = 1.0f;
1406  if (value[3] < -1.0f) value[3] = -1.0f;
1407  else if (value[3] > 1.0f) value[3] = 1.0f;
1408  }
1409 
1410  list_add_head(&shader->constantsF, &lconst->entry);
1411 
1412  if (isinf(value[0]) || isnan(value[0]) || isinf(value[1]) || isnan(value[1])
1413  || isinf(value[2]) || isnan(value[2]) || isinf(value[3]) || isnan(value[3]))
1414  {
1415  shader->lconst_inf_or_nan = TRUE;
1416  }
1417  }
1418  else if (ins.handler_idx == WINED3DSIH_DEFI)
1419  {
1420  struct wined3d_shader_lconst *lconst;
1421 
1422  if (!(lconst = heap_alloc(sizeof(*lconst))))
1423  return E_OUTOFMEMORY;
1424 
1425  lconst->idx = ins.dst[0].reg.idx[0].offset;
1426  memcpy(lconst->value, ins.src[0].reg.u.immconst_data, 4 * sizeof(DWORD));
1427 
1428  list_add_head(&shader->constantsI, &lconst->entry);
1429  reg_maps->local_int_consts |= (1u << lconst->idx);
1430  }
1431  else if (ins.handler_idx == WINED3DSIH_DEFB)
1432  {
1433  struct wined3d_shader_lconst *lconst;
1434 
1435  if (!(lconst = heap_alloc(sizeof(*lconst))))
1436  return E_OUTOFMEMORY;
1437 
1438  lconst->idx = ins.dst[0].reg.idx[0].offset;
1439  memcpy(lconst->value, ins.src[0].reg.u.immconst_data, sizeof(DWORD));
1440 
1441  list_add_head(&shader->constantsB, &lconst->entry);
1442  reg_maps->local_bool_consts |= (1u << lconst->idx);
1443  }
1444  /* Handle shader phases. */
1448  {
1449  if (FAILED(hr = shader_record_shader_phase(shader, &phase, &ins, current_ins, prev_ins)))
1450  return hr;
1451  }
1452  /* For subroutine prototypes. */
1453  else if (ins.handler_idx == WINED3DSIH_LABEL)
1454  {
1455  reg_maps->labels |= 1u << ins.src[0].reg.idx[0].offset;
1456  }
1457  /* Set texture, address, temporary registers. */
1458  else
1459  {
1460  BOOL color0_mov = FALSE;
1461  unsigned int i;
1462 
1463  /* This will loop over all the registers and try to
1464  * make a bitmask of the ones we're interested in.
1465  *
1466  * Relative addressing tokens are ignored, but that's
1467  * okay, since we'll catch any address registers when
1468  * they are initialized (required by spec). */
1469  for (i = 0; i < ins.dst_count; ++i)
1470  {
1471  if (!shader_record_register_usage(shader, reg_maps, &ins.dst[i].reg,
1472  shader_version.type, constf_size))
1473  return WINED3DERR_INVALIDCALL;
1474 
1475  if (shader_version.type == WINED3D_SHADER_TYPE_VERTEX)
1476  {
1477  UINT idx = ins.dst[i].reg.idx[0].offset;
1478 
1479  switch (ins.dst[i].reg.type)
1480  {
1481  case WINED3DSPR_RASTOUT:
1482  if (shader_version.major >= 3)
1483  break;
1484  switch (idx)
1485  {
1486  case 0: /* oPos */
1487  reg_maps->output_registers |= 1u << 10;
1488  shader_signature_from_usage(&output_signature_elements[10],
1490  break;
1491 
1492  case 1: /* oFog */
1493  reg_maps->output_registers |= 1u << 11;
1494  shader_signature_from_usage(&output_signature_elements[11],
1496  break;
1497 
1498  case 2: /* oPts */
1499  reg_maps->output_registers |= 1u << 11;
1500  shader_signature_from_usage(&output_signature_elements[11],
1502  break;
1503  }
1504  break;
1505 
1506  case WINED3DSPR_ATTROUT:
1507  if (shader_version.major >= 3)
1508  break;
1509  if (idx < 2)
1510  {
1511  idx += 8;
1512  if (reg_maps->output_registers & (1u << idx))
1513  {
1514  output_signature_elements[idx].mask |= ins.dst[i].write_mask;
1515  }
1516  else
1517  {
1518  reg_maps->output_registers |= 1u << idx;
1519  shader_signature_from_usage(&output_signature_elements[idx],
1520  WINED3D_DECL_USAGE_COLOR, idx - 8, idx, ins.dst[i].write_mask);
1521  }
1522  }
1523  break;
1524 
1525  case WINED3DSPR_TEXCRDOUT: /* WINED3DSPR_OUTPUT */
1526  if (shader_version.major >= 3)
1527  {
1528  if (idx >= ARRAY_SIZE(reg_maps->u.output_registers_mask))
1529  {
1530  WARN("Invalid output register index %u.\n", idx);
1531  break;
1532  }
1533  reg_maps->u.output_registers_mask[idx] |= ins.dst[i].write_mask;
1534  break;
1535  }
1536  if (idx >= ARRAY_SIZE(reg_maps->u.texcoord_mask))
1537  {
1538  WARN("Invalid texcoord index %u.\n", idx);
1539  break;
1540  }
1541  reg_maps->u.texcoord_mask[idx] |= ins.dst[i].write_mask;
1542  if (reg_maps->output_registers & (1u << idx))
1543  {
1544  output_signature_elements[idx].mask |= ins.dst[i].write_mask;
1545  }
1546  else
1547  {
1548  reg_maps->output_registers |= 1u << idx;
1549  shader_signature_from_usage(&output_signature_elements[idx],
1550  WINED3D_DECL_USAGE_TEXCOORD, idx, idx, ins.dst[i].write_mask);
1551  }
1552  break;
1553 
1554  default:
1555  break;
1556  }
1557  }
1558 
1559  if (shader_version.type == WINED3D_SHADER_TYPE_PIXEL)
1560  {
1561  if (ins.dst[i].reg.type == WINED3DSPR_COLOROUT && !ins.dst[i].reg.idx[0].offset)
1562  {
1563  /* Many 2.0 and 3.0 pixel shaders end with a MOV from a temp register to
1564  * COLOROUT 0. If we know this in advance, the ARB shader backend can skip
1565  * the mov and perform the sRGB write correction from the source register.
1566  *
1567  * However, if the mov is only partial, we can't do this, and if the write
1568  * comes from an instruction other than MOV it is hard to do as well. If
1569  * COLOROUT 0 is overwritten partially later, the marker is dropped again. */
1570  shader->u.ps.color0_mov = FALSE;
1571  if (ins.handler_idx == WINED3DSIH_MOV
1572  && ins.dst[i].write_mask == WINED3DSP_WRITEMASK_ALL)
1573  {
1574  /* Used later when the source register is read. */
1575  color0_mov = TRUE;
1576  }
1577  }
1578  /* Also drop the MOV marker if the source register is overwritten prior to the shader
1579  * end
1580  */
1581  else if (ins.dst[i].reg.type == WINED3DSPR_TEMP
1582  && ins.dst[i].reg.idx[0].offset == shader->u.ps.color0_reg)
1583  {
1584  shader->u.ps.color0_mov = FALSE;
1585  }
1586  }
1587 
1588  /* Declare 1.x samplers implicitly, based on the destination reg. number. */
1589  if (shader_version.major == 1
1590  && (ins.handler_idx == WINED3DSIH_TEX
1591  || ins.handler_idx == WINED3DSIH_TEXBEM
1601  {
1602  unsigned int reg_idx = ins.dst[i].reg.idx[0].offset;
1603 
1604  if (reg_idx >= ARRAY_SIZE(reg_maps->resource_info))
1605  {
1606  WARN("Invalid 1.x sampler index %u.\n", reg_idx);
1607  continue;
1608  }
1609 
1610  TRACE("Setting fake 2D resource for 1.x pixelshader.\n");
1611  reg_maps->resource_info[reg_idx].type = WINED3D_SHADER_RESOURCE_TEXTURE_2D;
1612  reg_maps->resource_info[reg_idx].data_type = WINED3D_DATA_FLOAT;
1613  shader_record_sample(reg_maps, reg_idx, reg_idx, reg_idx);
1614 
1615  /* texbem is only valid with < 1.4 pixel shaders */
1616  if (ins.handler_idx == WINED3DSIH_TEXBEM
1617  || ins.handler_idx == WINED3DSIH_TEXBEML)
1618  {
1619  reg_maps->bumpmat |= 1u << reg_idx;
1620  if (ins.handler_idx == WINED3DSIH_TEXBEML)
1621  {
1622  reg_maps->luminanceparams |= 1u << reg_idx;
1623  }
1624  }
1625  }
1626  else if (ins.handler_idx == WINED3DSIH_BEM)
1627  {
1628  reg_maps->bumpmat |= 1u << ins.dst[i].reg.idx[0].offset;
1629  }
1630  }
1631 
1633  {
1634  unsigned int reg_idx = ins.src[0].reg.idx[0].offset;
1635  if (reg_idx >= MAX_UNORDERED_ACCESS_VIEWS)
1636  {
1637  ERR("Invalid UAV index %u.\n", reg_idx);
1638  break;
1639  }
1640  reg_maps->uav_counter_mask |= (1u << reg_idx);
1641  }
1644  || (ins.handler_idx == WINED3DSIH_BUFINFO && ins.src[0].reg.type == WINED3DSPR_UAV)
1646  || (ins.handler_idx == WINED3DSIH_LD_RAW && ins.src[1].reg.type == WINED3DSPR_UAV)
1647  || (ins.handler_idx == WINED3DSIH_LD_STRUCTURED && ins.src[2].reg.type == WINED3DSPR_UAV))
1648  {
1649  unsigned int reg_idx;
1651  reg_idx = ins.src[1].reg.idx[0].offset;
1652  else if (ins.handler_idx == WINED3DSIH_LD_STRUCTURED)
1653  reg_idx = ins.src[2].reg.idx[0].offset;
1655  reg_idx = ins.dst[0].reg.idx[0].offset;
1656  else if (ins.handler_idx == WINED3DSIH_BUFINFO)
1657  reg_idx = ins.src[0].reg.idx[0].offset;
1658  else
1659  reg_idx = ins.dst[1].reg.idx[0].offset;
1660  if (reg_idx >= MAX_UNORDERED_ACCESS_VIEWS)
1661  {
1662  ERR("Invalid UAV index %u.\n", reg_idx);
1663  break;
1664  }
1665  reg_maps->uav_read_mask |= (1u << reg_idx);
1666  }
1667  else if (ins.handler_idx == WINED3DSIH_NRM)
1668  {
1669  reg_maps->usesnrm = 1;
1670  }
1671  else if (ins.handler_idx == WINED3DSIH_DSY
1673  || ins.handler_idx == WINED3DSIH_DSY_FINE)
1674  {
1675  reg_maps->usesdsy = 1;
1676  }
1677  else if (ins.handler_idx == WINED3DSIH_DSX
1679  || ins.handler_idx == WINED3DSIH_DSX_FINE)
1680  {
1681  reg_maps->usesdsx = 1;
1682  }
1683  else if (ins.handler_idx == WINED3DSIH_TEXLDD) reg_maps->usestexldd = 1;
1684  else if (ins.handler_idx == WINED3DSIH_TEXLDL) reg_maps->usestexldl = 1;
1685  else if (ins.handler_idx == WINED3DSIH_MOVA) reg_maps->usesmova = 1;
1686  else if (ins.handler_idx == WINED3DSIH_IFC) reg_maps->usesifc = 1;
1687  else if (ins.handler_idx == WINED3DSIH_CALL) reg_maps->usescall = 1;
1688  else if (ins.handler_idx == WINED3DSIH_POW) reg_maps->usespow = 1;
1689  else if (ins.handler_idx == WINED3DSIH_LOOP
1690  || ins.handler_idx == WINED3DSIH_REP)
1691  {
1692  ++cur_loop_depth;
1693  if (cur_loop_depth > max_loop_depth)
1694  max_loop_depth = cur_loop_depth;
1695  }
1696  else if (ins.handler_idx == WINED3DSIH_ENDLOOP
1697  || ins.handler_idx == WINED3DSIH_ENDREP)
1698  {
1699  --cur_loop_depth;
1700  }
1701  else if (ins.handler_idx == WINED3DSIH_GATHER4
1703  || ins.handler_idx == WINED3DSIH_SAMPLE
1709  {
1710  shader_record_sample(reg_maps, ins.src[1].reg.idx[0].offset,
1711  ins.src[2].reg.idx[0].offset, reg_maps->sampler_map.count);
1712  }
1713  else if (ins.handler_idx == WINED3DSIH_GATHER4_PO
1715  {
1716  shader_record_sample(reg_maps, ins.src[2].reg.idx[0].offset,
1717  ins.src[3].reg.idx[0].offset, reg_maps->sampler_map.count);
1718  }
1719  else if ((ins.handler_idx == WINED3DSIH_BUFINFO && ins.src[0].reg.type == WINED3DSPR_RESOURCE)
1720  || (ins.handler_idx == WINED3DSIH_SAMPLE_INFO && ins.src[0].reg.type == WINED3DSPR_RESOURCE))
1721  {
1722  shader_record_sample(reg_maps, ins.src[0].reg.idx[0].offset,
1723  WINED3D_SAMPLER_DEFAULT, reg_maps->sampler_map.count);
1724  }
1725  else if (ins.handler_idx == WINED3DSIH_LD
1726  || ins.handler_idx == WINED3DSIH_LD2DMS
1727  || (ins.handler_idx == WINED3DSIH_LD_RAW && ins.src[1].reg.type == WINED3DSPR_RESOURCE)
1728  || (ins.handler_idx == WINED3DSIH_RESINFO && ins.src[1].reg.type == WINED3DSPR_RESOURCE))
1729  {
1730  shader_record_sample(reg_maps, ins.src[1].reg.idx[0].offset,
1731  WINED3D_SAMPLER_DEFAULT, reg_maps->sampler_map.count);
1732  }
1733  else if (ins.handler_idx == WINED3DSIH_LD_STRUCTURED
1734  && ins.src[2].reg.type == WINED3DSPR_RESOURCE)
1735  {
1736  shader_record_sample(reg_maps, ins.src[2].reg.idx[0].offset,
1737  WINED3D_SAMPLER_DEFAULT, reg_maps->sampler_map.count);
1738  }
1739 
1740  if (ins.predicate)
1741  if (!shader_record_register_usage(shader, reg_maps, &ins.predicate->reg,
1742  shader_version.type, constf_size))
1743  return WINED3DERR_INVALIDCALL;
1744 
1745  for (i = 0; i < ins.src_count; ++i)
1746  {
1747  unsigned int count = get_instr_extra_regcount(ins.handler_idx, i);
1748  struct wined3d_shader_register reg = ins.src[i].reg;
1749 
1750  if (!shader_record_register_usage(shader, reg_maps, &ins.src[i].reg,
1751  shader_version.type, constf_size))
1752  return WINED3DERR_INVALIDCALL;
1753  while (count)
1754  {
1755  ++reg.idx[0].offset;
1756  if (!shader_record_register_usage(shader, reg_maps, &reg,
1757  shader_version.type, constf_size))
1758  return WINED3DERR_INVALIDCALL;
1759  --count;
1760  }
1761 
1762  if (color0_mov)
1763  {
1764  if (ins.src[i].reg.type == WINED3DSPR_TEMP
1765  && ins.src[i].swizzle == WINED3DSP_NOSWIZZLE)
1766  {
1767  shader->u.ps.color0_mov = TRUE;
1768  shader->u.ps.color0_reg = ins.src[i].reg.idx[0].offset;
1769  }
1770  }
1771  }
1772  }
1773 
1774  prev_ins = current_ins;
1775  }
1776  reg_maps->loop_depth = max_loop_depth;
1777 
1778  if (phase)
1779  {
1780  phase->end = prev_ins;
1781  phase = NULL;
1782  }
1783 
1784  /* PS before 2.0 don't have explicit color outputs. Instead the value of
1785  * R0 is written to the render target. */
1786  if (shader_version.major < 2 && shader_version.type == WINED3D_SHADER_TYPE_PIXEL)
1787  reg_maps->rt_mask |= (1u << 0);
1788 
1789  if (input_signature->elements)
1790  {
1791  for (i = 0; i < input_signature->element_count; ++i)
1792  {
1793  if (shader_version.type == WINED3D_SHADER_TYPE_VERTEX)
1794  {
1795  if (input_signature->elements[i].register_idx >= ARRAY_SIZE(shader->u.vs.attributes))
1796  {
1797  WARN("Invalid input signature register index %u.\n", input_signature->elements[i].register_idx);
1798  return WINED3DERR_INVALIDCALL;
1799  }
1800  }
1801  else if (shader_version.type == WINED3D_SHADER_TYPE_PIXEL)
1802  {
1803  if (input_signature->elements[i].sysval_semantic == WINED3D_SV_POSITION)
1804  reg_maps->vpos = 1;
1805  else if (input_signature->elements[i].sysval_semantic == WINED3D_SV_IS_FRONT_FACE)
1806  reg_maps->usesfacing = 1;
1807  }
1808  reg_maps->input_registers |= 1u << input_signature->elements[i].register_idx;
1809  }
1810  }
1811  else if (!input_signature->elements && reg_maps->input_registers)
1812  {
1813  unsigned int count = wined3d_popcount(reg_maps->input_registers);
1815  unsigned int i;
1816 
1817  if (!(input_signature->elements = heap_calloc(count, sizeof(*input_signature->elements))))
1818  return E_OUTOFMEMORY;
1819  input_signature->element_count = count;
1820 
1821  e = input_signature->elements;
1822  for (i = 0; i < ARRAY_SIZE(input_signature_elements); ++i)
1823  {
1824  if (!(reg_maps->input_registers & (1u << i)))
1825  continue;
1826  input_signature_elements[i].register_idx = i;
1827  *e++ = input_signature_elements[i];
1828  }
1829  }
1830 
1831  if (output_signature->elements)
1832  {
1833  if (FAILED(hr = shader_scan_output_signature(shader)))
1834  return hr;
1835  }
1836  else if (reg_maps->output_registers)
1837  {
1838  unsigned int count = wined3d_popcount(reg_maps->output_registers);
1840 
1841  if (!(output_signature->elements = heap_calloc(count, sizeof(*output_signature->elements))))
1842  return E_OUTOFMEMORY;
1843  output_signature->element_count = count;
1844 
1845  e = output_signature->elements;
1846  for (i = 0; i < ARRAY_SIZE(output_signature_elements); ++i)
1847  {
1848  if (!(reg_maps->output_registers & (1u << i)))
1849  continue;
1850  *e++ = output_signature_elements[i];
1851  }
1852  }
1853 
1854  return WINED3D_OK;
1855 }
1856 
1858 {
1859  struct wined3d_shader_indexable_temp *reg, *reg_next;
1860 
1861  heap_free(reg_maps->constf);
1862  heap_free(reg_maps->sampler_map.entries);
1863 
1865  heap_free(reg);
1866  list_init(&reg_maps->indexable_temps);
1867 
1868  heap_free(reg_maps->tgsm);
1869 }
1870 
1871 unsigned int shader_find_free_input_register(const struct wined3d_shader_reg_maps *reg_maps, unsigned int max)
1872 {
1873  DWORD map = 1u << max;
1874  map |= map - 1;
1875  map &= reg_maps->shader_version.major < 3 ? ~reg_maps->texcoord : ~reg_maps->input_registers;
1876 
1877  return wined3d_log2i(map);
1878 }
1879 
1880 static void shader_dump_global_flags(struct wined3d_string_buffer *buffer, DWORD global_flags)
1881 {
1882  if (global_flags & WINED3DSGF_REFACTORING_ALLOWED)
1883  {
1884  shader_addline(buffer, "refactoringAllowed");
1885  global_flags &= ~WINED3DSGF_REFACTORING_ALLOWED;
1886  if (global_flags)
1887  shader_addline(buffer, " | ");
1888  }
1889 
1890  if (global_flags & WINED3DSGF_FORCE_EARLY_DEPTH_STENCIL)
1891  {
1892  shader_addline(buffer, "forceEarlyDepthStencil");
1893  global_flags &= ~WINED3DSGF_FORCE_EARLY_DEPTH_STENCIL;
1894  if (global_flags)
1895  shader_addline(buffer, " | ");
1896  }
1897 
1899  {
1900  shader_addline(buffer, "enableRawAndStructuredBuffers");
1901  global_flags &= ~WINED3DSGF_ENABLE_RAW_AND_STRUCTURED_BUFFERS;
1902  }
1903 
1904  if (global_flags)
1905  shader_addline(buffer, "unknown_flags(%#x)", global_flags);
1906 }
1907 
1908 static void shader_dump_sync_flags(struct wined3d_string_buffer *buffer, DWORD sync_flags)
1909 {
1910  if (sync_flags & WINED3DSSF_GROUP_SHARED_MEMORY)
1911  {
1912  shader_addline(buffer, "_g");
1913  sync_flags &= ~WINED3DSSF_GROUP_SHARED_MEMORY;
1914  }
1915  if (sync_flags & WINED3DSSF_THREAD_GROUP)
1916  {
1917  shader_addline(buffer, "_t");
1918  sync_flags &= ~WINED3DSSF_THREAD_GROUP;
1919  }
1920 
1921  if (sync_flags)
1922  shader_addline(buffer, "_unknown_flags(%#x)", sync_flags);
1923 }
1924 
1926 {
1927  if (!(flags & WINED3DSI_PRECISE_XYZW))
1928  return;
1929 
1930  shader_addline(buffer, " [precise");
1931  if (flags != WINED3DSI_PRECISE_XYZW)
1932  {
1933  shader_addline(buffer, "(%s%s%s%s)",
1934  flags & WINED3DSI_PRECISE_X ? "x" : "",
1935  flags & WINED3DSI_PRECISE_Y ? "y" : "",
1936  flags & WINED3DSI_PRECISE_Z ? "z" : "",
1937  flags & WINED3DSI_PRECISE_W ? "w" : "");
1938  }
1939  shader_addline(buffer, "]");
1940 }
1941 
1942 static void shader_dump_uav_flags(struct wined3d_string_buffer *buffer, DWORD uav_flags)
1943 {
1944  if (uav_flags & WINED3DSUF_GLOBALLY_COHERENT)
1945  {
1946  shader_addline(buffer, "_glc");
1947  uav_flags &= ~WINED3DSUF_GLOBALLY_COHERENT;
1948  }
1949  if (uav_flags & WINED3DSUF_ORDER_PRESERVING_COUNTER)
1950  {
1951  shader_addline(buffer, "_opc");
1952  uav_flags &= ~WINED3DSUF_ORDER_PRESERVING_COUNTER;
1953  }
1954 
1955  if (uav_flags)
1956  shader_addline(buffer, "_unknown_flags(%#x)", uav_flags);
1957 }
1958 
1960  enum wined3d_tessellator_domain domain)
1961 {
1962  switch (domain)
1963  {
1965  shader_addline(buffer, "line");
1966  break;
1968  shader_addline(buffer, "triangle");
1969  break;
1971  shader_addline(buffer, "quad");
1972  break;
1973  default:
1974  shader_addline(buffer, "unknown_tessellator_domain(%#x)", domain);
1975  break;
1976  }
1977 }
1978 
1980  enum wined3d_tessellator_output_primitive output_primitive)
1981 {
1982  switch (output_primitive)
1983  {
1985  shader_addline(buffer, "point");
1986  break;
1988  shader_addline(buffer, "line");
1989  break;
1991  shader_addline(buffer, "triangle_cw");
1992  break;
1994  shader_addline(buffer, "triangle_ccw");
1995  break;
1996  default:
1997  shader_addline(buffer, "unknown_tessellator_output_primitive(%#x)", output_primitive);
1998  break;
1999  }
2000 }
2001 
2003  enum wined3d_tessellator_partitioning partitioning)
2004 {
2005  switch (partitioning)
2006  {
2008  shader_addline(buffer, "integer");
2009  break;
2011  shader_addline(buffer, "pow2");
2012  break;
2014  shader_addline(buffer, "fractional_odd");
2015  break;
2017  shader_addline(buffer, "fractional_even");
2018  break;
2019  default:
2020  shader_addline(buffer, "unknown_tessellator_partitioning(%#x)", partitioning);
2021  break;
2022  }
2023 }
2024 
2027 {
2028  unsigned int i;
2029 
2030  for (i = 0; i < ARRAY_SIZE(shader_input_sysval_semantic_names); ++i)
2031  {
2033  {
2035  return;
2036  }
2037  }
2038 
2039  shader_addline(buffer, "unknown_shader_input_sysval_semantic(%#x)", semantic);
2040 }
2041 
2043  const struct wined3d_shader_semantic *semantic, unsigned int flags,
2044  const struct wined3d_shader_version *shader_version)
2045 {
2046  shader_addline(buffer, "dcl");
2047 
2048  if (semantic->reg.reg.type == WINED3DSPR_SAMPLER)
2049  {
2050  switch (semantic->resource_type)
2051  {
2053  shader_addline(buffer, "_2d");
2054  break;
2055 
2057  shader_addline(buffer, "_3d");
2058  break;
2059 
2061  shader_addline(buffer, "_cube");
2062  break;
2063 
2064  default:
2065  shader_addline(buffer, "_unknown_resource_type(%#x)", semantic->resource_type);
2066  break;
2067  }
2068  }
2069  else if (semantic->reg.reg.type == WINED3DSPR_RESOURCE || semantic->reg.reg.type == WINED3DSPR_UAV)
2070  {
2071  if (semantic->reg.reg.type == WINED3DSPR_RESOURCE)
2072  shader_addline(buffer, "_resource_");
2073  else
2074  shader_addline(buffer, "_uav_");
2075  switch (semantic->resource_type)
2076  {
2078  shader_addline(buffer, "buffer");
2079  break;
2080 
2082  shader_addline(buffer, "texture1d");
2083  break;
2084 
2086  shader_addline(buffer, "texture2d");
2087  break;
2088 
2090  shader_addline(buffer, "texture2dms");
2091  break;
2092 
2094  shader_addline(buffer, "texture3d");
2095  break;
2096 
2098  shader_addline(buffer, "texturecube");
2099  break;
2100 
2102  shader_addline(buffer, "texture1darray");
2103  break;
2104 
2106  shader_addline(buffer, "texture2darray");
2107  break;
2108 
2110  shader_addline(buffer, "texture2dmsarray");
2111  break;
2112 
2114  shader_addline(buffer, "texturecubearray");
2115  break;
2116 
2117  default:
2118  shader_addline(buffer, "unknown");
2119  break;
2120  }
2121  if (semantic->reg.reg.type == WINED3DSPR_UAV)
2122  shader_dump_uav_flags(buffer, flags);
2123  switch (semantic->resource_data_type)
2124  {
2125  case WINED3D_DATA_FLOAT:
2126  shader_addline(buffer, " (float)");
2127  break;
2128 
2129  case WINED3D_DATA_INT:
2130  shader_addline(buffer, " (int)");
2131  break;
2132 
2133  case WINED3D_DATA_UINT:
2134  shader_addline(buffer, " (uint)");
2135  break;
2136 
2137  case WINED3D_DATA_UNORM:
2138  shader_addline(buffer, " (unorm)");
2139  break;
2140 
2141  case WINED3D_DATA_SNORM:
2142  shader_addline(buffer, " (snorm)");
2143  break;
2144 
2145  default:
2146  shader_addline(buffer, " (unknown)");
2147  break;
2148  }
2149  }
2150  else
2151  {
2152  /* Pixel shaders before 3.0 don't have usage semantics. */
2153  if (shader_version->major < 3 && shader_version->type == WINED3D_SHADER_TYPE_PIXEL)
2154  return;
2155  else
2156  shader_addline(buffer, "_");
2157 
2158  switch (semantic->usage)
2159  {
2161  shader_addline(buffer, "position%u", semantic->usage_idx);
2162  break;
2163 
2165  shader_addline(buffer, "blend");
2166  break;
2167 
2169  shader_addline(buffer, "weight");
2170  break;
2171 
2173  shader_addline(buffer, "normal%u", semantic->usage_idx);
2174  break;
2175 
2177  shader_addline(buffer, "psize");
2178  break;
2179 
2181  if (!semantic->usage_idx)
2182  shader_addline(buffer, "color");
2183  else
2184  shader_addline(buffer, "specular%u", (semantic->usage_idx - 1));
2185  break;
2186 
2188  shader_addline(buffer, "texture%u", semantic->usage_idx);
2189  break;
2190 
2192  shader_addline(buffer, "tangent");
2193  break;
2194 
2196  shader_addline(buffer, "binormal");
2197  break;
2198 
2200  shader_addline(buffer, "tessfactor");
2201  break;
2202 
2204  shader_addline(buffer, "positionT%u", semantic->usage_idx);
2205  break;
2206 
2208  shader_addline(buffer, "fog");
2209  break;
2210 
2212  shader_addline(buffer, "depth");
2213  break;
2214 
2216  shader_addline(buffer, "sample");
2217  break;
2218 
2219  default:
2220  shader_addline(buffer, "<unknown_semantic(%#x)>", semantic->usage);
2221  FIXME("Unrecognised semantic usage %#x.\n", semantic->usage);
2222  }
2223  }
2224 }
2225 
2226 static void shader_dump_register(struct wined3d_string_buffer *buffer,
2227  const struct wined3d_shader_register *reg, const struct wined3d_shader_version *shader_version)
2228 {
2229  static const char * const rastout_reg_names[] = {"oPos", "oFog", "oPts"};
2230  static const char * const misctype_reg_names[] = {"vPos", "vFace"};
2231  UINT offset = reg->idx[0].offset;
2232 
2233  switch (reg->type)
2234  {
2235  case WINED3DSPR_TEMP:
2236  shader_addline(buffer, "r");
2237  break;
2238 
2239  case WINED3DSPR_INPUT:
2240  shader_addline(buffer, "v");
2241  break;
2242 
2243  case WINED3DSPR_CONST:
2244  case WINED3DSPR_CONST2:
2245  case WINED3DSPR_CONST3:
2246  case WINED3DSPR_CONST4:
2247  shader_addline(buffer, "c");
2248  offset = shader_get_float_offset(reg->type, offset);
2249  break;
2250 
2251  case WINED3DSPR_TEXTURE: /* vs: case WINED3DSPR_ADDR */
2252  shader_addline(buffer, "%c", shader_version->type == WINED3D_SHADER_TYPE_PIXEL ? 't' : 'a');
2253  break;
2254 
2255  case WINED3DSPR_RASTOUT:
2256  shader_addline(buffer, "%s", rastout_reg_names[offset]);
2257  break;
2258 
2259  case WINED3DSPR_COLOROUT:
2260  shader_addline(buffer, "oC");
2261  break;
2262 
2263  case WINED3DSPR_DEPTHOUT:
2264  shader_addline(buffer, "oDepth");
2265  break;
2266 
2267  case WINED3DSPR_DEPTHOUTGE:
2268  shader_addline(buffer, "oDepthGE");
2269  break;
2270 
2271  case WINED3DSPR_DEPTHOUTLE:
2272  shader_addline(buffer, "oDepthLE");
2273  break;
2274 
2275  case WINED3DSPR_ATTROUT:
2276  shader_addline(buffer, "oD");
2277  break;
2278 
2279  case WINED3DSPR_TEXCRDOUT:
2280  /* Vertex shaders >= 3.0 use general purpose output registers
2281  * (WINED3DSPR_OUTPUT), which can include an address token. */
2282  if (shader_version->major >= 3)
2283  shader_addline(buffer, "o");
2284  else
2285  shader_addline(buffer, "oT");
2286  break;
2287 
2288  case WINED3DSPR_CONSTINT:
2289  shader_addline(buffer, "i");
2290  break;
2291 
2292  case WINED3DSPR_CONSTBOOL:
2293  shader_addline(buffer, "b");
2294  break;
2295 
2296  case WINED3DSPR_LABEL:
2297  shader_addline(buffer, "l");
2298  break;
2299 
2300  case WINED3DSPR_LOOP:
2301  shader_addline(buffer, "aL");
2302  break;
2303 
2304  case WINED3DSPR_SAMPLER:
2305  shader_addline(buffer, "s");
2306  break;
2307 
2308  case WINED3DSPR_MISCTYPE:
2309  if (offset > 1)
2310  {
2311  FIXME("Unhandled misctype register %u.\n", offset);
2312  shader_addline(buffer, "<unhandled misctype %#x>", offset);
2313  }
2314  else
2315  {
2316  shader_addline(buffer, "%s", misctype_reg_names[offset]);
2317  }
2318  break;
2319 
2320  case WINED3DSPR_PREDICATE:
2321  shader_addline(buffer, "p");
2322  break;
2323 
2324  case WINED3DSPR_IMMCONST:
2325  shader_addline(buffer, "l");
2326  break;
2327 
2329  shader_addline(buffer, "cb");
2330  break;
2331 
2333  shader_addline(buffer, "icb");
2334  break;
2335 
2336  case WINED3DSPR_PRIMID:
2337  shader_addline(buffer, "primID");
2338  break;
2339 
2340  case WINED3DSPR_NULL:
2341  shader_addline(buffer, "null");
2342  break;
2343 
2344  case WINED3DSPR_RASTERIZER:
2345  shader_addline(buffer, "rasterizer");
2346  break;
2347 
2348  case WINED3DSPR_RESOURCE:
2349  shader_addline(buffer, "t");
2350  break;
2351 
2352  case WINED3DSPR_UAV:
2353  shader_addline(buffer, "u");
2354  break;
2355 
2356  case WINED3DSPR_OUTPOINTID:
2357  shader_addline(buffer, "vOutputControlPointID");
2358  break;
2359 
2360  case WINED3DSPR_FORKINSTID:
2361  shader_addline(buffer, "vForkInstanceId");
2362  break;
2363 
2364  case WINED3DSPR_JOININSTID:
2365  shader_addline(buffer, "vJoinInstanceId");
2366  break;
2367 
2369  shader_addline(buffer, "vicp");
2370  break;
2371 
2373  shader_addline(buffer, "vocp");
2374  break;
2375 
2376  case WINED3DSPR_PATCHCONST:
2377  shader_addline(buffer, "vpc");
2378  break;
2379 
2380  case WINED3DSPR_TESSCOORD:
2381  shader_addline(buffer, "vDomainLocation");
2382  break;
2383 
2385  shader_addline(buffer, "g");
2386  break;
2387 
2388  case WINED3DSPR_THREADID:
2389  shader_addline(buffer, "vThreadID");
2390  break;
2391 
2393  shader_addline(buffer, "vThreadGroupID");
2394  break;
2395 
2397  shader_addline(buffer, "vThreadIDInGroup");
2398  break;
2399 
2401  shader_addline(buffer, "vThreadIDInGroupFlattened");
2402  break;
2403 
2404  case WINED3DSPR_IDXTEMP:
2405  shader_addline(buffer, "x");
2406  break;
2407 
2408  case WINED3DSPR_STREAM:
2409  shader_addline(buffer, "m");
2410  break;
2411 
2413  shader_addline(buffer, "fb");
2414  break;
2415 
2417  shader_addline(buffer, "fp");
2418  break;
2419 
2420  case WINED3DSPR_COVERAGE:
2421  shader_addline(buffer, "vCoverage");
2422  break;
2423 
2424  case WINED3DSPR_SAMPLEMASK:
2425  shader_addline(buffer, "oMask");
2426  break;
2427 
2428  case WINED3DSPR_GSINSTID:
2429  shader_addline(buffer, "vGSInstanceID");
2430  break;
2431 
2432  default:
2433  shader_addline(buffer, "<unhandled_rtype(%#x)>", reg->type);
2434  break;
2435  }
2436 
2437  if (reg->type == WINED3DSPR_IMMCONST)
2438  {
2439  shader_addline(buffer, "(");
2440  switch (reg->immconst_type)
2441  {
2443  switch (reg->data_type)
2444  {
2445  case WINED3D_DATA_FLOAT:
2446  shader_addline(buffer, "%.8e", *(const float *)reg->u.immconst_data);
2447  break;
2448  case WINED3D_DATA_INT:
2449  shader_addline(buffer, "%d", reg->u.immconst_data[0]);
2450  break;
2451  case WINED3D_DATA_RESOURCE:
2452  case WINED3D_DATA_SAMPLER:
2453  case WINED3D_DATA_UINT:
2454  shader_addline(buffer, "%u", reg->u.immconst_data[0]);
2455  break;
2456  default:
2457  shader_addline(buffer, "<unhandled data type %#x>", reg->data_type);
2458  break;
2459  }
2460  break;
2461 
2462  case WINED3D_IMMCONST_VEC4:
2463  switch (reg->data_type)
2464  {
2465  case WINED3D_DATA_FLOAT:
2466  shader_addline(buffer, "%.8e, %.8e, %.8e, %.8e",
2467  *(const float *)&reg->u.immconst_data[0], *(const float *)&reg->u.immconst_data[1],
2468  *(const float *)&reg->u.immconst_data[2], *(const float *)&reg->u.immconst_data[3]);
2469  break;
2470  case WINED3D_DATA_INT:
2471  shader_addline(buffer, "%d, %d, %d, %d",
2472  reg->u.immconst_data[0], reg->u.immconst_data[1],
2473  reg->u.immconst_data[2], reg->u.immconst_data[3]);
2474  break;
2475  case WINED3D_DATA_RESOURCE:
2476  case WINED3D_DATA_SAMPLER:
2477  case WINED3D_DATA_UINT:
2478  shader_addline(buffer, "%u, %u, %u, %u",
2479  reg->u.immconst_data[0], reg->u.immconst_data[1],
2480  reg->u.immconst_data[2], reg->u.immconst_data[3]);
2481  break;
2482  default:
2483  shader_addline(buffer, "<unhandled data type %#x>", reg->data_type);
2484  break;
2485  }
2486  break;
2487 
2488  default:
2489  shader_addline(buffer, "<unhandled immconst_type %#x>", reg->immconst_type);
2490  break;
2491  }
2492  shader_addline(buffer, ")");
2493  }
2494  else if (reg->type != WINED3DSPR_RASTOUT
2495  && reg->type != WINED3DSPR_MISCTYPE
2496  && reg->type != WINED3DSPR_NULL)
2497  {
2498  if (offset != ~0u)
2499  {
2500  shader_addline(buffer, "[");
2501  if (reg->idx[0].rel_addr)
2502  {
2503  shader_dump_src_param(buffer, reg->idx[0].rel_addr, shader_version);
2504  shader_addline(buffer, " + ");
2505  }
2506  shader_addline(buffer, "%u]", offset);
2507 
2508  if (reg->idx[1].offset != ~0u)
2509  {
2510  shader_addline(buffer, "[");
2511  if (reg->idx[1].rel_addr)
2512  {
2513  shader_dump_src_param(buffer, reg->idx[1].rel_addr, shader_version);
2514  shader_addline(buffer, " + ");
2515  }
2516  shader_addline(buffer, "%u]", reg->idx[1].offset);
2517  }
2518  }
2519 
2520  if (reg->type == WINED3DSPR_FUNCTIONPOINTER)
2521  shader_addline(buffer, "[%u]", reg->u.fp_body_idx);
2522  }
2523 }
2524 
2526  const struct wined3d_shader_dst_param *param, const struct wined3d_shader_version *shader_version)
2527 {
2528  DWORD write_mask = param->write_mask;
2529 
2530  shader_dump_register(buffer, &param->reg, shader_version);
2531 
2532  if (write_mask && write_mask != WINED3DSP_WRITEMASK_ALL)
2533  {
2534  static const char write_mask_chars[] = "xyzw";
2535 
2536  shader_addline(buffer, ".");
2537  if (write_mask & WINED3DSP_WRITEMASK_0)
2538  shader_addline(buffer, "%c", write_mask_chars[0]);
2539  if (write_mask & WINED3DSP_WRITEMASK_1)
2540  shader_addline(buffer, "%c", write_mask_chars[1]);
2541  if (write_mask & WINED3DSP_WRITEMASK_2)
2542  shader_addline(buffer, "%c", write_mask_chars[2]);
2543  if (write_mask & WINED3DSP_WRITEMASK_3)
2544  shader_addline(buffer, "%c", write_mask_chars[3]);
2545  }
2546 }
2547 
2549  const struct wined3d_shader_src_param *param, const struct wined3d_shader_version *shader_version)
2550 {
2551  enum wined3d_shader_src_modifier src_modifier = param->modifiers;
2552  DWORD swizzle = param->swizzle;
2553 
2554  if (src_modifier == WINED3DSPSM_NEG
2555  || src_modifier == WINED3DSPSM_BIASNEG
2556  || src_modifier == WINED3DSPSM_SIGNNEG
2557  || src_modifier == WINED3DSPSM_X2NEG
2558  || src_modifier == WINED3DSPSM_ABSNEG)
2559  shader_addline(buffer, "-");
2560  else if (src_modifier == WINED3DSPSM_COMP)
2561  shader_addline(buffer, "1-");
2562  else if (src_modifier == WINED3DSPSM_NOT)
2563  shader_addline(buffer, "!");
2564 
2565  if (src_modifier == WINED3DSPSM_ABS || src_modifier == WINED3DSPSM_ABSNEG)
2566  shader_addline(buffer, "abs(");
2567 
2568  shader_dump_register(buffer, &param->reg, shader_version);
2569 
2570  switch (src_modifier)
2571  {
2572  case WINED3DSPSM_NONE: break;
2573  case WINED3DSPSM_NEG: break;
2574  case WINED3DSPSM_NOT: break;
2575  case WINED3DSPSM_BIAS: shader_addline(buffer, "_bias"); break;
2576  case WINED3DSPSM_BIASNEG: shader_addline(buffer, "_bias"); break;
2577  case WINED3DSPSM_SIGN: shader_addline(buffer, "_bx2"); break;
2578  case WINED3DSPSM_SIGNNEG: shader_addline(buffer, "_bx2"); break;
2579  case WINED3DSPSM_COMP: break;
2580  case WINED3DSPSM_X2: shader_addline(buffer, "_x2"); break;
2581  case WINED3DSPSM_X2NEG: shader_addline(buffer, "_x2"); break;
2582  case WINED3DSPSM_DZ: shader_addline(buffer, "_dz"); break;
2583  case WINED3DSPSM_DW: shader_addline(buffer, "_dw"); break;
2584  case WINED3DSPSM_ABSNEG: shader_addline(buffer, ")"); break;
2585  case WINED3DSPSM_ABS: shader_addline(buffer, ")"); break;
2586  default: shader_addline(buffer, "_unknown_modifier(%#x)", src_modifier);
2587  }
2588 
2589  if (swizzle != WINED3DSP_NOSWIZZLE)
2590  {
2591  static const char swizzle_chars[] = "xyzw";
2592  DWORD swizzle_x = swizzle & 0x03;
2593  DWORD swizzle_y = (swizzle >> 2) & 0x03;
2594  DWORD swizzle_z = (swizzle >> 4) & 0x03;
2595  DWORD swizzle_w = (swizzle >> 6) & 0x03;
2596 
2597  if (swizzle_x == swizzle_y
2598  && swizzle_x == swizzle_z
2599  && swizzle_x == swizzle_w)
2600  {
2601  shader_addline(buffer, ".%c", swizzle_chars[swizzle_x]);
2602  }
2603  else
2604  {
2605  shader_addline(buffer, ".%c%c%c%c", swizzle_chars[swizzle_x], swizzle_chars[swizzle_y],
2606  swizzle_chars[swizzle_z], swizzle_chars[swizzle_w]);
2607  }
2608  }
2609 }
2610 
2611 /* Shared code in order to generate the bulk of the shader string. */
2613  const struct wined3d_shader_reg_maps *reg_maps, void *backend_ctx,
2614  const DWORD *start, const DWORD *end)
2615 {
2616  struct wined3d_device *device = shader->device;
2617  const struct wined3d_shader_frontend *fe = shader->frontend;
2618  void *fe_data = shader->frontend_data;
2619  struct wined3d_shader_version shader_version;
2620  struct wined3d_shader_parser_state state;
2621  struct wined3d_shader_instruction ins;
2622  struct wined3d_shader_tex_mx tex_mx;
2623  struct wined3d_shader_context ctx;
2624  const DWORD *ptr;
2625 
2626  /* Initialize current parsing state. */
2627  tex_mx.current_row = 0;
2628  state.current_loop_depth = 0;
2629  state.current_loop_reg = 0;
2630  state.in_subroutine = FALSE;
2631 
2632  ctx.shader = shader;
2633  ctx.gl_info = &device->adapter->gl_info;
2634  ctx.reg_maps = reg_maps;
2635  ctx.buffer = buffer;
2636  ctx.tex_mx = &tex_mx;
2637  ctx.state = &state;
2638  ctx.backend_data = backend_ctx;
2639  ins.ctx = &ctx;
2640 
2641  fe->shader_read_header(fe_data, &ptr, &shader_version);
2642  if (start)
2643  ptr = start;
2644 
2645  while (!fe->shader_is_end(fe_data, &ptr) && ptr != end)
2646  {
2647  /* Read opcode. */
2648  fe->shader_read_instruction(fe_data, &ptr, &ins);
2649 
2650  /* Unknown opcode and its parameters. */
2652  {
2653  WARN("Encountered unrecognised or invalid instruction.\n");
2654  return WINED3DERR_INVALIDCALL;
2655  }
2656 
2657  if (ins.predicate)
2658  FIXME("Predicates not implemented.\n");
2659 
2660  /* Call appropriate function for output target */
2661  device->shader_backend->shader_handle_instruction(&ins);
2662  }
2663 
2664  return WINED3D_OK;
2665 }
2666 
2668  const struct wined3d_shader_dst_param *dst)
2669 {
2670  DWORD mmask = dst->modifiers;
2671 
2672  switch (dst->shift)
2673  {
2674  case 0: break;
2675  case 13: shader_addline(buffer, "_d8"); break;
2676  case 14: shader_addline(buffer, "_d4"); break;
2677  case 15: shader_addline(buffer, "_d2"); break;
2678  case 1: shader_addline(buffer, "_x2"); break;
2679  case 2: shader_addline(buffer, "_x4"); break;
2680  case 3: shader_addline(buffer, "_x8"); break;
2681  default: shader_addline(buffer, "_unhandled_shift(%d)", dst->shift); break;
2682  }
2683 
2684  if (mmask & WINED3DSPDM_SATURATE) shader_addline(buffer, "_sat");
2685  if (mmask & WINED3DSPDM_PARTIALPRECISION) shader_addline(buffer, "_pp");
2686  if (mmask & WINED3DSPDM_MSAMPCENTROID) shader_addline(buffer, "_centroid");
2687 
2688  mmask &= ~(WINED3DSPDM_SATURATE | WINED3DSPDM_PARTIALPRECISION | WINED3DSPDM_MSAMPCENTROID);
2689  if (mmask) FIXME("Unrecognised modifier %#x.\n", mmask);
2690 }
2691 
2693  const struct wined3d_shader_primitive_type *primitive_type)
2694 {
2695  switch (primitive_type->type)
2696  {
2697  case WINED3D_PT_UNDEFINED:
2698  shader_addline(buffer, "undefined");
2699  break;
2700  case WINED3D_PT_POINTLIST:
2701  shader_addline(buffer, "pointlist");
2702  break;
2703  case WINED3D_PT_LINELIST:
2704  shader_addline(buffer, "linelist");
2705  break;
2706  case WINED3D_PT_LINESTRIP:
2707  shader_addline(buffer, "linestrip");
2708  break;
2710  shader_addline(buffer, "trianglelist");
2711  break;
2713  shader_addline(buffer, "trianglestrip");
2714  break;
2716  shader_addline(buffer, "trianglefan");
2717  break;
2719  shader_addline(buffer, "linelist_adj");
2720  break;
2722  shader_addline(buffer, "linestrip_adj");
2723  break;
2725  shader_addline(buffer, "trianglelist_adj");
2726  break;
2728  shader_addline(buffer, "trianglestrip_adj");
2729  break;
2730  case WINED3D_PT_PATCH:
2731  shader_addline(buffer, "patch%u", primitive_type->patch_vertex_count);
2732  break;
2733  default:
2734  shader_addline(buffer, "<unrecognized_primitive_type %#x>", primitive_type->type);
2735  break;
2736  }
2737 }
2738 
2740  enum wined3d_shader_interpolation_mode interpolation_mode)
2741 {
2742  switch (interpolation_mode)
2743  {
2744  case WINED3DSIM_CONSTANT:
2745  shader_addline(buffer, "constant");
2746  break;
2747  case WINED3DSIM_LINEAR:
2748  shader_addline(buffer, "linear");
2749  break;
2751  shader_addline(buffer, "linear centroid");
2752  break;
2754  shader_addline(buffer, "linear noperspective");
2755  break;
2757  shader_addline(buffer, "linear sample");
2758  break;
2760  shader_addline(buffer, "linear noperspective centroid");
2761  break;
2763  shader_addline(buffer, "linear noperspective sample");
2764  break;
2765  default:
2766  shader_addline(buffer, "<unrecognized_interpolation_mode %#x>", interpolation_mode);
2767  break;
2768  }
2769 }
2770 
2771 static void shader_trace_init(const struct wined3d_shader_frontend *fe, void *fe_data)
2772 {
2773  struct wined3d_shader_version shader_version;
2774  struct wined3d_string_buffer buffer;
2775  const char *type_prefix;
2776  const char *p, *q;
2777  const DWORD *ptr;
2778  DWORD i;
2779 
2780  if (!string_buffer_init(&buffer))
2781  {
2782  ERR("Failed to initialize string buffer.\n");
2783  return;
2784  }
2785 
2786  fe->shader_read_header(fe_data, &ptr, &shader_version);
2787 
2788  TRACE("Parsing %p.\n", ptr);
2789 
2790  switch (shader_version.type)
2791  {
2793  type_prefix = "vs";
2794  break;
2795 
2797  type_prefix = "hs";
2798  break;
2799 
2801  type_prefix = "ds";
2802  break;
2803 
2805  type_prefix = "gs";
2806  break;
2807 
2809  type_prefix = "ps";
2810  break;
2811 
2813  type_prefix = "cs";
2814  break;
2815 
2816  default:
2817  FIXME("Unhandled shader type %#x.\n", shader_version.type);
2818  type_prefix = "unknown";
2819  break;
2820  }
2821 
2822  shader_addline(&buffer, "%s_%u_%u\n", type_prefix, shader_version.major, shader_version.minor);
2823 
2824  while (!fe->shader_is_end(fe_data, &ptr))
2825  {
2826  struct wined3d_shader_instruction ins;
2827 
2828  fe->shader_read_instruction(fe_data, &ptr, &ins);
2830  {
2831  WARN("Skipping unrecognized instruction.\n");
2832  shader_addline(&buffer, "<unrecognized instruction>\n");
2833  continue;
2834  }
2835 
2837  {
2838  shader_dump_decl_usage(&buffer, &ins.declaration.semantic, ins.flags, &shader_version);
2839  shader_dump_ins_modifiers(&buffer, &ins.declaration.semantic.reg);
2840  shader_addline(&buffer, " ");
2841  shader_dump_dst_param(&buffer, &ins.declaration.semantic.reg, &shader_version);
2842  }
2844  {
2845  shader_addline(&buffer, "%s ", shader_opcode_names[ins.handler_idx]);
2846  shader_dump_src_param(&buffer, &ins.declaration.src, &shader_version);
2847  shader_addline(&buffer, ", %s",
2848  ins.flags & WINED3DSI_INDEXED_DYNAMIC ? "dynamicIndexed" : "immediateIndexed");
2849  }
2850  else if (ins.handler_idx == WINED3DSIH_DCL_FUNCTION_BODY)
2851  {
2852  shader_addline(&buffer, "%s fb%u",
2854  }
2856  {
2857  shader_addline(&buffer, "%s ft%u = {...}",
2859  }
2860  else if (ins.handler_idx == WINED3DSIH_DCL_GLOBAL_FLAGS)
2861  {
2862  shader_addline(&buffer, "%s ", shader_opcode_names[ins.handler_idx]);
2863  shader_dump_global_flags(&buffer, ins.flags);
2864  }
2866  {
2867  shader_addline(&buffer, "%s %.8e", shader_opcode_names[ins.handler_idx],
2869  }
2871  {
2872  shader_addline(&buffer, "%s {\n", shader_opcode_names[ins.handler_idx]);
2873  for (i = 0; i < ins.declaration.icb->vec4_count; ++i)
2874  {
2875  shader_addline(&buffer, " {0x%08x, 0x%08x, 0x%08x, 0x%08x},\n",
2876  ins.declaration.icb->data[4 * i + 0],
2877  ins.declaration.icb->data[4 * i + 1],
2878  ins.declaration.icb->data[4 * i + 2],
2879  ins.declaration.icb->data[4 * i + 3]);
2880  }
2881  shader_addline(&buffer, "}");
2882  }
2883  else if (ins.handler_idx == WINED3DSIH_DCL_INDEX_RANGE)
2884  {
2885  shader_addline(&buffer, "%s ", shader_opcode_names[ins.handler_idx]);
2886  shader_dump_dst_param(&buffer, &ins.declaration.index_range.first_register, &shader_version);
2887  shader_addline(&buffer, " %u", ins.declaration.index_range.last_register);
2888  }
2890  {
2891  shader_addline(&buffer, "%s x[%u][%u], %u", shader_opcode_names[ins.handler_idx],
2892  ins.declaration.indexable_temp.register_idx,
2893  ins.declaration.indexable_temp.register_size,
2894  ins.declaration.indexable_temp.component_count);
2895  }
2896  else if (ins.handler_idx == WINED3DSIH_DCL_INPUT_PS)
2897  {
2898  shader_addline(&buffer, "%s ", shader_opcode_names[ins.handler_idx]);
2899  shader_dump_interpolation_mode(&buffer, ins.flags);
2900  shader_addline(&buffer, " ");
2901  shader_dump_dst_param(&buffer, &ins.declaration.dst, &shader_version);
2902  }
2907  {
2908  shader_addline(&buffer, "%s ", shader_opcode_names[ins.handler_idx]);
2909  shader_dump_dst_param(&buffer, &ins.declaration.register_semantic.reg, &shader_version);
2910  shader_addline(&buffer, ", ");
2912  }
2913  else if (ins.handler_idx == WINED3DSIH_DCL_INPUT_PS_SIV)
2914  {
2915  shader_addline(&buffer, "%s ", shader_opcode_names[ins.handler_idx]);
2916  shader_dump_interpolation_mode(&buffer, ins.flags);
2917  shader_addline(&buffer, " ");
2918  shader_dump_dst_param(&buffer, &ins.declaration.register_semantic.reg, &shader_version);
2919  shader_addline(&buffer, ", ");
2921  }
2922  else if (ins.handler_idx == WINED3DSIH_DCL_INPUT
2924  {
2925  shader_addline(&buffer, "%s ", shader_opcode_names[ins.handler_idx]);
2926  shader_dump_dst_param(&buffer, &ins.declaration.dst, &shader_version);
2927  }
2930  {
2931  shader_addline(&buffer, "%s ", shader_opcode_names[ins.handler_idx]);
2933  }
2934  else if (ins.handler_idx == WINED3DSIH_DCL_INTERFACE)
2935  {
2936  shader_addline(&buffer, "%s fp[%u][%u][%u] = {...}",
2938  ins.declaration.fp.array_size, ins.declaration.fp.body_count);
2939  }
2940  else if (ins.handler_idx == WINED3DSIH_DCL_RESOURCE_RAW)
2941  {
2942  shader_addline(&buffer, "%s ", shader_opcode_names[ins.handler_idx]);
2943  shader_dump_dst_param(&buffer, &ins.declaration.dst, &shader_version);
2944  }
2946  {
2947  shader_addline(&buffer, "%s ", shader_opcode_names[ins.handler_idx]);
2948  shader_dump_dst_param(&buffer, &ins.declaration.structured_resource.reg, &shader_version);
2949  shader_addline(&buffer, ", %u", ins.declaration.structured_resource.byte_stride);
2950  }
2951  else if (ins.handler_idx == WINED3DSIH_DCL_SAMPLER)
2952  {
2953  shader_addline(&buffer, "%s ", shader_opcode_names[ins.handler_idx]);
2954  shader_dump_dst_param(&buffer, &ins.declaration.dst, &shader_version);
2956  shader_addline(&buffer, ", comparisonMode");
2957  }
2958  else if (ins.handler_idx == WINED3DSIH_DCL_TEMPS
2965  {
2966  shader_addline(&buffer, "%s %u", shader_opcode_names[ins.handler_idx], ins.declaration.count);
2967  }
2969  {
2970  shader_addline(&buffer, "%s ", shader_opcode_names[ins.handler_idx]);
2972  }
2974  {
2975  shader_addline(&buffer, "%s ", shader_opcode_names[ins.handler_idx]);
2977  }
2979  {
2980  shader_addline(&buffer, "%s ", shader_opcode_names[ins.handler_idx]);
2982  }
2983  else if (ins.handler_idx == WINED3DSIH_DCL_TGSM_RAW)
2984  {
2985  shader_addline(&buffer, "%s ", shader_opcode_names[ins.handler_idx]);
2986  shader_dump_dst_param(&buffer, &ins.declaration.tgsm_raw.reg, &shader_version);
2987  shader_addline(&buffer, ", %u", ins.declaration.tgsm_raw.byte_count);
2988  }
2990  {
2991  shader_addline(&buffer, "%s ", shader_opcode_names[ins.handler_idx]);
2992  shader_dump_dst_param(&buffer, &ins.declaration.tgsm_structured.reg, &shader_version);
2993  shader_addline(&buffer, ", %u, %u", ins.declaration.tgsm_structured.byte_stride,
2994  ins.declaration.tgsm_structured.structure_count);
2995  }
2996  else if (ins.handler_idx == WINED3DSIH_DCL_THREAD_GROUP)
2997  {
2998  shader_addline(&buffer, "%s %u, %u, %u", shader_opcode_names[ins.handler_idx],
3002  }
3003  else if (ins.handler_idx == WINED3DSIH_DCL_UAV_RAW)
3004  {
3005  shader_addline(&buffer, "%s", shader_opcode_names[ins.handler_idx]);
3006  shader_dump_uav_flags(&buffer, ins.flags);
3007  shader_addline(&buffer, " ");
3008  shader_dump_dst_param(&buffer, &ins.declaration.dst, &shader_version);
3009  }
3011  {
3012  shader_addline(&buffer, "%s", shader_opcode_names[ins.handler_idx]);
3013  shader_dump_uav_flags(&buffer, ins.flags);
3014  shader_addline(&buffer, " ");
3015  shader_dump_dst_param(&buffer, &ins.declaration.structured_resource.reg, &shader_version);
3016  shader_addline(&buffer, ", %u", ins.declaration.structured_resource.byte_stride);
3017  }
3018  else if (ins.handler_idx == WINED3DSIH_DEF)
3019  {
3020  shader_addline(&buffer, "def c%u = %.8e, %.8e, %.8e, %.8e", shader_get_float_offset(ins.dst[0].reg.type,
3021  ins.dst[0].reg.idx[0].offset),
3022  *(const float *)&ins.src[0].reg.u.immconst_data[0],
3023  *(const float *)&ins.src[0].reg.u.immconst_data[1],
3024  *(const float *)&ins.src[0].reg.u.immconst_data[2],
3025  *(const float *)&ins.src[0].reg.u.immconst_data[3]);
3026  }
3027  else if (ins.handler_idx == WINED3DSIH_DEFI)
3028  {
3029  shader_addline(&buffer, "defi i%u = %d, %d, %d, %d", ins.dst[0].reg.idx[0].offset,
3030  ins.src[0].reg.u.immconst_data[0],
3031  ins.src[0].reg.u.immconst_data[1],
3032  ins.src[0].reg.u.immconst_data[2],
3033  ins.src[0].reg.u.immconst_data[3]);
3034  }
3035  else if (ins.handler_idx == WINED3DSIH_DEFB)
3036  {
3037  shader_addline(&buffer, "defb b%u = %s",
3038  ins.dst[0].reg.idx[0].offset, ins.src[0].reg.u.immconst_data[0] ? "true" : "false");
3039  }
3040  else
3041  {
3042  if (ins.predicate)
3043  {
3044  shader_addline(&buffer, "(");
3045  shader_dump_src_param(&buffer, ins.predicate, &shader_version);
3046  shader_addline(&buffer, ") ");
3047  }
3048 
3049  /* PixWin marks instructions with the coissue flag with a '+' */
3050  if (ins.coissue)
3051  shader_addline(&buffer, "+");
3052 
3053  shader_addline(&buffer, "%s", shader_opcode_names[ins.handler_idx]);
3054 
3055  if (ins.handler_idx == WINED3DSIH_BREAKP
3057  || ins.handler_idx == WINED3DSIH_IF
3058  || ins.handler_idx == WINED3DSIH_RETP
3059  || ins.handler_idx == WINED3DSIH_TEXKILL)
3060  {
3061  switch (ins.flags)
3062  {
3063  case WINED3D_SHADER_CONDITIONAL_OP_NZ: shader_addline(&buffer, "_nz"); break;
3064  case WINED3D_SHADER_CONDITIONAL_OP_Z: shader_addline(&buffer, "_z"); break;
3065  default: shader_addline(&buffer, "_unrecognized(%#x)", ins.flags); break;
3066  }
3067  }
3068  else if (ins.handler_idx == WINED3DSIH_IFC
3069  || ins.handler_idx == WINED3DSIH_BREAKC)
3070  {
3071  switch (ins.flags)
3072  {
3073  case WINED3D_SHADER_REL_OP_GT: shader_addline(&buffer, "_gt"); break;
3074  case WINED3D_SHADER_REL_OP_EQ: shader_addline(&buffer, "_eq"); break;
3075  case WINED3D_SHADER_REL_OP_GE: shader_addline(&buffer, "_ge"); break;
3076  case WINED3D_SHADER_REL_OP_LT: shader_addline(&buffer, "_lt"); break;
3077  case WINED3D_SHADER_REL_OP_NE: shader_addline(&buffer, "_ne"); break;
3078  case WINED3D_SHADER_REL_OP_LE: shader_addline(&buffer, "_le"); break;
3079  default: shader_addline(&buffer, "_(%u)", ins.flags);
3080  }
3081  }
3082  else if (ins.handler_idx == WINED3DSIH_TEX
3083  && shader_version.major >= 2
3084  && (ins.flags & WINED3DSI_TEXLD_PROJECT))
3085  {
3086  shader_addline(&buffer, "p");
3087  }
3088  else if (ins.handler_idx == WINED3DSIH_RESINFO && ins.flags)
3089  {
3090  switch (ins.flags)
3091  {
3092  case WINED3DSI_RESINFO_RCP_FLOAT: shader_addline(&buffer, "_rcpFloat"); break;
3093  case WINED3DSI_RESINFO_UINT: shader_addline(&buffer, "_uint"); break;
3094  default: shader_addline(&buffer, "_unrecognized(%#x)", ins.flags);
3095  }
3096  }
3097  else if (ins.handler_idx == WINED3DSIH_SAMPLE_INFO && ins.flags)
3098  {
3099  switch (ins.flags)
3100  {
3101  case WINED3DSI_SAMPLE_INFO_UINT: shader_addline(&buffer, "_uint"); break;
3102  default: shader_addline(&buffer, "_unrecognized(%#x)", ins.flags);
3103  }
3104  }
3105  else if (ins.handler_idx == WINED3DSIH_SYNC)
3106  {
3107  shader_dump_sync_flags(&buffer, ins.flags);
3108  }
3109  else
3110  {
3111  shader_dump_precise_flags(&buffer, ins.flags);
3112  }
3113 
3115  shader_addline(&buffer, "(%d,%d,%d)", ins.texel_offset.u, ins.texel_offset.v, ins.texel_offset.w);
3116 
3117  for (i = 0; i < ins.dst_count; ++i)
3118  {
3119  shader_dump_ins_modifiers(&buffer, &ins.dst[i]);
3120  shader_addline(&buffer, !i ? " " : ", ");
3121  shader_dump_dst_param(&buffer, &ins.dst[i], &shader_version);
3122  }
3123 
3124  /* Other source tokens */
3125  for (i = ins.dst_count; i < (ins.dst_count + ins.src_count); ++i)
3126  {
3127  shader_addline(&buffer, !i ? " " : ", ");
3128  shader_dump_src_param(&buffer, &ins.src[i - ins.dst_count], &shader_version);
3129  }
3130  }
3131  shader_addline(&buffer, "\n");
3132  }
3133 
3134  for (p = buffer.buffer; *p; p = q)
3135  {
3136  if (!(q = strstr(p, "\n")))
3137  q = p + strlen(p);
3138  else
3139  ++q;
3140  TRACE(" %.*s", (int)(q - p), p);
3141  }
3142 
3143  string_buffer_free(&buffer);
3144 }
3145 
3147 {
3148  if (shader->reg_maps.shader_version.type == WINED3D_SHADER_TYPE_HULL)
3149  {
3150  heap_free(shader->u.hs.phases.control_point);
3151  heap_free(shader->u.hs.phases.fork);
3152  heap_free(shader->u.hs.phases.join);
3153  }
3154  else if (shader->reg_maps.shader_version.type == WINED3D_SHADER_TYPE_GEOMETRY)
3155  {
3156  heap_free(shader->u.gs.so_desc.elements);
3157  }
3158 
3159  heap_free(shader->patch_constant_signature.elements);
3160  heap_free(shader->output_signature.elements);
3161  heap_free(shader->input_signature.elements);
3162  heap_free(shader->signature_strings);
3163  shader->device->shader_backend->shader_destroy(shader);
3165  heap_free(shader->function);
3169  list_remove(&shader->shader_list_entry);
3170 
3171  if (shader->frontend && shader->frontend_data)
3172  shader->frontend->shader_free(shader->frontend_data);
3173 }
3174 
3176 {
3180 };
3181 
/* Precompile hook of the "none" shader backend.
 *
 * Intentionally empty: both arguments are ignored and nothing is done. */
static void shader_none_precompile(void *shader_priv, struct wined3d_shader *shader)
{
}
/* Compute shader selection hook of the "none" shader backend.
 *
 * Intentionally empty: all arguments are ignored and nothing is done. */
static void shader_none_select_compute(void *shader_priv, struct wined3d_context *context,
        const struct wined3d_state *state)
{
}
/* Constant loading hook of the "none" shader backend.
 *
 * Intentionally empty: all arguments are ignored and nothing is done. */
static void shader_none_load_constants(void *shader_priv, struct wined3d_context *context,
        const struct wined3d_state *state)
{
}
3193 
3194 /* Context activation is done by the caller. */
3195 static void shader_none_select(void *shader_priv, struct wined3d_context *context,
3196  const struct wined3d_state *state)
3197 {
3198  const struct wined3d_gl_info *gl_info = context->gl_info;
3199  struct shader_none_priv *priv = shader_priv;
3200 
3201  priv->vertex_pipe->vp_enable(gl_info, !use_vs(state));
3202  priv->fragment_pipe->enable_extension(gl_info, !use_ps(state));
3203 }
3204 
3205 /* Context activation is done by the caller. */
3206 static void shader_none_disable(void *shader_priv, struct wined3d_context *context)
3207 {
3208  struct shader_none_priv *priv = shader_priv;
3209  const struct wined3d_gl_info *gl_info = context->gl_info;
3210 
3211  priv->vertex_pipe->vp_enable(gl_info, FALSE);
3212  priv->fragment_pipe->enable_extension(gl_info, FALSE);
3213 
3217  | (1u << WINED3D_SHADER_TYPE_HULL)
3220 }
3221 
3222 static HRESULT shader_none_alloc(struct wined3d_device *device, const struct wined3d_vertex_pipe_ops *vertex_pipe,
3223  const struct fragment_pipeline *fragment_pipe)
3224 {
3225  struct fragment_caps fragment_caps;
3226  void *vertex_priv, *fragment_priv;
3227  struct shader_none_priv *priv;
3228 
3229  if (!(priv = heap_alloc(sizeof(*priv))))
3230  return E_OUTOFMEMORY;
3231 
3232  if (!(vertex_priv = vertex_pipe->vp_alloc(&none_shader_backend, priv)))
3233  {
3234  ERR("Failed to initialize vertex pipe.\n");
3235  heap_free(priv);
3236  return E_FAIL;
3237  }
3238 
3239  if (!(fragment_priv = fragment_pipe->alloc_private(&none_shader_backend, priv)))
3240  {
3241  ERR("Failed to initialize fragment pipe.\n");
3242  vertex_pipe->vp_free(device);
3243  heap_free(priv);
3244  return E_FAIL;
3245  }
3246 
3247  priv->vertex_pipe = vertex_pipe;
3248  priv->fragment_pipe = fragment_pipe;
3249  fragment_pipe->get_caps(&device->adapter->gl_info, &fragment_caps);
3251 
3252  device->vertex_priv = vertex_priv;
3253  device->fragment_priv = fragment_priv;
3254  device->shader_priv = priv;
3255 
3256  return WINED3D_OK;
3257 }
3258 
3260 {
3261  struct shader_none_priv *priv = device->shader_priv;
3262 
3263  priv->fragment_pipe->free_private(device);
3264  priv->vertex_pipe->vp_free(device);
3265  heap_free(priv);
3266 }
3267 
3269 {
3270  return TRUE;
3271 }
3272 
3273 static void shader_none_get_caps(const struct wined3d_gl_info *gl_info, struct shader_caps *caps)
3274 {
3275  /* Set the shader caps to 0 for the none shader backend */
3276  caps->vs_version = 0;
3277  caps->hs_version = 0;
3278  caps->ds_version = 0;
3279  caps->gs_version = 0;
3280  caps->ps_version = 0;
3281  caps->cs_version = 0;
3282  caps->vs_uniform_count = 0;
3283  caps->ps_uniform_count = 0;
3284  caps->ps_1x_max_value = 0.0f;
3285  caps->varying_count = 0;
3286  caps->wined3d_caps = 0;
3287 }
3288 
3290 {
3291  /* We "support" every possible fixup, since we don't support any shader
3292  * model, and will never have to actually sample a texture. */
3293  return TRUE;
3294 }
3295 
3296 static BOOL shader_none_has_ffp_proj_control(void *shader_priv)
3297 {
3298  struct shader_none_priv *priv = shader_priv;
3299 
3300  return priv->ffp_proj_control;
3301 }
3302 
3304 {
3322 };
3323 
3324 static HRESULT shader_set_function(struct wined3d_shader *shader, DWORD float_const_count,
3325  enum wined3d_shader_type type, unsigned int max_version)
3326 {
3327  const struct wined3d_d3d_info *d3d_info = &shader->device->adapter->d3d_info;
3328  struct wined3d_shader_reg_maps *reg_maps = &shader->reg_maps;
3329  const struct wined3d_shader_frontend *fe;
3330  unsigned int backend_version;
3331  HRESULT hr;
3332 
3333  TRACE("shader %p, float_const_count %u, type %#x, max_version %u.\n",
3334  shader, float_const_count, type, max_version);
3335 
3336  fe = shader->frontend;
3337  if (!(shader->frontend_data = fe->shader_init(shader->function,
3338  shader->functionLength, &shader->output_signature)))
3339  {
3340  FIXME("Failed to initialize frontend.\n");
3341  return WINED3DERR_INVALIDCALL;
3342  }
3343 
3344  /* First pass: trace shader. */
3345  if (TRACE_ON(d3d_shader))
3346  shader_trace_init(fe, shader->frontend_data);
3347 
3348  /* Second pass: figure out which registers are used, what the semantics are, etc. */
3349  if (FAILED(hr = shader_get_registers_used(shader, float_const_count)))
3350  return hr;
3351 
3352  if (reg_maps->shader_version.type != type)
3353  {
3354  WARN("Wrong shader type %d.\n", reg_maps->shader_version.type);
3355  return WINED3DERR_INVALIDCALL;
3356  }
3357  if (reg_maps->shader_version.major > max_version)
3358  {
3359  WARN("Shader version %d not supported by this D3D API version.\n", reg_maps->shader_version.major);
3360  return WINED3DERR_INVALIDCALL;
3361  }
3362  switch (type)
3363  {
3365  backend_version = d3d_info->limits.vs_version;
3366  break;
3368  backend_version = d3d_info->limits.hs_version;
3369  break;
3371  backend_version = d3d_info->limits.ds_version;
3372  break;
3374  backend_version = d3d_info->limits.gs_version;
3375  break;
3377  backend_version = d3d_info->limits.ps_version;
3378  break;
3380  backend_version = d3d_info->limits.cs_version;
3381  break;
3382  default:
3383  FIXME("No backend version-checking for this shader type.\n");
3384  backend_version = 0;
3385  }
3386  if (reg_maps->shader_version.major > backend_version)
3387  {
3388  WARN("Shader version %d.%d not supported by your GPU with the current shader backend.\n",
3389  reg_maps->shader_version.major, reg_maps->shader_version.minor);
3390  return WINED3DERR_INVALIDCALL;
3391  }
3392 
3393  return WINED3D_OK;
3394 }
3395 
3397 {
3398  ULONG refcount = InterlockedIncrement(&shader->ref);
3399 
3400  TRACE("%p increasing refcount to %u.\n", shader, refcount);
3401 
3402  return refcount;
3403 }
3404 
3405 static void wined3d_shader_init_object(void *object)
3406 {
3407  struct wined3d_shader *shader = object;
3408  struct wined3d_device *device = shader->device;
3409 
3410  list_add_head(&device->shaders, &shader->shader_list_entry);
3411 
3412  device->shader_backend->shader_precompile(device->shader_priv, shader);
3413 }
3414 
/* Destroy a shader passed in as an untyped pointer: run shader_cleanup() on
 * it, then free the shader object itself. */
static void wined3d_shader_destroy_object(void *object)
{
    struct wined3d_shader *doomed = object;

    shader_cleanup(doomed);
    heap_free(doomed);
}
3420 
3422 {
3423  ULONG refcount = InterlockedDecrement(&shader->ref);
3424 
3425  TRACE("%p decreasing refcount to %u.\n", shader, refcount);
3426 
3427  if (!refcount)
3428  {
3429  shader->parent_ops->wined3d_object_destroyed(shader->parent);
3431  }
3432 
3433  return refcount;
3434 }
3435 
3437 {
3438  TRACE("shader %p.\n", shader);
3439 
3440  return shader->parent;
3441 }
3442 
3444  void *byte_code, UINT *byte_code_size)
3445 {
3446  TRACE("shader %p, byte_code %p, byte_code_size %p.\n", shader, byte_code, byte_code_size);
3447 
3448  if (!byte_code)
3449  {
3450  *byte_code_size = shader->functionLength;
3451  return WINED3D_OK;
3452  }
3453 
3454  if (*byte_code_size < shader->functionLength)
3455  {
3456  /* MSDN claims (for d3d8 at least) that if *byte_code_size is smaller
3457  * than the required size we should write the required size and
3458  * return D3DERR_MOREDATA. That's not actually true. */
3459  return WINED3DERR_INVALIDCALL;
3460  }
3461 
3462  memcpy(byte_code, shader->function, shader->functionLength);
3463 
3464  return WINED3D_OK;
3465 }
3466 
3467 /* Set local constants for d3d8 shaders. */
3469  UINT start_idx, const float *src_data, UINT count)
3470 {
3471  UINT end_idx = start_idx + count;
3472  UINT i;
3473 
3474  TRACE("shader %p, start_idx %u, src_data %p, count %u.\n", shader, start_idx, src_data, count);
3475 
3476  if (end_idx > shader->limits->constant_float)
3477  {
3478  WARN("end_idx %u > float constants limit %u.\n",
3479  end_idx, shader->limits->constant_float);
3480  end_idx = shader->limits->constant_float;
3481  }
3482 
3483  for (i = start_idx; i < end_idx; ++i)
3484  {
3485  struct wined3d_shader_lconst *lconst;
3486  float *value;
3487 
3488  if (!(lconst = heap_alloc(sizeof(*lconst))))
3489  return E_OUTOFMEMORY;
3490 
3491  lconst->idx = i;
3492  value = (float *)lconst->value;
3493  memcpy(value, src_data + (i - start_idx) * 4 /* 4 components */, 4 * sizeof(float));
3494  list_add_head(&shader->constantsF, &lconst->entry);
3495 
3496  if (isinf(value[0]) || isnan(value[0]) || isinf(value[1]) || isnan(value[1])
3497  || isinf(value[2]) || isnan(value[2]) || isinf(value[3]) || isnan(value[3]))
3498  {
3499  shader->lconst_inf_or_nan = TRUE;
3500  }
3501  }
3502 
3503  return WINED3D_OK;
3504 }
3505 
3506 static void init_interpolation_compile_args(DWORD *interpolation_args,
3507  const struct wined3d_shader *pixel_shader, const struct wined3d_gl_info *gl_info)
3508 {
3510  || !pixel_shader || pixel_shader->reg_maps.shader_version.major < 4)
3511  {
3512  memset(interpolation_args, 0, sizeof(pixel_shader->u.ps.interpolation_mode));
3513  return;
3514  }
3515 
3516  memcpy(interpolation_args, pixel_shader->u.ps.interpolation_mode,
3517  sizeof(pixel_shader->u.ps.interpolation_mode));
3518 }
3519 
3521  WORD swizzle_map, struct vs_compile_args *args, const struct wined3d_context *context)
3522 {
3523  const struct wined3d_shader *geometry_shader = state->shader[WINED3D_SHADER_TYPE_GEOMETRY];
3524  const struct wined3d_shader *pixel_shader = state->shader[WINED3D_SHADER_TYPE_PIXEL];
3525  const struct wined3d_shader *hull_shader = state->shader[WINED3D_SHADER_TYPE_HULL];
3526  const struct wined3d_d3d_info *d3d_info = context->d3d_info;
3527  const struct wined3d_gl_info *gl_info = context->gl_info;
3528 
3533  args->point_size = state->gl_primitive_type == GL_POINTS;
3534  args->per_vertex_point_size = shader->reg_maps.point_size;
3535  args->next_shader_type = hull_shader? WINED3D_SHADER_TYPE_HULL
3537  if (shader->reg_maps.shader_version.major >= 4)
3538  args->next_shader_input_count = hull_shader ? hull_shader->limits->packed_input
3539  : geometry_shader ? geometry_shader->limits->packed_input
3540  : pixel_shader ? pixel_shader->limits->packed_input : 0;
3541  else
3542  args->next_shader_input_count = 0;
3543  args->swizzle_map = swizzle_map;
3544  if (d3d_info->emulated_flatshading)
3546  else
3547  args->flatshading = 0;
3548 
3550  args->next_shader_type == WINED3D_SHADER_TYPE_PIXEL ? pixel_shader : NULL, gl_info);
3551 }
3552 
3553 static BOOL match_usage(BYTE usage1, BYTE usage_idx1, BYTE usage2, BYTE usage_idx2)
3554 {
3555  if (usage_idx1 != usage_idx2)
3556  return FALSE;
3557  if (usage1 == usage2)
3558  return TRUE;
3559  if (usage1 == WINED3D_DECL_USAGE_POSITION && usage2 == WINED3D_DECL_USAGE_POSITIONT)
3560  return TRUE;
3561  if (usage2 == WINED3D_DECL_USAGE_POSITION && usage1 == WINED3D_DECL_USAGE_POSITIONT)
3562  return TRUE;
3563 
3564  return FALSE;
3565 }
3566 
3568  BYTE usage_req, BYTE usage_idx_req, unsigned int *regnum)
3569 {
3570  WORD map = shader->reg_maps.input_registers;
3571  unsigned int i;
3572 
3573  for (i = 0; map; map >>= 1, ++i)
3574  {
3575  if (!(map & 1)) continue;
3576 
3577  if (match_usage(shader->u.vs.attributes[i].usage,
3578  shader->u.vs.attributes[i].usage_idx, usage_req, usage_idx_req))
3579  {
3580  *regnum = i;
3581  return TRUE;
3582  }
3583  }
3584  return FALSE;
3585 }
3586 
3588  SIZE_T *total)
3589 {
3591  unsigned int i;
3592  SIZE_T len;
3593 
3594  for (i = 0; i < signature->element_count; ++i)
3595  {
3596  e = &signature->elements[i];
3597  len = strlen(e->semantic_name);
3598  if (len >= ~(SIZE_T)0 - *total)
3599  return E_OUTOFMEMORY;
3600 
3601  *total += len + 1;
3602  }
3603  return WINED3D_OK;
3604 }
3605 
3607  const struct wined3d_shader_signature *src, char **signature_strings)
3608 {
3610  unsigned int i;
3611  SIZE_T len;
3612  char *ptr;
3613 
3614  if (!src->element_count)
3615  return WINED3D_OK;
3616 
3617  ptr = *signature_strings;
3618 
3619  dst->element_count = src->element_count;
3620  if (!(dst->elements = heap_calloc(dst->element_count, sizeof(*dst->elements))))
3621  return E_OUTOFMEMORY;
3622 
3623  for (i = 0; i < src->element_count; ++i)
3624  {
3625  e = &src->elements[i];
3626  dst->elements[i] = *e;
3627 
3628  len = strlen(e->semantic_name);
3629  memcpy(ptr, e->semantic_name, len + 1);
3630  dst->elements[i].semantic_name = ptr;
3631  ptr += len + 1;
3632  }
3633 
3634  *signature_strings = ptr;
3635 
3636  return WINED3D_OK;
3637 }
3638 
3640  const struct wined3d_shader_desc *desc, DWORD float_const_count, enum wined3d_shader_type type,
3641  void *parent, const struct wined3d_parent_ops *parent_ops)
3642 {
3643  size_t byte_code_size;
3644  SIZE_T total;
3645  HRESULT hr;
3646  char *ptr;
3647 
3648  TRACE("byte_code %p, byte_code_size %#lx, format %#x, max_version %#x.\n",
3649  desc->byte_code, (long)desc->byte_code_size, desc->format, desc->max_version);
3650 
3651  if (!(shader->frontend = shader_select_frontend(desc->format)))
3652  {
3653  FIXME("Unable to find frontend for shader.\n");
3654  return WINED3DERR_INVALIDCALL;
3655  }
3656 
3657  shader->ref = 1;
3658  shader->device = device;
3659  shader->parent = parent;
3660  shader->parent_ops = parent_ops;
3661 
3662  total = 0;
3664  return hr;
3666  return hr;
3668  return hr;
3669  if (total && !(shader->signature_strings = heap_alloc(total)))
3670  return E_OUTOFMEMORY;
3671  ptr = shader->signature_strings;
3672 
3673  if (FAILED(hr = shader_signature_copy(&shader->input_signature, &desc->input_signature, &ptr)))
3674  {
3675  heap_free(shader->signature_strings);
3676  return hr;
3677  }
3678  if (FAILED(hr = shader_signature_copy(&shader->output_signature, &desc->output_signature, &ptr)))
3679  {
3680  heap_free(shader->input_signature.elements);
3681  heap_free(shader->signature_strings);
3682  return hr;
3683  }
3685  {
3686  heap_free(shader->output_signature.elements);
3687  heap_free(shader->input_signature.elements);
3688  heap_free(shader->signature_strings);
3689  return hr;
3690  }
3691 
3692  list_init(&shader->linked_programs);
3693  list_init(&shader->constantsF);
3694  list_init(&shader->constantsB);
3695  list_init(&shader->constantsI);
3696  shader->lconst_inf_or_nan = FALSE;
3697  list_init(&shader->reg_maps.indexable_temps);
3698  list_init(&shader->shader_list_entry);
3699 
3700  byte_code_size = desc->byte_code_size;
3701  if (byte_code_size == ~(size_t)0)
3702  {
3703  const struct wined3d_shader_frontend *fe = shader->frontend;
3704  struct wined3d_shader_version shader_version;
3705  struct wined3d_shader_instruction ins;
3706  const DWORD *ptr;
3707  void *fe_data;
3708 
3709  if (!(fe_data = fe->shader_init(desc->byte_code, byte_code_size, &shader->output_signature)))
3710  {
3711  WARN("Failed to initialise frontend data.\n");
3712  shader_cleanup(shader);
3713  return WINED3DERR_INVALIDCALL;
3714  }
3715 
3716  fe->shader_read_header(fe_data, &ptr, &shader_version);
3717  while (!fe->shader_is_end(fe_data, &ptr))
3718  fe->shader_read_instruction(fe_data, &ptr, &ins);
3719 
3720  fe->shader_free(fe_data);
3721 
3722  byte_code_size = (ptr - desc->byte_code) * sizeof(*ptr);
3723  }
3724 
3725  if (desc->byte_code && byte_code_size)
3726  {
3727  if (!(shader->function = heap_alloc(byte_code_size)))
3728  {
3729  shader_cleanup(shader);
3730  return E_OUTOFMEMORY;
3731  }
3732  memcpy(shader->function, desc->byte_code, byte_code_size);
3733  shader->functionLength = byte_code_size;
3734 
3735  if (FAILED(hr = shader_set_function(shader, float_const_count, type, desc->max_version)))
3736  {
3737  WARN("Failed to set function, hr %#x.\n", hr);
3738  shader_cleanup(shader);
3739  return hr;
3740  }
3741  }
3742  else
3743  {
3744  shader->reg_maps.shader_version.type = type;
3745  shader->reg_maps.shader_version.major = 4;
3746  shader->reg_maps.shader_version.minor = 0;
3747  shader_set_limits(shader);
3748 
3749  if (FAILED(hr = shader_scan_output_signature(shader)))
3750  {
3751  shader_cleanup(shader);
3752  return hr;
3753  }
3754  }
3755 
3756  shader->load_local_constsF = shader->lconst_inf_or_nan;
3757 
3758  return hr;
3759 }
3760 
3762  const struct wined3d_shader_desc *desc, void *parent, const struct wined3d_parent_ops *parent_ops)
3763 {
3764  struct wined3d_shader_reg_maps *reg_maps = &shader->reg_maps;
3765  unsigned int i;
3766  HRESULT hr;
3767 
3768  if (FAILED(hr = shader_init(shader, device, desc, device->adapter->d3d_info.limits.vs_uniform_count,
3769  WINED3D_SHADER_TYPE_VERTEX, parent, parent_ops)))
3770  return hr;
3771 
3772  for (i = 0; i < shader->input_signature.element_count; ++i)
3773  {
3774  const struct wined3d_shader_signature_element *input = &shader->input_signature.elements[i];
3775 
3776  if (!(reg_maps->input_registers & (1u << input->register_idx)) || !input->semantic_name)
3777  continue;
3778 
3779  shader->u.vs.attributes[input->register_idx].usage =
3781  shader->u.vs.attributes[input->register_idx].usage_idx = input->semantic_idx;
3782  }
3783 
3784  if (reg_maps->usesrelconstF && !list_empty(&shader->constantsF))
3785  shader->load_local_constsF = TRUE;
3786 
3787  return WINED3D_OK;
3788 }
3789 
3791  const struct wined3d_shader_desc *desc, const struct wined3d_stream_output_desc *so_desc,
3792  void *parent, const struct wined3d_parent_ops *parent_ops)
3793 {
3794  struct wined3d_shader_desc shader_desc = *desc;
3795  struct wined3d_stream_output_element *elements;
3797  HRESULT hr;
3798 
3799  if (so_desc)
3800  {
3801  shader_type = shader_get_shader_type(desc);
3802  switch (shader_type)
3803  {
3805  shader_desc.byte_code = NULL;
3806  shader_desc.byte_code_size = 0;
3807  break;
3809  FIXME("Stream output not supported for %s.\n", debug_shader_type(shader_type));
3810  return E_NOTIMPL;
3811  default:
3812  break;
3813  }
3814  }
3815 
3816  if (FAILED(hr = shader_init(shader, device, &shader_desc, 0,
3817  WINED3D_SHADER_TYPE_GEOMETRY, parent, parent_ops)))
3818  return hr;
3819 
3820  if (so_desc)
3821  {
3822  if (!(elements = heap_calloc(so_desc->element_count, sizeof(*elements))))
3823  {
3824  shader_cleanup(shader);
3825  return E_OUTOFMEMORY;
3826  }
3827 
3828  shader->u.gs.so_desc = *so_desc;
3829  shader->u.gs.so_desc.elements = elements;
3830  memcpy(elements, so_desc->elements, so_desc->element_count * sizeof(*elements));
3831  }
3832 
3833  return WINED3D_OK;
3834 }
3835 
3837  struct ds_compile_args *args, const struct wined3d_context *context)
3838 {
3839  const struct wined3d_shader *geometry_shader = state->shader[WINED3D_SHADER_TYPE_GEOMETRY];
3840  const struct wined3d_shader *pixel_shader = state->shader[WINED3D_SHADER_TYPE_PIXEL];
3841  const struct wined3d_shader *hull_shader = state->shader[WINED3D_SHADER_TYPE_HULL];
3842  const struct wined3d_gl_info *gl_info = context->gl_info;
3843 
3844  args->tessellator_output_primitive = hull_shader->u.hs.tessellator_output_primitive;
3845  args->tessellator_partitioning = hull_shader->u.hs.tessellator_partitioning;
3846 
3847  args->output_count = geometry_shader ? geometry_shader->limits->packed_input
3848  : pixel_shader ? pixel_shader->limits->packed_input : shader->limits->packed_output;
3850 
3851  args->render_offscreen = context->render_offscreen;
3852 
3854  args->next_shader_type == WINED3D_SHADER_TYPE_PIXEL ? pixel_shader : NULL, gl_info);
3855 
3856  args->padding = 0;
3857 }
3858 
3860  struct gs_compile_args *args, const struct wined3d_context *context)
3861 {
3862  const struct wined3d_shader *pixel_shader = state->shader[WINED3D_SHADER_TYPE_PIXEL];
3863  const struct wined3d_gl_info *gl_info = context->gl_info;
3864 
3865  args->output_count = pixel_shader ? pixel_shader->limits->packed_input : shader->limits->packed_output;
3866 
3867  if (!(args->primitive_type = shader->u.gs.input_type))
3869 
3870  init_interpolation_compile_args(args->interpolation_mode, pixel_shader, gl_info);
3871 }
3872 
3874  BOOL position_transformed, struct ps_compile_args *args, const struct wined3d_context *context)
3875 {
3876  const struct wined3d_d3d_info *d3d_info = context->d3d_info;
3877  const struct wined3d_gl_info *gl_info = context->gl_info;
3878  const struct wined3d_texture *texture;
3879  unsigned int i;
3880 
3881  memset(args, 0, sizeof(*args)); /* FIXME: Make sure all bits are set. */
3882  if (!gl_info->supported[ARB_FRAMEBUFFER_SRGB] && needs_srgb_write(context, state, state->fb))
3883  {
3884  static unsigned int warned = 0;
3885 
3886  args->srgb_correction = 1;
3887  if (state->render_states[WINED3D_RS_ALPHABLENDENABLE] && !warned++)
3888  WARN("Blending into a sRGB render target with no GL_ARB_framebuffer_sRGB "
3889  "support, expect rendering artifacts.\n");
3890  }
3891 
3892  if (shader->reg_maps.shader_version.major == 1
3893  && shader->reg_maps.shader_version.minor <= 3)
3894  {
3895  for (i = 0; i < shader->limits->sampler; ++i)
3896  {
3898 
3899  if (flags & WINED3D_TTFF_PROJECTED)
3900  {
3901  DWORD tex_transform = flags & ~WINED3D_TTFF_PROJECTED;
3902 
3903  if (!state->shader[WINED3D_SHADER_TYPE_VERTEX])
3904  {
3905  enum wined3d_shader_resource_type resource_type = shader->reg_maps.resource_info[i].type;
3906  unsigned int j;
3907  unsigned int index = state->texture_states[i][WINED3D_TSS_TEXCOORD_INDEX];
3908  DWORD max_valid = WINED3D_TTFF_COUNT4;
3909 
3910  for (j = 0; j < state->vertex_declaration->element_count; ++j)
3911  {
3913  &state->vertex_declaration->elements[j];
3914 
3915  if (element->usage == WINED3D_DECL_USAGE_TEXCOORD
3916  && element->usage_idx == index)
3917  {
3918  max_valid = element->format->component_count;
3919  break;
3920  }
3921  }
3922  if (!tex_transform || tex_transform > max_valid)
3923  {
3924  WARN("Fixing up projected texture transform flags from %#x to %#x.\n",
3925  tex_transform, max_valid);
3926  tex_transform = max_valid;
3927  }
3928  if ((resource_type == WINED3D_SHADER_RESOURCE_TEXTURE_1D && tex_transform > WINED3D_TTFF_COUNT1)
3929  || (resource_type == WINED3D_SHADER_RESOURCE_TEXTURE_2D
3930  && tex_transform > WINED3D_TTFF_COUNT2)
3931  || (resource_type == WINED3D_SHADER_RESOURCE_TEXTURE_3D
3932  && tex_transform > WINED3D_TTFF_COUNT3))
3933  tex_transform |= WINED3D_PSARGS_PROJECTED;
3934  else
3935  {
3936  WARN("Application requested projected texture with unsuitable texture coordinates.\n");
3937  WARN("(texture unit %u, transform flags %#x, sampler type %u).\n",
3938  i, tex_transform, resource_type);
3939  }
3940  }
3941  else
3943 
3944  args->tex_transform |= tex_transform << i * WINED3D_PSARGS_TEXTRANSFORM_SHIFT;
3945  }
3946  }
3947  }
3948  if (shader->reg_maps.shader_version.major == 1
3949  && shader->reg_maps.shader_version.minor <= 4)
3950  {
3951  for (i = 0; i < shader->limits->sampler; ++i)
3952  {
3953  const struct wined3d_texture *texture = state->textures[i];
3954 
3955  if (!shader->reg_maps.resource_info[i].type)
3956  continue;
3957 
3958  /* Treat unbound textures as 2D. The dummy texture will provide
3959  * the proper sample value. The tex_types bitmap defaults to
3960  * 2D because of the memset. */
3961  if (!texture)
3962  continue;
3963 
3964  switch (texture->target)
3965  {
3966  /* RECT textures are distinguished from 2D textures via np2_fixup */
3967  default:
3968  break;
3969 
3970  case GL_TEXTURE_3D:
3972  break;
3973 
3976  break;
3977  }
3978  }
3979  }
3980 
3981  if (shader->reg_maps.shader_version.major >= 4)
3982  {
3983  /* In SM4+ we use dcl_sampler in order to determine if we should use shadow sampler. */
3984  args->shadow = 0;
3985  for (i = 0 ; i < MAX_FRAGMENT_SAMPLERS; ++i)
3986  args->color_fixup[i] = COLOR_FIXUP_IDENTITY;
3987  args->np2_fixup = 0;
3988  }
3989  else
3990  {
3991  for (i = 0; i < MAX_FRAGMENT_SAMPLERS; ++i)
3992  {
3993  if (!shader->reg_maps.resource_info[i].type)
3994  continue;
3995 
3996  texture = state->textures[i];
3997  if (!texture)
3998  {
4000  continue;
4001  }
4002  if (can_use_texture_swizzle(gl_info, texture->resource.format))
4004  else
4005  args->color_fixup[i] = texture->resource.format->color_fixup;
4006 
4007  if (texture->resource.format_flags & WINED3DFMT_FLAG_SHADOW)
4008  args->shadow |= 1u << i;
4009 
4010  /* Flag samplers that need NP2 texcoord fixup. */
4011  if (!(texture->flags & WINED3D_TEXTURE_POW2_MAT_IDENT))
4012  args->np2_fixup |= (1u << i);
4013  }
4014  }
4015 
4016  if (shader->reg_maps.shader_version.major >= 3)
4017  {
4018  if (position_transformed)
4019  args->vp_mode = WINED3D_VP_MODE_NONE;
4020  else if (use_vs(state))
4022  else
4023  args->vp_mode = WINED3D_VP_MODE_FF;
4024  args->fog = WINED3D_FFP_PS_FOG_OFF;
4025  }
4026  else
4027  {
4029  if (state->render_states[WINED3D_RS_FOGENABLE])
4030  {
4031  switch (state->render_states[WINED3D_RS_FOGTABLEMODE])
4032  {
4033  case WINED3D_FOG_NONE:
4034  if (position_transformed || use_vs(state))
4035  {
4037  break;
4038  }
4039 
4040  switch (state->render_states[WINED3D_RS_FOGVERTEXMODE])
4041  {
4042  case WINED3D_FOG_NONE: /* Fall through. */
4043  case WINED3D_FOG_LINEAR: args->fog = WINED3D_FFP_PS_FOG_LINEAR; break;
4044  case WINED3D_FOG_EXP: args->fog = WINED3D_FFP_PS_FOG_EXP; break;
4045  case WINED3D_FOG_EXP2: args->fog = WINED3D_FFP_PS_FOG_EXP2; break;
4046  }
4047  break;
4048 
4049  case WINED3D_FOG_LINEAR: args->fog = WINED3D_FFP_PS_FOG_LINEAR; break;
4050  case WINED3D_FOG_EXP: args->fog = WINED3D_FFP_PS_FOG_EXP; break;
4051  case WINED3D_FOG_EXP2: args->fog = WINED3D_FFP_PS_FOG_EXP2; break;
4052  }
4053  }
4054  else
4055  {
4056  args->fog = WINED3D_FFP_PS_FOG_OFF;
4057  }
4058  }
4059 
4060  if (context->d3d_info->limits.varying_count < wined3d_max_compat_varyings(context->gl_info))
4061  {
4062  const struct wined3d_shader *vs = state->shader[WINED3D_SHADER_TYPE_VERTEX];
4063 
4064  args->texcoords_initialized = 0;
4065  for (i = 0; i < MAX_TEXTURES; ++i)
4066  {
4067  if (vs)
4068  {
4069  if (state->shader[WINED3D_SHADER_TYPE_VERTEX]->reg_maps.output_registers & (1u << i))
4070  args->texcoords_initialized |= 1u << i;
4071  }
4072  else
4073  {
4074  const struct wined3d_stream_info *si = &context->stream_info;
4075  unsigned int coord_idx = state->texture_states[i][WINED3D_TSS_TEXCOORD_INDEX];
4076 
4079  || (coord_idx < MAX_TEXTURES && (si->use_map & (1u << (WINED3D_FFP_TEXCOORD0 + coord_idx)))))
4080  args->texcoords_initialized |= 1u << i;
4081  }
4082  }
4083  }
4084  else
4085  {
4086  args->texcoords_initialized = (1u << MAX_TEXTURES) - 1;
4087  }
4088 
4090  && state->gl_primitive_type == GL_POINTS;
4091 
4092  if (gl_info->supported[WINED3D_GL_LEGACY_CONTEXT])
4093  args->alpha_test_func = WINED3D_CMP_ALWAYS - 1;
4094  else
4097  : WINED3D_CMP_ALWAYS) - 1;
4098 
4099  if (d3d_info->emulated_flatshading)
4101 
4102  args->render_offscreen = shader->reg_maps.vpos && gl_info->supported[ARB_FRAGMENT_COORD_CONVENTIONS]
4103  ? context->render_offscreen : 0;
4104 
4105  args->dual_source_blend = wined3d_dualblend_enabled(state, gl_info);
4106 }
4107 
4109  const struct wined3d_shader_desc *desc, void *parent, const struct wined3d_parent_ops *parent_ops)
4110 {
4111  const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
4112  unsigned int i, highest_reg_used = 0, num_regs_used = 0;
4113  HRESULT hr;
4114 
4115  if (FAILED(hr = shader_init(shader, device, desc, device->adapter->d3d_info.limits.ps_uniform_count,
4116  WINED3D_SHADER_TYPE_PIXEL, parent, parent_ops)))
4117  return hr;
4118 
4119  for (i = 0; i < MAX_REG_INPUT; ++i)
4120  {
4121  if (shader->u.ps.input_reg_used & (1u << i))
4122  {
4123  ++num_regs_used;
4124  highest_reg_used = i;
4125  }
4126  }
4127 
4128  /* Don't do any register mapping magic if it is not needed, or if we can't
4129  * achieve anything anyway */
4130  if (highest_reg_used < (gl_info->limits.glsl_varyings / 4)
4131  || num_regs_used > (gl_info->limits.glsl_varyings / 4)
4132  || shader->reg_maps.shader_version.major >= 4)
4133  {
4134  if (num_regs_used > (gl_info->limits.glsl_varyings / 4))
4135  {
4136  /* This happens with relative addressing. The input mapper function
4137  * warns about this if the higher registers are declared too, so
4138  * don't write a FIXME here */
4139  WARN("More varying registers used than supported\n");
4140  }
4141 
4142  for (i = 0; i < MAX_REG_INPUT; ++i)
4143  {
4144  shader->u.ps.input_reg_map[i] = i;
4145  }
4146 
4147  shader->u.ps.declared_in_count = highest_reg_used + 1;
4148  }
4149  else
4150  {
4151  shader->u.ps.declared_in_count = 0;
4152  for (i = 0; i < MAX_REG_INPUT; ++i)
4153  {
4154  if (shader->u.ps.input_reg_used & (1u << i))
4155  shader->u.ps.input_reg_map[i] = shader->u.ps.declared_in_count++;
4156  else shader->u.ps.input_reg_map[i] = ~0U;
4157  }
4158  }
4159 
4160  return WINED3D_OK;
4161 }
4162 
4164 {
4165  struct wined3d_shader_reg_maps *reg_maps = &shader->reg_maps;
4166  struct wined3d_shader_resource_info *resource_info = reg_maps->resource_info;
4167  unsigned int i;
4168 
4169  if (reg_maps->shader_version.major != 1) return;
4170 
4171  for (i = 0; i < shader->limits->sampler; ++i)
4172  {
4173  /* We don't sample from this sampler. */
4174  if (!resource_info[i].type)
4175  continue;
4176 
4177  switch ((tex_types >> i * WINED3D_PSARGS_TEXTYPE_SHIFT) & WINED3D_PSARGS_TEXTYPE_MASK)
4178  {
4179  case WINED3D_SHADER_TEX_2D:
4180  resource_info[i].type = WINED3D_SHADER_RESOURCE_TEXTURE_2D;
4181  break;
4182 
4183  case WINED3D_SHADER_TEX_3D:
4184  resource_info[i].type = WINED3D_SHADER_RESOURCE_TEXTURE_3D;
4185  break;
4186 
4188  resource_info[i].type = WINED3D_SHADER_RESOURCE_TEXTURE_CUBE;
4189  break;
4190  }
4191  }
4192 }
4193 
4195  void *parent, const struct wined3d_parent_ops *parent_ops, struct wined3d_shader **shader)
4196 {
4197  struct wined3d_shader *object;
4198  HRESULT hr;
4199 
4200  TRACE("device %p, desc %p, parent %p, parent_ops %p, shader %p.\n",
4201  device, desc, parent, parent_ops, shader);
4202 
4203  if (!desc->byte_code)
4204  return WINED3DERR_INVALIDCALL;
4205 
4206  if (!(object = heap_alloc_zero(sizeof(*object))))
4207  return E_OUTOFMEMORY;
4208 
4209  if (FAILED(hr = shader_init(object, device, desc, 0, WINED3D_SHADER_TYPE_COMPUTE, parent, parent_ops)))
4210  {
4211  WARN("Failed to initialize compute shader, hr %#x.\n", hr);
4212  heap_free(object);
4213  return hr;
4214  }
4215 
4217 
4218  TRACE("Created compute shader %p.\n", object);
4219  *shader = object;
4220 
4221  return WINED3D_OK;
4222 }
4223 
4225  void *parent, const struct wined3d_parent_ops *parent_ops, struct wined3d_shader **shader)
4226 {
4227  struct wined3d_shader *object;
4228  HRESULT hr;
4229 
4230  TRACE("device %p, desc %p, parent %p, parent_ops %p, shader %p.\n",
4231  device, desc, parent, parent_ops, shader);
4232 
4233  if (!desc->byte_code)
4234  return WINED3DERR_INVALIDCALL;
4235 
4236  if (!(object = heap_alloc_zero(sizeof(*object))))
4237  return E_OUTOFMEMORY;
4238 
4239  if (FAILED(hr = shader_init(object, device, desc, 0, WINED3D_SHADER_TYPE_DOMAIN, parent, parent_ops)))
4240  {
4241  WARN("Failed to initialize domain shader, hr %#x.\n", hr);
4242  heap_free(object);
4243  return hr;
4244  }
4245 
4247 
4248  TRACE("Created domain shader %p.\n", object);
4249  *shader = object;
4250 
4251  return WINED3D_OK;
4252 }
4253 
4255  const struct wined3d_stream_output_desc *so_desc, void *