ReactOS  0.4.12-dev-90-g2e2e63e
arb_program_shader.c
Go to the documentation of this file.
1 /*
2  * Pixel and vertex shaders implementation using ARB_vertex_program
3  * and ARB_fragment_program GL extensions.
4  *
5  * Copyright 2002-2003 Jason Edmeades
6  * Copyright 2002-2003 Raphael Junqueira
7  * Copyright 2004 Christian Costa
8  * Copyright 2005 Oliver Stieber
9  * Copyright 2006 Ivan Gyurdiev
10  * Copyright 2006 Jason Green
11  * Copyright 2006 Henri Verbeet
12  * Copyright 2007-2011, 2013-2014 Stefan Dösinger for CodeWeavers
13  * Copyright 2009 Henri Verbeet for CodeWeavers
14  *
15  * This library is free software; you can redistribute it and/or
16  * modify it under the terms of the GNU Lesser General Public
17  * License as published by the Free Software Foundation; either
18  * version 2.1 of the License, or (at your option) any later version.
19  *
20  * This library is distributed in the hope that it will be useful,
21  * but WITHOUT ANY WARRANTY; without even the implied warranty of
22  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23  * Lesser General Public License for more details.
24  *
25  * You should have received a copy of the GNU Lesser General Public
26  * License along with this library; if not, write to the Free Software
27  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
28  */
29 
30 #include "config.h"
31 #include "wine/port.h"
32 
33 #include <stdio.h>
34 
35 #include "wined3d_private.h"
36 
37 WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader);
38 WINE_DECLARE_DEBUG_CHANNEL(d3d_constants);
41 
43 {
44  return type == WINED3D_SHADER_TYPE_PIXEL;
45 }
46 
48 {
49  return type == WINED3D_SHADER_TYPE_VERTEX;
50 }
51 
/* Return the start of the next line of the text at *ptr and advance *ptr
 * past that line.
 *
 * Lines are separated by '\n'. The returned pointer aliases the input text
 * and is not NUL-terminated at the newline, so callers have to bound their
 * reads with the updated *ptr. A last line without a trailing newline is
 * still returned, and *ptr is then left on the terminating NUL. Returns NULL
 * (leaving *ptr untouched) when *ptr already sits on the terminating NUL. */
static const char *get_line(const char **ptr)
{
    const char *line = *ptr;
    const char *newline = strchr(line, '\n');

    if (newline)
    {
        /* Resume right behind the separator next time. */
        *ptr = newline + 1;
        return line;
    }

    /* No separator left: either the text is exhausted, or this is the
     * final, unterminated line. */
    if (*line == '\0')
        return NULL;

    *ptr = line + strlen(line);
    return line;
}
67 
69 {
75 
77 };
78 
80 {
81  if (shader != WINED3D_SHADER_TYPE_VERTEX && shader != WINED3D_SHADER_TYPE_PIXEL)
82  {
83  ERR("Unsupported shader type '%s'.\n", debug_shader_type(shader));
84  return "bad";
85  }
86 
87  if (shader == WINED3D_SHADER_TYPE_PIXEL)
88  {
89  switch (value)
90  {
91  case ARB_ZERO: return "ps_helper_const.x";
92  case ARB_ONE: return "ps_helper_const.y";
93  case ARB_TWO: return "coefmul.x";
94  case ARB_0001: return "ps_helper_const.xxxy";
95  case ARB_EPS: return "ps_helper_const.z";
96  default: break;
97  }
98  }
99  else
100  {
101  switch (value)
102  {
103  case ARB_ZERO: return "helper_const.x";
104  case ARB_ONE: return "helper_const.y";
105  case ARB_TWO: return "helper_const.z";
106  case ARB_EPS: return "helper_const.w";
107  case ARB_0001: return "helper_const.xxxy";
108  case ARB_VS_REL_OFFSET: return "rel_addr_const.y";
109  }
110  }
111  FIXME("Unmanaged %s shader helper constant requested: %u.\n",
112  shader == WINED3D_SHADER_TYPE_PIXEL ? "pixel" : "vertex", value);
113  switch (value)
114  {
115  case ARB_ZERO: return "0.0";
116  case ARB_ONE: return "1.0";
117  case ARB_TWO: return "2.0";
118  case ARB_0001: return "{0.0, 0.0, 0.0, 1.0}";
119  case ARB_EPS: return "1e-8";
120  default: return "bad";
121  }
122 }
123 
124 static inline BOOL ffp_clip_emul(const struct wined3d_context *context)
125 {
126  return context->lowest_disabled_stage < 7;
127 }
128 
129 /* ARB_program_shader private data */
130 
132 {
133  struct list entry;
134  enum
135  {
136  IF,
140  } type;
143  union
144  {
145  unsigned int loop;
146  unsigned int ifc;
147  } no;
150 };
151 
153 {
154  struct ps_np2fixup_info super;
155  /* For ARB we need an offset value:
156  * With both GLSL and ARB mode the NP2 fixup information (the texture dimensions) are stored in a
157  * consecutive way (GLSL uses a uniform array). Since ARB doesn't know the notion of a "standalone"
158  * array we need an offset to the index inside the program local parameter array. */
160 };
161 
163 {
164  struct ps_compile_args super;
166  WORD clip; /* only a boolean, use a WORD for alignment */
167  unsigned char loop_ctrl[WINED3D_MAX_CONSTS_I][3];
168 };
169 
171 {
172  unsigned char texunit;
174 };
175 
177 {
179  struct arb_ps_np2fixup_info np2fixup_info;
180  struct stb_const_desc bumpenvmatconst[MAX_TEXTURES];
181  struct stb_const_desc luminanceconst[MAX_TEXTURES];
185  unsigned char numbumpenvmatconsts;
187 };
188 
190 {
191  struct vs_compile_args super;
192  union
193  {
194  struct
195  {
197  unsigned char clip_texcoord;
198  unsigned char clipplane_mask;
199  } boolclip;
201  } clip;
203  union
204  {
205  unsigned char samplers[4];
207  } vertex;
208  unsigned char loop_ctrl[WINED3D_MAX_CONSTS_I][3];
209 };
210 
212 {
219 };
220 
222 {
224  struct list entry;
225 };
226 
228 {
229  char addr_reg[20];
230  enum
231  {
232  /* plain GL_ARB_vertex_program or GL_ARB_fragment_program */
234  /* GL_NV_vertex_program2_option or GL_NV_fragment_program_option */
236  /* GL_NV_vertex_program3 or GL_NV_fragment_program2 */
237  NV3
238  } target_version;
239 
245  struct list control_frames;
246  struct list record;
249  unsigned int num_loops, loop_depth, num_ifcs;
250  int aL;
252 
253  unsigned int vs_clipplanes;
256 
257  /* For 3.0 vertex shaders */
258  const char *vs_output[MAX_REG_OUTPUT];
259  /* For 2.x and earlier vertex shaders */
260  const char *texcrd_output[8], *color_output[2], *fog_output;
261 
262  /* 3.0 pshader input for compatibility with fixed function */
263  const char *ps_input[MAX_REG_INPUT];
264 };
265 
267 {
270  struct wine_rb_entry entry;
271 };
272 
275  UINT num_gl_shaders, shader_array_size;
279 };
280 
283  UINT num_gl_shaders, shader_array_size;
285 };
286 
288 {
294  struct wine_rb_tree fragment_shaders;
297 
298  struct wine_rb_tree signature_tree;
300 
301  unsigned int highest_dirty_ps_const, highest_dirty_vs_const;
302  char vshader_const_dirty[WINED3D_MAX_VS_CONSTS_F];
303  char pshader_const_dirty[WINED3D_MAX_PS_CONSTS_F];
305 
309 };
310 
311 /* Context activation for state handlers is done by the caller. */
312 
313 static BOOL need_rel_addr_const(const struct arb_vshader_private *shader_data,
314  const struct wined3d_shader_reg_maps *reg_maps, const struct wined3d_gl_info *gl_info)
315 {
316  if (shader_data->rel_offset) return TRUE;
317  if (!reg_maps->usesmova) return FALSE;
318  return !gl_info->supported[NV_VERTEX_PROGRAM2_OPTION];
319 }
320 
321 /* Returns TRUE if result.clip from GL_NV_vertex_program2 should be used and FALSE otherwise */
322 static inline BOOL use_nv_clip(const struct wined3d_gl_info *gl_info)
323 {
324  return gl_info->supported[NV_VERTEX_PROGRAM2_OPTION]
325  && !(gl_info->quirks & WINED3D_QUIRK_NV_CLIP_BROKEN);
326 }
327 
328 static BOOL need_helper_const(const struct arb_vshader_private *shader_data,
329  const struct wined3d_shader_reg_maps *reg_maps, const struct wined3d_gl_info *gl_info)
330 {
331  if (need_rel_addr_const(shader_data, reg_maps, gl_info)) return TRUE;
332  if (!gl_info->supported[NV_VERTEX_PROGRAM]) return TRUE; /* Need to init colors. */
333  if (gl_info->quirks & WINED3D_QUIRK_ARB_VS_OFFSET_LIMIT) return TRUE; /* Load the immval offset. */
334  if (gl_info->quirks & WINED3D_QUIRK_SET_TEXCOORD_W) return TRUE; /* Have to init texcoords. */
335  if (!use_nv_clip(gl_info)) return TRUE; /* Init the clip texcoord */
336  if (reg_maps->usesnrm) return TRUE; /* 0.0 */
337  if (reg_maps->usespow) return TRUE; /* EPS, 0.0 and 1.0 */
338  if (reg_maps->fog) return TRUE; /* Clamping fog coord, 0.0 and 1.0 */
339  return FALSE;
340 }
341 
/* Count the constants a vertex shader reserves for private use.
 *
 * The result is between 1 and 3: one PARAM is always used for the position
 * fixup, one more when the helper constant with immediate values is needed,
 * and another one when the relative addressing constant is needed. */
static unsigned int reserved_vs_const(const struct arb_vshader_private *shader_data,
        const struct wined3d_shader_reg_maps *reg_maps, const struct wined3d_gl_info *gl_info)
{
    /* The position fixup is unconditional. */
    unsigned int count = 1;

    count += need_helper_const(shader_data, reg_maps, gl_info) ? 1 : 0;
    count += need_rel_addr_const(shader_data, reg_maps, gl_info) ? 1 : 0;

    return count;
}
352 
353 /* Loads floating point constants into the currently set ARB_vertex/fragment_program.
354  * When constant_list == NULL, it will load all the constants.
355  *
356  * @target_type should be either GL_VERTEX_PROGRAM_ARB (for vertex shaders)
357  * or GL_FRAGMENT_PROGRAM_ARB (for pixel shaders)
358  */
359 /* Context activation is done by the caller. */
360 static unsigned int shader_arb_load_constants_f(const struct wined3d_shader *shader,
361  const struct wined3d_gl_info *gl_info, GLuint target_type, unsigned int max_constants,
362  const struct wined3d_vec4 *constants, char *dirty_consts)
363 {
364  struct wined3d_shader_lconst *lconst;
365  unsigned int ret, i, j;
366 
367  if (TRACE_ON(d3d_constants))
368  {
369  for (i = 0; i < max_constants; ++i)
370  {
371  if (!dirty_consts[i])
372  continue;
373  TRACE_(d3d_constants)("Loading constant %u: %s.\n", i, debug_vec4(&constants[i]));
374  }
375  }
376 
377  i = 0;
378 
379  /* In 1.X pixel shaders constants are implicitly clamped in the range [-1;1] */
380  if (target_type == GL_FRAGMENT_PROGRAM_ARB && shader->reg_maps.shader_version.major == 1)
381  {
382  float lcl_const[4];
383  /* ps 1.x supports only 8 constants, clamp only those. When switching between 1.x and higher
384  * shaders, the first 8 constants are marked dirty for reload
385  */
386  for (; i < min(8, max_constants); ++i)
387  {
388  if (!dirty_consts[i])
389  continue;
390  dirty_consts[i] = 0;
391 
392  if (constants[i].x > 1.0f)
393  lcl_const[0] = 1.0f;
394  else if (constants[i].x < -1.0f)
395  lcl_const[0] = -1.0f;
396  else
397  lcl_const[0] = constants[i].x;
398 
399  if (constants[i].y > 1.0f)
400  lcl_const[1] = 1.0f;
401  else if (constants[i].y < -1.0f)
402  lcl_const[1] = -1.0f;
403  else
404  lcl_const[1] = constants[i].y;
405 
406  if (constants[i].z > 1.0f)
407  lcl_const[2] = 1.0f;
408  else if (constants[i].z < -1.0f)
409  lcl_const[2] = -1.0f;
410  else
411  lcl_const[2] = constants[i].z;
412 
413  if (constants[i].w > 1.0f)
414  lcl_const[3] = 1.0f;
415  else if (constants[i].w < -1.0f)
416  lcl_const[3] = -1.0f;
417  else
418  lcl_const[3] = constants[i].w;
419 
420  GL_EXTCALL(glProgramEnvParameter4fvARB(target_type, i, lcl_const));
421  }
422 
423  /* If further constants are dirty, reload them without clamping.
424  *
425  * The alternative is not to touch them, but then we cannot reset the dirty constant count
426  * to zero. That's bad for apps that only use PS 1.x shaders, because in that case the code
427  * above would always re-check the first 8 constants since max_constant remains at the init
428  * value
429  */
430  }
431 
433  {
434  /* TODO: Benchmark if we're better off finding the dirty constants ourselves,
435  * or just reloading *all* constants at once
436  *
437  GL_EXTCALL(glProgramEnvParameters4fvEXT(target_type, i, max_constants, constants + (i * 4)));
438  */
439  for (; i < max_constants; ++i)
440  {
441  if (!dirty_consts[i])
442  continue;
443 
444  /* Find the next block of dirty constants */
445  dirty_consts[i] = 0;
446  j = i;
447  for (++i; (i < max_constants) && dirty_consts[i]; ++i)
448  {
449  dirty_consts[i] = 0;
450  }
451 
452  GL_EXTCALL(glProgramEnvParameters4fvEXT(target_type, j, i - j, &constants[j].x));
453  }
454  }
455  else
456  {
457  for (; i < max_constants; ++i)
458  {
459  if (dirty_consts[i])
460  {
461  dirty_consts[i] = 0;
462  GL_EXTCALL(glProgramEnvParameter4fvARB(target_type, i, &constants[i].x));
463  }
464  }
465  }
466  checkGLcall("glProgramEnvParameter4fvARB()");
467 
468  /* Load immediate constants */
469  if (shader->load_local_constsF)
470  {
471  if (TRACE_ON(d3d_shader))
472  {
473  LIST_FOR_EACH_ENTRY(lconst, &shader->constantsF, struct wined3d_shader_lconst, entry)
474  {
475  GLfloat* values = (GLfloat*)lconst->value;
476  TRACE_(d3d_constants)("Loading local constants %i: %f, %f, %f, %f\n", lconst->idx,
477  values[0], values[1], values[2], values[3]);
478  }
479  }
480  /* Immediate constants are clamped for 1.X shaders at loading times */
481  ret = 0;
482  LIST_FOR_EACH_ENTRY(lconst, &shader->constantsF, struct wined3d_shader_lconst, entry)
483  {
484  dirty_consts[lconst->idx] = 1; /* Dirtify so the non-immediate constant overwrites it next time */
485  ret = max(ret, lconst->idx + 1);
486  GL_EXTCALL(glProgramEnvParameter4fvARB(target_type, lconst->idx, (GLfloat*)lconst->value));
487  }
488  checkGLcall("glProgramEnvParameter4fvARB()");
489  return ret; /* The loaded immediate constants need reloading for the next shader */
490  } else {
491  return 0; /* No constants are dirty now */
492  }
493 }
494 
495 /* Loads the texture dimensions for NP2 fixup into the currently set
496  * ARB_[vertex/fragment]_programs. */
498  const struct wined3d_gl_info *gl_info, const struct wined3d_state *state)
499 {
500  GLfloat np2fixup_constants[4 * MAX_FRAGMENT_SAMPLERS];
501  WORD active = fixup->super.active;
502  UINT i;
503 
504  if (!active)
505  return;
506 
507  for (i = 0; active; active >>= 1, ++i)
508  {
509  const struct wined3d_texture *tex = state->textures[i];
510  unsigned char idx = fixup->super.idx[i];
511  GLfloat *tex_dim = &np2fixup_constants[(idx >> 1) * 4];
512 
513  if (!(active & 1))
514  continue;
515 
516  if (!tex)
517  {
518  ERR("Nonexistent texture is flagged for NP2 texcoord fixup.\n");
519  continue;
520  }
521 
522  if (idx % 2)
523  {
524  tex_dim[2] = tex->pow2_matrix[0];
525  tex_dim[3] = tex->pow2_matrix[5];
526  }
527  else
528  {
529  tex_dim[0] = tex->pow2_matrix[0];
530  tex_dim[1] = tex->pow2_matrix[5];
531  }
532  }
533 
534  for (i = 0; i < fixup->super.num_consts; ++i)
535  {
536  GL_EXTCALL(glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB,
537  fixup->offset + i, &np2fixup_constants[i * 4]));
538  }
539 }
540 
541 /* Context activation is done by the caller. */
542 static void shader_arb_ps_local_constants(const struct arb_ps_compiled_shader *gl_shader,
543  const struct wined3d_context *context, const struct wined3d_state *state, UINT rt_height)
544 {
545  const struct wined3d_gl_info *gl_info = context->gl_info;
546  unsigned char i;
547 
548  for(i = 0; i < gl_shader->numbumpenvmatconsts; i++)
549  {
550  int texunit = gl_shader->bumpenvmatconst[i].texunit;
551 
552  /* The state manager takes care that this function is always called if the bump env matrix changes */
553  const float *data = (const float *)&state->texture_states[texunit][WINED3D_TSS_BUMPENV_MAT00];
554  GL_EXTCALL(glProgramLocalParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB,
555  gl_shader->bumpenvmatconst[i].const_num, data));
556 
557  if (gl_shader->luminanceconst[i].const_num != WINED3D_CONST_NUM_UNUSED)
558  {
559  /* WINED3D_TSS_BUMPENVLSCALE and WINED3D_TSS_BUMPENVLOFFSET are next to each other.
560  * point gl to the scale, and load 4 floats. x = scale, y = offset, z and w are junk, we
561  * don't care about them. The pointers are valid for sure because the stateblock is bigger.
562  * (they're WINED3D_TSS_TEXTURETRANSFORMFLAGS and WINED3D_TSS_ADDRESSW, so most likely 0 or NaN
563  */
564  const float *scale = (const float *)&state->texture_states[texunit][WINED3D_TSS_BUMPENV_LSCALE];
565  GL_EXTCALL(glProgramLocalParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB,
566  gl_shader->luminanceconst[i].const_num, scale));
567  }
568  }
569  checkGLcall("Load bumpmap consts");
570 
571  if(gl_shader->ycorrection != WINED3D_CONST_NUM_UNUSED)
572  {
573  /* ycorrection.x: Backbuffer height(onscreen) or 0(offscreen).
574  * ycorrection.y: -1.0(onscreen), 1.0(offscreen)
575  * ycorrection.z: 1.0
576  * ycorrection.w: 0.0
577  */
578  float val[4];
579  val[0] = context->render_offscreen ? 0.0f : (float) rt_height;
580  val[1] = context->render_offscreen ? 1.0f : -1.0f;
581  val[2] = 1.0f;
582  val[3] = 0.0f;
583  GL_EXTCALL(glProgramLocalParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, gl_shader->ycorrection, val));
584  checkGLcall("y correction loading");
585  }
586 
587  if (!gl_shader->num_int_consts) return;
588 
589  for (i = 0; i < WINED3D_MAX_CONSTS_I; ++i)
590  {
591  if(gl_shader->int_consts[i] != WINED3D_CONST_NUM_UNUSED)
592  {
593  float val[4];
594  val[0] = (float)state->ps_consts_i[i].x;
595  val[1] = (float)state->ps_consts_i[i].y;
596  val[2] = (float)state->ps_consts_i[i].z;
597  val[3] = -1.0f;
598 
599  GL_EXTCALL(glProgramLocalParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, gl_shader->int_consts[i], val));
600  }
601  }
602  checkGLcall("Load ps int consts");
603 }
604 
605 /* Context activation is done by the caller. */
606 static void shader_arb_vs_local_constants(const struct arb_vs_compiled_shader *gl_shader,
607  const struct wined3d_context *context, const struct wined3d_state *state)
608 {
609  const struct wined3d_gl_info *gl_info = context->gl_info;
610  float position_fixup[4];
611  unsigned char i;
612 
613  /* Upload the position fixup */
614  shader_get_position_fixup(context, state, 1, position_fixup);
615  GL_EXTCALL(glProgramLocalParameter4fvARB(GL_VERTEX_PROGRAM_ARB, gl_shader->pos_fixup, position_fixup));
616 
617  if (!gl_shader->num_int_consts) return;
618 
619  for (i = 0; i < WINED3D_MAX_CONSTS_I; ++i)
620  {
621  if(gl_shader->int_consts[i] != WINED3D_CONST_NUM_UNUSED)
622  {
623  float val[4];
624  val[0] = (float)state->vs_consts_i[i].x;
625  val[1] = (float)state->vs_consts_i[i].y;
626  val[2] = (float)state->vs_consts_i[i].z;
627  val[3] = -1.0f;
628 
629  GL_EXTCALL(glProgramLocalParameter4fvARB(GL_VERTEX_PROGRAM_ARB, gl_shader->int_consts[i], val));
630  }
631  }
632  checkGLcall("Load vs int consts");
633 }
634 
635 static void shader_arb_select(void *shader_priv, struct wined3d_context *context,
636  const struct wined3d_state *state);
637 
644 /* Context activation is done by the caller (state handler). */
646  struct wined3d_context *context, const struct wined3d_state *state,
647  BOOL usePixelShader, BOOL useVertexShader, BOOL from_shader_select)
648 {
649  const struct wined3d_d3d_info *d3d_info = context->d3d_info;
650  const struct wined3d_gl_info *gl_info = context->gl_info;
651 
652  if (!from_shader_select)
653  {
654  const struct wined3d_shader *vshader = state->shader[WINED3D_SHADER_TYPE_VERTEX];
655  const struct wined3d_shader *pshader = state->shader[WINED3D_SHADER_TYPE_PIXEL];
656 
657  if (vshader
658  && (vshader->reg_maps.boolean_constants
659  || (!gl_info->supported[NV_VERTEX_PROGRAM2_OPTION]
660  && (vshader->reg_maps.integer_constants & ~vshader->reg_maps.local_int_consts))))
661  {
662  TRACE("bool/integer vertex shader constants potentially modified, forcing shader reselection.\n");
663  shader_arb_select(priv, context, state);
664  }
665  else if (pshader
666  && (pshader->reg_maps.boolean_constants
668  && (pshader->reg_maps.integer_constants & ~pshader->reg_maps.local_int_consts))))
669  {
670  TRACE("bool/integer pixel shader constants potentially modified, forcing shader reselection.\n");
671  shader_arb_select(priv, context, state);
672  }
673  }
674 
675  if (context != priv->last_context)
676  {
677  memset(priv->vshader_const_dirty, 1,
678  sizeof(*priv->vshader_const_dirty) * d3d_info->limits.vs_uniform_count);
679  priv->highest_dirty_vs_const = d3d_info->limits.vs_uniform_count;
680 
681  memset(priv->pshader_const_dirty, 1,
682  sizeof(*priv->pshader_const_dirty) * d3d_info->limits.ps_uniform_count);
683  priv->highest_dirty_ps_const = d3d_info->limits.ps_uniform_count;
684 
685  priv->last_context = context;
686  }
687 
688  if (useVertexShader)
689  {
690  const struct wined3d_shader *vshader = state->shader[WINED3D_SHADER_TYPE_VERTEX];
691  const struct arb_vs_compiled_shader *gl_shader = priv->compiled_vprog;
692 
693  /* Load DirectX 9 float constants for vertex shader */
696  shader_arb_vs_local_constants(gl_shader, context, state);
697  }
698 
699  if (usePixelShader)
700  {
701  const struct wined3d_shader *pshader = state->shader[WINED3D_SHADER_TYPE_PIXEL];
702  const struct arb_ps_compiled_shader *gl_shader = priv->compiled_fprog;
703  UINT rt_height = state->fb->render_targets[0]->height;
704 
705  /* Load DirectX 9 float constants for pixel shader */
708  shader_arb_ps_local_constants(gl_shader, context, state, rt_height);
709 
711  shader_arb_load_np2fixup_constants(&gl_shader->np2fixup_info, gl_info, state);
712  }
713 }
714 
715 static void shader_arb_load_constants(void *shader_priv, struct wined3d_context *context,
716  const struct wined3d_state *state)
717 {
718  BOOL vs = use_vs(state);
719  BOOL ps = use_ps(state);
720 
721  shader_arb_load_constants_internal(shader_priv, context, state, ps, vs, FALSE);
722 }
723 
725 {
726  struct wined3d_context *context = context_get_current();
727  struct shader_arb_priv *priv = device->shader_priv;
728 
729  /* We don't want shader constant dirtification to be an O(contexts), so just dirtify the active
730  * context. On a context switch the old context will be fully dirtified */
731  if (!context || context->device != device)
732  return;
733 
734  memset(priv->vshader_const_dirty + start, 1, sizeof(*priv->vshader_const_dirty) * count);
735  priv->highest_dirty_vs_const = max(priv->highest_dirty_vs_const, start + count);
736 }
737 
739 {
740  struct wined3d_context *context = context_get_current();
741  struct shader_arb_priv *priv = device->shader_priv;
742 
743  /* We don't want shader constant dirtification to be an O(contexts), so just dirtify the active
744  * context. On a context switch the old context will be fully dirtified */
745  if (!context || context->device != device)
746  return;
747 
748  memset(priv->pshader_const_dirty + start, 1, sizeof(*priv->pshader_const_dirty) * count);
749  priv->highest_dirty_ps_const = max(priv->highest_dirty_ps_const, start + count);
750 }
751 
753 {
754  char str[4][17];
755 
756  wined3d_ftoa(values[0], str[0]);
757  wined3d_ftoa(values[1], str[1]);
758  wined3d_ftoa(values[2], str[2]);
759  wined3d_ftoa(values[3], str[3]);
760  shader_addline(buffer, "{%s, %s, %s, %s}", str[0], str[1], str[2], str[3]);
761 }
762 
763 /* Generate the variable & register declarations for the ARB_vertex_program output target */
765  const struct wined3d_shader_reg_maps *reg_maps, struct wined3d_string_buffer *buffer,
766  const struct wined3d_gl_info *gl_info, DWORD *num_clipplanes,
767  const struct shader_arb_ctx_priv *ctx)
768 {
769  DWORD i;
770  char pshader = shader_is_pshader_version(reg_maps->shader_version.type);
771  const struct wined3d_shader_lconst *lconst;
772  unsigned max_constantsF;
773  DWORD map;
774 
775  /* In pixel shaders, all private constants are program local, we don't need anything
776  * from program.env. Thus we can advertise the full set of constants in pixel shaders.
777  * If we need a private constant the GL implementation will squeeze it in somewhere
778  *
779  * With vertex shaders we need the posFixup and on some GL implementations 4 helper
780  * immediate values. The posFixup is loaded using program.env for now, so always
781  * subtract one from the number of constants. If the shader uses indirect addressing,
782  * account for the helper const too because we have to declare all available d3d constants
783  * and don't know which are actually used.
784  */
785  if (pshader)
786  {
787  max_constantsF = gl_info->limits.arb_ps_native_constants;
788  /* 24 is the minimum MAX_PROGRAM_ENV_PARAMETERS_ARB value. */
789  if (max_constantsF < 24)
790  max_constantsF = gl_info->limits.arb_ps_float_constants;
791  }
792  else
793  {
794  const struct arb_vshader_private *shader_data = shader->backend_data;
795  max_constantsF = gl_info->limits.arb_vs_native_constants;
796  /* 96 is the minimum MAX_PROGRAM_ENV_PARAMETERS_ARB value.
797  * Also prevents max_constantsF from becoming less than 0 and
798  * wrapping around. */
799  if (max_constantsF < 96)
800  max_constantsF = gl_info->limits.arb_vs_float_constants;
801 
802  if (reg_maps->usesrelconstF)
803  {
804  DWORD highest_constf = 0, clip_limit;
805 
806  max_constantsF -= reserved_vs_const(shader_data, reg_maps, gl_info);
807  max_constantsF -= wined3d_popcount(reg_maps->integer_constants);
808  max_constantsF -= gl_info->reserved_arb_constants;
809 
810  for (i = 0; i < shader->limits->constant_float; ++i)
811  {
812  DWORD idx = i >> 5;
813  DWORD shift = i & 0x1f;
814  if (reg_maps->constf[idx] & (1u << shift))
815  highest_constf = i;
816  }
817 
818  if(use_nv_clip(gl_info) && ctx->target_version >= NV2)
819  {
820  if(ctx->cur_vs_args->super.clip_enabled)
821  clip_limit = gl_info->limits.user_clip_distances;
822  else
823  clip_limit = 0;
824  }
825  else
826  {
827  unsigned int mask = ctx->cur_vs_args->clip.boolclip.clipplane_mask;
828  clip_limit = min(wined3d_popcount(mask), 4);
829  }
830  *num_clipplanes = min(clip_limit, max_constantsF - highest_constf - 1);
831  max_constantsF -= *num_clipplanes;
832  if(*num_clipplanes < clip_limit)
833  {
834  WARN("Only %u clip planes out of %u enabled.\n", *num_clipplanes,
835  gl_info->limits.user_clip_distances);
836  }
837  }
838  else
839  {
840  if (ctx->target_version >= NV2)
841  *num_clipplanes = gl_info->limits.user_clip_distances;
842  else
843  *num_clipplanes = min(gl_info->limits.user_clip_distances, 4);
844  }
845  }
846 
847  for (i = 0, map = reg_maps->temporary; map; map >>= 1, ++i)
848  {
849  if (map & 1) shader_addline(buffer, "TEMP R%u;\n", i);
850  }
851 
852  for (i = 0, map = reg_maps->address; map; map >>= 1, ++i)
853  {
854  if (map & 1) shader_addline(buffer, "ADDRESS A%u;\n", i);
855  }
856 
857  if (pshader && reg_maps->shader_version.major == 1 && reg_maps->shader_version.minor <= 3)
858  {
859  for (i = 0, map = reg_maps->texcoord; map; map >>= 1, ++i)
860  {
861  if (map & 1) shader_addline(buffer, "TEMP T%u;\n", i);
862  }
863  }
864 
865  if (!shader->load_local_constsF)
866  {
867  LIST_FOR_EACH_ENTRY(lconst, &shader->constantsF, struct wined3d_shader_lconst, entry)
868  {
869  const float *value;
870  value = (const float *)lconst->value;
871  shader_addline(buffer, "PARAM C%u = ", lconst->idx);
872  shader_arb_append_imm_vec4(buffer, value);
873  shader_addline(buffer, ";\n");
874  }
875  }
876 
877  /* After subtracting privately used constants from the hardware limit(they are loaded as
878  * local constants), make sure the shader doesn't violate the env constant limit
879  */
880  if (pshader)
881  {
882  max_constantsF = min(max_constantsF, gl_info->limits.arb_ps_float_constants);
883  }
884  else
885  {
886  max_constantsF = min(max_constantsF, gl_info->limits.arb_vs_float_constants);
887  }
888 
889  /* Avoid declaring more constants than needed */
890  max_constantsF = min(max_constantsF, shader->limits->constant_float);
891 
892  /* we use the array-based constants array if the local constants are marked for loading,
893  * because then we use indirect addressing, or when the local constant list is empty,
894  * because then we don't know if we're using indirect addressing or not. If we're hardcoding
895  * local constants do not declare the loaded constants as an array because ARB compilers usually
896  * do not optimize unused constants away
897  */
898  if (reg_maps->usesrelconstF)
899  {
900  /* Need to PARAM the environment parameters (constants) so we can use relative addressing */
901  shader_addline(buffer, "PARAM C[%d] = { program.env[0..%d] };\n",
902  max_constantsF, max_constantsF - 1);
903  }
904  else
905  {
906  for (i = 0; i < max_constantsF; ++i)
907  {
908  if (!shader_constant_is_local(shader, i) && wined3d_extract_bits(reg_maps->constf, i, 1))
909  {
910  shader_addline(buffer, "PARAM C%d = program.env[%d];\n",i, i);
911  }
912  }
913  }
914 }
915 
/* Name of the scaling coefficient for each shift value, indexed 0..15.
 *
 * Going by the per-entry notes, 1..7 stand for multiplying by 2..128 and
 * 8..15 for dividing by 256..2. Only x2..x16 and d16..d2 have a coefficient
 * component; every other entry, including "no shift", is "dummy". */
static const char * const shift_tab[] =
{
    /*  0: none */ "dummy",
    /*  1: x2   */ "coefmul.x",
    /*  2: x4   */ "coefmul.y",
    /*  3: x8   */ "coefmul.z",
    /*  4: x16  */ "coefmul.w",
    /*  5: x32  */ "dummy",
    /*  6: x64  */ "dummy",
    /*  7: x128 */ "dummy",
    /*  8: d256 */ "dummy",
    /*  9: d128 */ "dummy",
    /* 10: d64  */ "dummy",
    /* 11: d32  */ "dummy",
    /* 12: d16  */ "coefdiv.w",
    /* 13: d8   */ "coefdiv.z",
    /* 14: d4   */ "coefdiv.y",
    /* 15: d2   */ "coefdiv.x"
};
934 
936  const struct wined3d_shader_dst_param *dst, char *write_mask)
937 {
938  char *ptr = write_mask;
939 
941  {
942  *ptr++ = '.';
943  if (dst->write_mask & WINED3DSP_WRITEMASK_0) *ptr++ = 'x';
944  if (dst->write_mask & WINED3DSP_WRITEMASK_1) *ptr++ = 'y';
945  if (dst->write_mask & WINED3DSP_WRITEMASK_2) *ptr++ = 'z';
946  if (dst->write_mask & WINED3DSP_WRITEMASK_3) *ptr++ = 'w';
947  }
948 
949  *ptr = '\0';
950 }
951 
952 static void shader_arb_get_swizzle(const struct wined3d_shader_src_param *param, BOOL fixup, char *swizzle_str)
953 {
954  /* For registers of type WINED3DDECLTYPE_D3DCOLOR, data is stored as "bgra",
955  * but addressed as "rgba". To fix this we need to swap the register's x
956  * and z components. */
957  const char *swizzle_chars = fixup ? "zyxw" : "xyzw";
958  char *ptr = swizzle_str;
959 
960  /* swizzle bits fields: wwzzyyxx */
961  DWORD swizzle = param->swizzle;
962  DWORD swizzle_x = swizzle & 0x03;
963  DWORD swizzle_y = (swizzle >> 2) & 0x03;
964  DWORD swizzle_z = (swizzle >> 4) & 0x03;
965  DWORD swizzle_w = (swizzle >> 6) & 0x03;
966 
967  /* If the swizzle is the default swizzle (ie, "xyzw"), we don't need to
968  * generate a swizzle string. Unless we need to our own swizzling. */
969  if (swizzle != WINED3DSP_NOSWIZZLE || fixup)
970  {
971  *ptr++ = '.';
972  if (swizzle_x == swizzle_y && swizzle_x == swizzle_z && swizzle_x == swizzle_w) {
973  *ptr++ = swizzle_chars[swizzle_x];
974  } else {
975  *ptr++ = swizzle_chars[swizzle_x];
976  *ptr++ = swizzle_chars[swizzle_y];
977  *ptr++ = swizzle_chars[swizzle_z];
978  *ptr++ = swizzle_chars[swizzle_w];
979  }
980  }
981 
982  *ptr = '\0';
983 }
984 
985 static void shader_arb_request_a0(const struct wined3d_shader_instruction *ins, const char *src)
986 {
987  struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
988  struct wined3d_string_buffer *buffer = ins->ctx->buffer;
989 
990  if (!strcmp(priv->addr_reg, src)) return;
991 
992  strcpy(priv->addr_reg, src);
993  shader_addline(buffer, "ARL A0.x, %s;\n", src);
994 }
995 
996 static void shader_arb_get_src_param(const struct wined3d_shader_instruction *ins,
997  const struct wined3d_shader_src_param *src, unsigned int tmpreg, char *outregstr);
998 
/* shader_arb_get_register_name (name taken from the call sites in this file).
 *
 * Writes the ARB program spelling of register `reg` into `register_name`,
 * selected by reg->type, the shader type and the shader version.
 * *is_color is reset to FALSE and is set to TRUE only for a vertex shader
 * input whose bit is set in cur_vs_args->super.swizzle_map.
 *
 * register_name is filled with sprintf()/strcpy() and no length is passed in,
 * so the caller's buffer has to be large enough for the longest name.
 *
 * NOTE(review): the first line of the signature (return type, function name
 * and the `ins` parameter) appears to be missing from this listing. */
1000  const struct wined3d_shader_register *reg, char *register_name, BOOL *is_color)
1001 {
1002  /* oPos, oFog and oPts in D3D */
1003  static const char * const rastout_reg_names[] = {"TMP_OUT", "TMP_FOGCOORD", "result.pointsize"};
1004  const struct wined3d_shader *shader = ins->ctx->shader;
1005  const struct wined3d_shader_reg_maps *reg_maps = ins->ctx->reg_maps;
1006  BOOL pshader = shader_is_pshader_version(reg_maps->shader_version.type);
1007  struct shader_arb_ctx_priv *ctx = ins->ctx->backend_data;
1008 
1009  *is_color = FALSE;
1010 
1011  switch (reg->type)
1012  {
1013  case WINED3DSPR_TEMP:
1014  sprintf(register_name, "R%u", reg->idx[0].offset);
1015  break;
1016 
1017  case WINED3DSPR_INPUT:
1018  if (pshader)
1019  {
1020  if (reg_maps->shader_version.major < 3)
1021  {
1022  if (!reg->idx[0].offset)
1023  strcpy(register_name, "fragment.color.primary");
1024  else
1025  strcpy(register_name, "fragment.color.secondary");
1026  }
1027  else
1028  {
1029  if (reg->idx[0].rel_addr)
1030  {
1031  char rel_reg[50];
1032  shader_arb_get_src_param(ins, reg->idx[0].rel_addr, 0, rel_reg);
1033 
 /* The marker string is produced by the WINED3DSPR_LOOP case below when
  * the loop counter is emulated; the index is then folded at code
  * generation time from ctx->aL. */
1034  if (!strcmp(rel_reg, "**aL_emul**"))
1035  {
1036  DWORD idx = ctx->aL + reg->idx[0].offset;
1037  if(idx < MAX_REG_INPUT)
1038  {
1039  strcpy(register_name, ctx->ps_input[idx]);
1040  }
1041  else
1042  {
1043  ERR("Pixel shader input register out of bounds: %u\n", idx);
1044  sprintf(register_name, "out_of_bounds_%u", idx);
1045  }
1046  }
1047  else if (reg_maps->input_registers & 0x0300)
1048  {
1049  /* There are two ways basically:
1050  *
1051  * 1) Use the unrolling code that is used for loop emulation and unroll the loop.
1052  * That means trouble if the loop also contains a breakc or if the control values
1053  * aren't local constants.
1054  * 2) Generate an if block that checks if aL.y < 8, == 8 or == 9 and selects the
1055  * source dynamically. The trouble is that we cannot simply read aL.y because it
1056  * is an ADDRESS register. We could however push it, load .zw with a value and use
1057  * ADAC to load the condition code register and pop it again afterwards
1058  */
1059  FIXME("Relative input register addressing with more than 8 registers\n");
1060 
1061  /* This is better than nothing for now */
1062  sprintf(register_name, "fragment.texcoord[%s + %u]", rel_reg, reg->idx[0].offset);
1063  }
1064  else if(ctx->cur_ps_args->super.vp_mode != WINED3D_VP_MODE_SHADER)
1065  {
1066  /* This is problematic because we'd have to consult the ctx->ps_input strings
1067  * for where to find the varying. Some may be "0.0", others can be texcoords or
1068  * colors. This needs either a pipeline replacement to make the vertex shader feed
1069  * proper varyings, or loop unrolling
1070  *
1071  * For now use the texcoords and hope for the best
1072  */
1073  FIXME("Non-vertex shader varying input with indirect addressing\n");
1074  sprintf(register_name, "fragment.texcoord[%s + %u]", rel_reg, reg->idx[0].offset);
1075  }
1076  else
1077  {
1078  /* D3D supports indirect addressing only with aL in loop registers. The loop instruction
1079  * pulls GL_NV_fragment_program2 in
1080  */
1081  sprintf(register_name, "fragment.texcoord[%s + %u]", rel_reg, reg->idx[0].offset);
1082  }
1083  }
1084  else
1085  {
1086  if (reg->idx[0].offset < MAX_REG_INPUT)
1087  {
1088  strcpy(register_name, ctx->ps_input[reg->idx[0].offset]);
1089  }
1090  else
1091  {
1092  ERR("Pixel shader input register out of bounds: %u\n", reg->idx[0].offset);
1093  sprintf(register_name, "out_of_bounds_%u", reg->idx[0].offset);
1094  }
1095  }
1096  }
1097  }
1098  else
1099  {
 /* Vertex shader input: the only path that can report a color register. */
1100  if (ctx->cur_vs_args->super.swizzle_map & (1u << reg->idx[0].offset))
1101  *is_color = TRUE;
1102  sprintf(register_name, "vertex.attrib[%u]", reg->idx[0].offset);
1103  }
1104  break;
1105 
1106  case WINED3DSPR_CONST:
1107  if (!pshader && reg->idx[0].rel_addr)
1108  {
1109  const struct arb_vshader_private *shader_data = shader->backend_data;
 /* The relative offset is only applied when targeting plain ARB. */
1110  UINT rel_offset = ctx->target_version == ARB ? shader_data->rel_offset : 0;
1111  BOOL aL = FALSE;
1112  char rel_reg[50];
1113  if (reg_maps->shader_version.major < 2)
1114  {
1115  sprintf(rel_reg, "A0.x");
1116  }
1117  else
1118  {
1119  shader_arb_get_src_param(ins, reg->idx[0].rel_addr, 0, rel_reg);
1120  if (ctx->target_version == ARB)
1121  {
1122  if (!strcmp(rel_reg, "**aL_emul**"))
1123  {
1124  aL = TRUE;
1125  } else {
1126  shader_arb_request_a0(ins, rel_reg);
1127  sprintf(rel_reg, "A0.x");
1128  }
1129  }
1130  }
1131  if (aL)
1132  sprintf(register_name, "C[%u]", ctx->aL + reg->idx[0].offset);
1133  else if (reg->idx[0].offset >= rel_offset)
1134  sprintf(register_name, "C[%s + %u]", rel_reg, reg->idx[0].offset - rel_offset);
1135  else
1136  sprintf(register_name, "C[%s - %u]", rel_reg, rel_offset - reg->idx[0].offset);
1137  }
1138  else
1139  {
 /* Array syntax when the shader uses relative float constant addressing,
  * individually named constants otherwise. */
1140  if (reg_maps->usesrelconstF)
1141  sprintf(register_name, "C[%u]", reg->idx[0].offset);
1142  else
1143  sprintf(register_name, "C%u", reg->idx[0].offset);
1144  }
1145  break;
1146 
1147  case WINED3DSPR_TEXTURE: /* case WINED3DSPR_ADDR: */
1148  if (pshader)
1149  {
1150  if (reg_maps->shader_version.major == 1
1151  && reg_maps->shader_version.minor <= 3)
1152  /* In ps <= 1.3, Tx is a temporary register as destination
1153  * to all instructions, and as source to most instructions.
1154  * For some instructions it is the texcoord input. Those
1155  * instructions know about the special use. */
1156  sprintf(register_name, "T%u", reg->idx[0].offset);
1157  else
1158  /* In ps 1.4 and 2.x Tx is always a (read-only) varying. */
1159  sprintf(register_name, "fragment.texcoord[%u]", reg->idx[0].offset);
1160  }
1161  else
1162  {
1163  if (reg_maps->shader_version.major == 1 || ctx->target_version >= NV2)
1164  sprintf(register_name, "A%u", reg->idx[0].offset);
1165  else
1166  sprintf(register_name, "A%u_SHADOW", reg->idx[0].offset);
1167  }
1168  break;
1169 
1170  case WINED3DSPR_COLOROUT:
1171  if (ctx->ps_post_process && !reg->idx[0].offset)
1172  {
1173  strcpy(register_name, "TMP_COLOR");
1174  }
1175  else
1176  {
1177  if (ctx->cur_ps_args->super.srgb_correction)
1178  FIXME("sRGB correction on higher render targets.\n");
1179  if (reg_maps->rt_mask > 1)
1180  sprintf(register_name, "result.color[%u]", reg->idx[0].offset);
1181  else
1182  strcpy(register_name, "result.color");
1183  }
1184  break;
1185 
1186  case WINED3DSPR_RASTOUT:
 /* Offset 1 is served by ctx->fog_output rather than the table.
  * NOTE(review): the table has three entries and the offset is not
  * range-checked before it is used as an index. */
1187  if (reg->idx[0].offset == 1)
1188  sprintf(register_name, "%s", ctx->fog_output);
1189  else
1190  sprintf(register_name, "%s", rastout_reg_names[reg->idx[0].offset]);
1191  break;
1192 
1193  case WINED3DSPR_DEPTHOUT:
1194  strcpy(register_name, "result.depth");
1195  break;
1196 
1197  case WINED3DSPR_ATTROUT:
1198  /* case WINED3DSPR_OUTPUT: */
1199  if (pshader)
1200  sprintf(register_name, "oD[%u]", reg->idx[0].offset);
1201  else
1202  strcpy(register_name, ctx->color_output[reg->idx[0].offset]);
1203  break;
1204 
1205  case WINED3DSPR_TEXCRDOUT:
1206  if (pshader)
1207  sprintf(register_name, "oT[%u]", reg->idx[0].offset);
1208  else if (reg_maps->shader_version.major < 3)
1209  strcpy(register_name, ctx->texcrd_output[reg->idx[0].offset]);
1210  else
1211  strcpy(register_name, ctx->vs_output[reg->idx[0].offset]);
1212  break;
1213 
1214  case WINED3DSPR_LOOP:
1215  if(ctx->target_version >= NV2)
1216  {
1217  /* Pshader has an implicitly declared loop index counter A0.x that cannot be renamed */
1218  if(pshader) sprintf(register_name, "A0.x");
1219  else sprintf(register_name, "aL.y");
1220  }
1221  else
1222  {
1223  /* Unfortunately this code cannot return the value of ctx->aL here. An immediate value
1224  * would be valid, but if aL is used for indexing(its only use), there's likely an offset,
1225  * thus the result would be something like C[15 + 30], which is not valid in the ARB program
1226  * grammar. So return a marker for the emulated aL and intercept it in constant and varying
1227  * indexing
1228  */
1229  sprintf(register_name, "**aL_emul**");
1230  }
1231 
1232  break;
1233 
1234  case WINED3DSPR_CONSTINT:
1235  sprintf(register_name, "I%u", reg->idx[0].offset);
1236  break;
1237 
1238  case WINED3DSPR_MISCTYPE:
1239  if (!reg->idx[0].offset)
1240  sprintf(register_name, "vpos");
1241  else if (reg->idx[0].offset == 1)
1242  sprintf(register_name, "fragment.facing.x");
1243  else
 /* NOTE(review): register_name is left unwritten on this path. */
1244  FIXME("Unknown MISCTYPE register index %u.\n", reg->idx[0].offset);
1245  break;
1246 
1247  default:
1248  FIXME("Unhandled register type %#x[%u].\n", reg->type, reg->idx[0].offset);
1249  sprintf(register_name, "unrecognized_register[%u]", reg->idx[0].offset);
1250  break;
1251  }
1252 }
1253 
/* shader_arb_get_dst_param (name taken from the call sites in this file).
 *
 * Builds the destination operand text in `str`: the register name followed
 * by the write mask string. `str` is written with strcpy()/strcat() and no
 * length is passed in; the register name alone may be up to 254 characters.
 * The is_color result of the name lookup is not used here.
 *
 * NOTE(review): the first line of the signature (return type, function name
 * and the `ins` parameter) appears to be missing from this listing. */
1255  const struct wined3d_shader_dst_param *wined3d_dst, char *str)
1256 {
1257  char register_name[255];
1258  char write_mask[6];
1259  BOOL is_color;
1260 
1261  shader_arb_get_register_name(ins, &wined3d_dst->reg, register_name, &is_color);
1262  strcpy(str, register_name);
1263 
1264  shader_arb_get_write_mask(ins, wined3d_dst, write_mask);
1265  strcat(str, write_mask);
1266 }
1267 
1268 static const char *shader_arb_get_fixup_swizzle(enum fixup_channel_source channel_source)
1269 {
1270  switch(channel_source)
1271  {
1272  case CHANNEL_SOURCE_ZERO: return "0";
1273  case CHANNEL_SOURCE_ONE: return "1";
1274  case CHANNEL_SOURCE_X: return "x";
1275  case CHANNEL_SOURCE_Y: return "y";
1276  case CHANNEL_SOURCE_Z: return "z";
1277  case CHANNEL_SOURCE_W: return "w";
1278  default:
1279  FIXME("Unhandled channel source %#x\n", channel_source);
1280  return "undefined";
1281  }
1282 }
1283 
/* NOTE(review): the tag line and the member declarations of this aggregate
 * appear to be missing from this listing. The code that follows uses
 * `struct color_fixup_masks` with the members `source` and `sign`, so this is
 * presumably its definition - confirm against the original file. */
1285 {
1288 };
1289 
/* calc_color_correction (name taken from the call site in shader_hw_sample).
 *
 * Computes two write masks for a color fixup, both limited to `dst_mask`:
 *   masks.source - components whose source channel differs from the identity
 *                  mapping (x from X, y from Y, z from Z, w from W);
 *   masks.sign   - components that have a sign fixup requested.
 * Complex fixups are not handled: they are reported and both masks stay 0.
 *
 * NOTE(review): the signature line and the declaration of `complex_fixup`
 * appear to be missing from this listing. */
1291 {
1292  struct color_fixup_masks masks = {0, 0};
1293 
1294  if (is_complex_fixup(fixup))
1295  {
1297  FIXME("Complex fixup (%#x) not supported\n", complex_fixup);
1298  return masks;
1299  }
1300 
 /* Any channel that is not read from its own component needs a swizzle. */
1301  if (fixup.x_source != CHANNEL_SOURCE_X)
1302  masks.source |= WINED3DSP_WRITEMASK_0;
1303  if (fixup.y_source != CHANNEL_SOURCE_Y)
1304  masks.source |= WINED3DSP_WRITEMASK_1;
1305  if (fixup.z_source != CHANNEL_SOURCE_Z)
1306  masks.source |= WINED3DSP_WRITEMASK_2;
1307  if (fixup.w_source != CHANNEL_SOURCE_W)
1308  masks.source |= WINED3DSP_WRITEMASK_3;
 /* Components that are not written by the instruction need no fixup. */
1309  masks.source &= dst_mask;
1310 
1311  if (fixup.x_sign_fixup)
1312  masks.sign |= WINED3DSP_WRITEMASK_0;
1313  if (fixup.y_sign_fixup)
1314  masks.sign |= WINED3DSP_WRITEMASK_1;
1315  if (fixup.z_sign_fixup)
1316  masks.sign |= WINED3DSP_WRITEMASK_2;
1317  if (fixup.w_sign_fixup)
1318  masks.sign |= WINED3DSP_WRITEMASK_3;
1319  masks.sign &= dst_mask;
1320 
1321  return masks;
1322 }
1323 
/* Emits the color fixup instructions for a sampled value.
 *
 * buffer - shader text being generated
 * dst    - register that receives the corrected value
 * src    - register holding the raw sampled value
 * one    - name of a constant used as the subtrahend of the sign fixup
 * two    - name of a constant used as the multiplier of the sign fixup
 * fixup  - fixup description (used by the SWZ call)
 * masks  - which components need a source swizzle and/or a sign fixup
 *
 * When a source swizzle is needed, one SWZ is emitted; its target is "TA" if
 * a sign fixup follows, otherwise `dst`. When a sign fixup is needed, one
 * "MAD dst.mask, value, two, -one" is emitted, where value is the SWZ result
 * or, without a swizzle, `src` itself. Nothing is emitted if both masks
 * are 0. */
1324 static void gen_color_correction(struct wined3d_string_buffer *buffer, const char *dst,
1325  const char *src, const char *one, const char *two,
1326  struct color_fixup_desc fixup, struct color_fixup_masks masks)
1327 {
1328  const char *sign_fixup_src = dst;
1329 
1330  if (masks.source)
1331  {
1332  if (masks.sign)
1333  sign_fixup_src = "TA";
1334 
 /* NOTE(review): the format takes six strings but only the first two
  * arguments are visible; the rest of this call appears to be missing
  * from this listing. */
1335  shader_addline(buffer, "SWZ %s, %s, %s, %s, %s, %s;\n", sign_fixup_src, src,
1338  }
1339  else if (masks.sign)
1340  {
1341  sign_fixup_src = src;
1342  }
1343 
1344  if (masks.sign)
1345  {
1346  char reg_mask[6];
1347  char *ptr = reg_mask;
1348 
 /* Build ".xyzw"-style mask text; a full mask is left empty. */
1349  if (masks.sign != WINED3DSP_WRITEMASK_ALL)
1350  {
1351  *ptr++ = '.';
1352  if (masks.sign & WINED3DSP_WRITEMASK_0)
1353  *ptr++ = 'x';
1354  if (masks.sign & WINED3DSP_WRITEMASK_1)
1355  *ptr++ = 'y';
1356  if (masks.sign & WINED3DSP_WRITEMASK_2)
1357  *ptr++ = 'z';
1358  if (masks.sign & WINED3DSP_WRITEMASK_3)
1359  *ptr++ = 'w';
1360  }
1361  *ptr = '\0';
1362 
1363  shader_addline(buffer, "MAD %s%s, %s, %s, -%s;\n", dst, reg_mask, sign_fixup_src, two, one);
1364  }
1365 }
1366 
/* Returns the opcode suffix that encodes the destination modifiers of the
 * first destination of `ins`: "", "_SAT", "H" or "H_SAT".
 * Returns "" when the instruction has no destination. The partial precision
 * bit is dropped when targeting plain ARB, and the centroid bit is always
 * dropped after a FIXME. Any other combination is reported and yields "". */
1367 static const char *shader_arb_get_modifier(const struct wined3d_shader_instruction *ins)
1368 {
1369  DWORD mod;
1370  struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
1371  if (!ins->dst_count) return "";
1372 
1373  mod = ins->dst[0].modifiers;
1374 
1375  /* Silently ignore PARTIALPRECISION if it's not supported */
1376  if(priv->target_version == ARB) mod &= ~WINED3DSPDM_PARTIALPRECISION;
1377 
1378  if(mod & WINED3DSPDM_MSAMPCENTROID)
1379  {
1380  FIXME("Unhandled modifier WINED3DSPDM_MSAMPCENTROID\n");
1381  mod &= ~WINED3DSPDM_MSAMPCENTROID;
1382  }
1383 
1384  switch(mod)
1385  {
 /* NOTE(review): the case label for this return appears to be missing
  * from this listing. */
1387  return "H_SAT";
1388 
1389  case WINED3DSPDM_SATURATE:
1390  return "_SAT";
1391 
 /* NOTE(review): the case label for this return appears to be missing
  * from this listing. */
1393  return "H";
1394 
1395  case 0:
1396  return "";
1397 
1398  default:
1399  FIXME("Unknown modifiers 0x%08x\n", mod);
1400  return "";
1401  }
1402 }
1403 
/* Flag bits for the `flags` argument of shader_hw_sample(). That function
 * tests them in the order DERIV, LOD, BIAS, PROJ and emits TXD, TXL, TXB or
 * TXP respectively; with no flag set it emits TEX. */
1404 #define TEX_PROJ 0x1
1405 #define TEX_BIAS 0x2
1406 #define TEX_LOD 0x4
1407 #define TEX_DERIV 0x10
1408 
/* Emits one texture sampling instruction and, for pixel shaders, the color
 * fixup that follows it.
 *
 * ins         - instruction being translated (supplies context and dst[0])
 * sampler_idx - sampler index as seen by the shader; for vertex shaders it is
 *               offset by MAX_FRAGMENT_SAMPLERS and then remapped through
 *               cur_vs_args->vertex.samplers
 * dst_str     - final destination operand text
 * coord_reg   - operand text holding the texture coordinates
 * flags       - combination of TEX_PROJ / TEX_BIAS / TEX_LOD / TEX_DERIV
 * dsx, dsy    - derivative operands, used only with TEX_DERIV
 *
 * The sample is written to "TA" instead of dst_str when a color fixup is
 * needed, and gen_color_correction() then produces dst_str from it. */
1409 static void shader_hw_sample(const struct wined3d_shader_instruction *ins, DWORD sampler_idx,
1410  const char *dst_str, const char *coord_reg, WORD flags, const char *dsx, const char *dsy)
1411 {
1412  enum wined3d_shader_resource_type resource_type = ins->ctx->reg_maps->resource_info[sampler_idx].type;
1413  struct wined3d_string_buffer *buffer = ins->ctx->buffer;
1414  const char *tex_type;
1415  BOOL np2_fixup = FALSE;
1416  struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
1417  const char *mod;
1418  BOOL pshader = shader_is_pshader_version(ins->ctx->reg_maps->shader_version.type);
1419  const char *tex_dst = dst_str;
1420  struct color_fixup_masks masks;
1421 
1422  /* D3D vertex shader sampler IDs are vertex samplers(0-3), not global d3d samplers */
1423  if(!pshader) sampler_idx += MAX_FRAGMENT_SAMPLERS;
1424 
 /* NOTE(review): the case labels of the four texture type branches below
  * (the ones selecting "1D", "RECT"/"2D", "3D" and "CUBE") appear to be
  * missing from this listing. */
1425  switch (resource_type)
1426  {
1428  tex_type = "1D";
1429  break;
1430 
1432  if (pshader && priv->cur_ps_args->super.np2_fixup & (1u << sampler_idx)
1433  && ins->ctx->gl_info->supported[ARB_TEXTURE_RECTANGLE])
1434  tex_type = "RECT";
1435  else
1436  tex_type = "2D";
1437  if (shader_is_pshader_version(ins->ctx->reg_maps->shader_version.type))
1438  {
1439  if (priv->cur_np2fixup_info->super.active & (1u << sampler_idx))
1440  {
 /* Coordinate scaling is only generated for a plain TEX. */
1441  if (flags) FIXME("Only ordinary sampling from NP2 textures is supported.\n");
1442  else np2_fixup = TRUE;
1443  }
1444  }
1445  break;
1446 
1448  tex_type = "3D";
1449  break;
1450 
1452  tex_type = "CUBE";
1453  break;
1454 
1455  default:
1456  ERR("Unexpected resource type %#x.\n", resource_type);
1457  tex_type = "";
1458  }
1459 
1460  /* TEX, TXL, TXD and TXP do not support the "H" modifier,
1461  * so don't use shader_arb_get_modifier
1462  */
1463  if(ins->dst[0].modifiers & WINED3DSPDM_SATURATE) mod = "_SAT";
1464  else mod = "";
1465 
1466  /* Fragment samplers always have identity mapping */
1467  if(sampler_idx >= MAX_FRAGMENT_SAMPLERS)
1468  {
1469  sampler_idx = priv->cur_vs_args->vertex.samplers[sampler_idx - MAX_FRAGMENT_SAMPLERS];
1470  }
1471 
1472  if (pshader)
1473  {
1474  masks = calc_color_correction(priv->cur_ps_args->super.color_fixup[sampler_idx],
1475  ins->dst[0].write_mask);
1476 
 /* Sample into the temporary so the fixup can write the real destination. */
1477  if (masks.source || masks.sign)
1478  tex_dst = "TA";
1479  }
1480 
1481  if (flags & TEX_DERIV)
1482  {
1483  if(flags & TEX_PROJ) FIXME("Projected texture sampling with custom derivatives\n");
1484  if(flags & TEX_BIAS) FIXME("Biased texture sampling with custom derivatives\n");
1485  shader_addline(buffer, "TXD%s %s, %s, %s, %s, texture[%u], %s;\n", mod, tex_dst, coord_reg,
1486  dsx, dsy, sampler_idx, tex_type);
1487  }
1488  else if(flags & TEX_LOD)
1489  {
1490  if(flags & TEX_PROJ) FIXME("Projected texture sampling with explicit lod\n");
1491  if(flags & TEX_BIAS) FIXME("Biased texture sampling with explicit lod\n");
1492  shader_addline(buffer, "TXL%s %s, %s, texture[%u], %s;\n", mod, tex_dst, coord_reg,
1493  sampler_idx, tex_type);
1494  }
1495  else if (flags & TEX_BIAS)
1496  {
1497  /* Shouldn't be possible, but let's check for it */
1498  if(flags & TEX_PROJ) FIXME("Biased and Projected texture sampling\n");
1499  /* TXB takes the 4th component of the source vector automatically, as d3d. Nothing more to do */
1500  shader_addline(buffer, "TXB%s %s, %s, texture[%u], %s;\n", mod, tex_dst, coord_reg, sampler_idx, tex_type);
1501  }
1502  else if (flags & TEX_PROJ)
1503  {
1504  shader_addline(buffer, "TXP%s %s, %s, texture[%u], %s;\n", mod, tex_dst, coord_reg, sampler_idx, tex_type);
1505  }
1506  else
1507  {
1508  if (np2_fixup)
1509  {
 /* Two samplers share one np2fixup constant: idx >> 1 selects the
  * constant, the low bit of idx selects which half is moved to .xy. */
1510  const unsigned char idx = priv->cur_np2fixup_info->super.idx[sampler_idx];
1511  shader_addline(buffer, "MUL TA, np2fixup[%u].%s, %s;\n", idx >> 1,
1512  (idx % 2) ? "zwxy" : "xyzw", coord_reg);
1513 
1514  shader_addline(buffer, "TEX%s %s, TA, texture[%u], %s;\n", mod, tex_dst, sampler_idx, tex_type);
1515  }
1516  else
1517  shader_addline(buffer, "TEX%s %s, %s, texture[%u], %s;\n", mod, tex_dst, coord_reg, sampler_idx, tex_type);
1518  }
1519 
1520  if (pshader)
1521  {
 /* NOTE(review): two arguments of this call (the `one` and `two`
  * parameters of gen_color_correction) appear to be missing from this
  * listing. */
1522  gen_color_correction(buffer, dst_str, tex_dst,
1525  priv->cur_ps_args->super.color_fixup[sampler_idx], masks);
1526  }
1527 }
1528 
/* shader_arb_get_src_param (name taken from the forward declaration earlier
 * in this file).
 *
 * Writes the operand text for source parameter `src` into `outregstr`.
 * NONE and NEG are expressed directly in the operand text, and so are
 * ABS/ABSNEG on NV2 and later targets. Every other modifier is computed by
 * emitting instructions into a helper temporary named 'T' followed by the
 * letter 'A' + tmpreg (TA, TB, TC, ...), and the operand then names that
 * temporary. The swizzle is appended to the final operand, not to the inputs
 * of the emitted modifier instructions.
 *
 * NOTE(review): the first line of the signature (return type, function name
 * and the `ins` parameter) appears to be missing from this listing. */
1530  const struct wined3d_shader_src_param *src, unsigned int tmpreg, char *outregstr)
1531 {
1532  /* Generate a line that does the input modifier computation and return the input register to use */
1533  BOOL is_color = FALSE, insert_line;
1534  char regstr[256];
1535  char swzstr[20];
1536  struct wined3d_string_buffer *buffer = ins->ctx->buffer;
1537  struct shader_arb_ctx_priv *ctx = ins->ctx->backend_data;
1538  const char *one = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_ONE);
1539  const char *two = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_TWO);
1540 
1541  /* Assume a new line will be added */
1542  insert_line = TRUE;
1543 
1544  /* Get register name */
1545  shader_arb_get_register_name(ins, &src->reg, regstr, &is_color);
1546  shader_arb_get_swizzle(src, is_color, swzstr);
1547 
1548  switch (src->modifiers)
1549  {
1550  case WINED3DSPSM_NONE:
1551  sprintf(outregstr, "%s%s", regstr, swzstr);
1552  insert_line = FALSE;
1553  break;
1554  case WINED3DSPSM_NEG:
1555  sprintf(outregstr, "-%s%s", regstr, swzstr);
1556  insert_line = FALSE;
1557  break;
1558  case WINED3DSPSM_BIAS:
1559  shader_addline(buffer, "ADD T%c, %s, -coefdiv.x;\n", 'A' + tmpreg, regstr);
1560  break;
1561  case WINED3DSPSM_BIASNEG:
1562  shader_addline(buffer, "ADD T%c, -%s, coefdiv.x;\n", 'A' + tmpreg, regstr);
1563  break;
1564  case WINED3DSPSM_SIGN:
1565  shader_addline(buffer, "MAD T%c, %s, %s, -%s;\n", 'A' + tmpreg, regstr, two, one);
1566  break;
1567  case WINED3DSPSM_SIGNNEG:
1568  shader_addline(buffer, "MAD T%c, %s, -%s, %s;\n", 'A' + tmpreg, regstr, two, one);
1569  break;
1570  case WINED3DSPSM_COMP:
1571  shader_addline(buffer, "SUB T%c, %s, %s;\n", 'A' + tmpreg, one, regstr);
1572  break;
1573  case WINED3DSPSM_X2:
1574  shader_addline(buffer, "ADD T%c, %s, %s;\n", 'A' + tmpreg, regstr, regstr);
1575  break;
1576  case WINED3DSPSM_X2NEG:
1577  shader_addline(buffer, "ADD T%c, -%s, -%s;\n", 'A' + tmpreg, regstr, regstr);
1578  break;
1579  case WINED3DSPSM_DZ:
 /* Divide by the z component: reciprocal first, then multiply. */
1580  shader_addline(buffer, "RCP T%c, %s.z;\n", 'A' + tmpreg, regstr);
1581  shader_addline(buffer, "MUL T%c, %s, T%c;\n", 'A' + tmpreg, regstr, 'A' + tmpreg);
1582  break;
1583  case WINED3DSPSM_DW:
 /* Divide by the w component: reciprocal first, then multiply. */
1584  shader_addline(buffer, "RCP T%c, %s.w;\n", 'A' + tmpreg, regstr);
1585  shader_addline(buffer, "MUL T%c, %s, T%c;\n", 'A' + tmpreg, regstr, 'A' + tmpreg);
1586  break;
1587  case WINED3DSPSM_ABS:
1588  if(ctx->target_version >= NV2) {
1589  sprintf(outregstr, "|%s%s|", regstr, swzstr);
1590  insert_line = FALSE;
1591  } else {
1592  shader_addline(buffer, "ABS T%c, %s;\n", 'A' + tmpreg, regstr);
1593  }
1594  break;
1595  case WINED3DSPSM_ABSNEG:
 /* Both branches write outregstr themselves because of the leading '-'. */
1596  if(ctx->target_version >= NV2) {
1597  sprintf(outregstr, "-|%s%s|", regstr, swzstr);
1598  } else {
1599  shader_addline(buffer, "ABS T%c, %s;\n", 'A' + tmpreg, regstr);
1600  sprintf(outregstr, "-T%c%s", 'A' + tmpreg, swzstr);
1601  }
1602  insert_line = FALSE;
1603  break;
1604  default:
 /* Unknown modifiers are passed through as if there were none. */
1605  sprintf(outregstr, "%s%s", regstr, swzstr);
1606  insert_line = FALSE;
1607  }
1608 
1609  /* Return modified or original register, with swizzle */
1610  if (insert_line)
1611  sprintf(outregstr, "T%c%s", 'A' + tmpreg, swzstr);
1612 }
1613 
1614 static void pshader_hw_bem(const struct wined3d_shader_instruction *ins)
1615 {
1616  const struct wined3d_shader_dst_param *dst = &ins->dst[0];
1617  struct wined3d_string_buffer *buffer = ins->ctx->buffer;
1618  DWORD sampler_code = dst->reg.idx[0].offset;
1619  char dst_name[50];
1620  char src_name[2][50];
1621 
1622  shader_arb_get_dst_param(ins, dst, dst_name);
1623 
1624  /* Sampling the perturbation map in Tsrc was done already, including the signedness correction if needed
1625  *
1626  * Keep in mind that src_name[1] can be "TB" and src_name[0] can be "TA" because modifiers like _x2 are valid
1627  * with bem. So delay loading the first parameter until after the perturbation calculation which needs two
1628  * temps is done.
1629  */
1630  shader_arb_get_src_param(ins, &ins->src[1], 1, src_name[1]);
1631  shader_addline(buffer, "SWZ TA, bumpenvmat%d, x, z, 0, 0;\n", sampler_code);
1632  shader_addline(buffer, "DP3 TC.r, TA, %s;\n", src_name[1]);
1633  shader_addline(buffer, "SWZ TA, bumpenvmat%d, y, w, 0, 0;\n", sampler_code);
1634  shader_addline(buffer, "DP3 TC.g, TA, %s;\n", src_name[1]);
1635 
1636  shader_arb_get_src_param(ins, &ins->src[0], 0, src_name[0]);
1637  shader_addline(buffer, "ADD %s, %s, TC;\n", dst_name, src_name[0]);
1638 }
1639 
/* Returns the source modifier that yields the negated value of `mod`.
 *
 * For modifiers that have a negated counterpart, that counterpart is
 * returned and *extra_char is ' '. For COMP, DZ and DW the modifier is
 * returned unchanged and *extra_char is set to '-', which the caller is
 * expected to print in front of the operand. Unknown modifiers are reported
 * and returned unchanged with *extra_char == ' '.
 *
 * NOTE(review): four listing lines between the NEG and COMP cases appear to
 * be missing here; shader_arb_get_src_param() handles BIAS, BIASNEG, SIGN and
 * SIGNNEG, none of which is visible in this switch. */
1640 static DWORD negate_modifiers(DWORD mod, char *extra_char)
1641 {
1642  *extra_char = ' ';
1643  switch(mod)
1644  {
1645  case WINED3DSPSM_NONE: return WINED3DSPSM_NEG;
1646  case WINED3DSPSM_NEG: return WINED3DSPSM_NONE;
1651  case WINED3DSPSM_COMP: *extra_char = '-'; return WINED3DSPSM_COMP;
1652  case WINED3DSPSM_X2: return WINED3DSPSM_X2NEG;
1653  case WINED3DSPSM_X2NEG: return WINED3DSPSM_X2;
1654  case WINED3DSPSM_DZ: *extra_char = '-'; return WINED3DSPSM_DZ;
1655  case WINED3DSPSM_DW: *extra_char = '-'; return WINED3DSPSM_DW;
1656  case WINED3DSPSM_ABS: return WINED3DSPSM_ABSNEG;
1657  case WINED3DSPSM_ABSNEG: return WINED3DSPSM_ABS;
1658  }
1659  FIXME("Unknown modifier %u\n", mod);
1660  return mod;
1661 }
1662 
1663 static void pshader_hw_cnd(const struct wined3d_shader_instruction *ins)
1664 {
1665  const struct wined3d_shader_dst_param *dst = &ins->dst[0];
1666  struct wined3d_string_buffer *buffer = ins->ctx->buffer;
1667  char dst_name[50];
1668  char src_name[3][50];
1669  DWORD shader_version = WINED3D_SHADER_VERSION(ins->ctx->reg_maps->shader_version.major,
1670  ins->ctx->reg_maps->shader_version.minor);
1671 
1672  shader_arb_get_dst_param(ins, dst, dst_name);
1673  shader_arb_get_src_param(ins, &ins->src[1], 1, src_name[1]);
1674 
1675  if (shader_version <= WINED3D_SHADER_VERSION(1, 3) && ins->coissue
1676  && ins->dst->write_mask != WINED3DSP_WRITEMASK_3)
1677  {
1678  shader_addline(buffer, "MOV%s %s, %s;\n", shader_arb_get_modifier(ins), dst_name, src_name[1]);
1679  }
1680  else
1681  {
1682  struct wined3d_shader_src_param src0_copy = ins->src[0];
1683  char extra_neg;
1684 
1685  /* src0 may have a negate srcmod set, so we can't blindly add "-" to the name */
1686  src0_copy.modifiers = negate_modifiers(src0_copy.modifiers, &extra_neg);
1687 
1688  shader_arb_get_src_param(ins, &src0_copy, 0, src_name[0]);
1689  shader_arb_get_src_param(ins, &ins->src[2], 2, src_name[2]);
1690  shader_addline(buffer, "ADD TA, %c%s, coefdiv.x;\n", extra_neg, src_name[0]);
1691  shader_addline(buffer, "CMP%s %s, TA, %s, %s;\n", shader_arb_get_modifier(ins),
1692  dst_name, src_name[1], src_name[2]);
1693  }
1694 }
1695 
1696 static void pshader_hw_cmp(const struct wined3d_shader_instruction *ins)
1697 {
1698  const struct wined3d_shader_dst_param *dst = &ins->dst[0];
1699  struct wined3d_string_buffer *buffer = ins->ctx->buffer;
1700  char dst_name[50];
1701  char src_name[3][50];
1702 
1703  shader_arb_get_dst_param(ins, dst, dst_name);
1704 
1705  /* Generate input register names (with modifiers) */
1706  shader_arb_get_src_param(ins, &ins->src[0], 0, src_name[0]);
1707  shader_arb_get_src_param(ins, &ins->src[1], 1, src_name[1]);
1708  shader_arb_get_src_param(ins, &ins->src[2], 2, src_name[2]);
1709 
1710  shader_addline(buffer, "CMP%s %s, %s, %s, %s;\n", shader_arb_get_modifier(ins),
1711  dst_name, src_name[0], src_name[2], src_name[1]);
1712 }
1713 
1716 static void pshader_hw_dp2add(const struct wined3d_shader_instruction *ins)
1717 {
1718  const struct wined3d_shader_dst_param *dst = &ins->dst[0];
1719  struct wined3d_string_buffer *buffer = ins->ctx->buffer;
1720  char dst_name[50];
1721  char src_name[3][50];
1722  struct shader_arb_ctx_priv *ctx = ins->ctx->backend_data;
1723 
1724  shader_arb_get_dst_param(ins, dst, dst_name);
1725  shader_arb_get_src_param(ins, &ins->src[0], 0, src_name[0]);
1726  shader_arb_get_src_param(ins, &ins->src[2], 2, src_name[2]);
1727 
1728  if(ctx->target_version >= NV3)
1729  {
1730  /* GL_NV_fragment_program2 has a 1:1 matching instruction */
1731  shader_arb_get_src_param(ins, &ins->src[1], 1, src_name[1]);
1732  shader_addline(buffer, "DP2A%s %s, %s, %s, %s;\n", shader_arb_get_modifier(ins),
1733  dst_name, src_name[0], src_name[1], src_name[2]);
1734  }
1735  else if(ctx->target_version >= NV2)
1736  {
1737  /* dst.x = src2.?, src0.x, src1.x + src0.y * src1.y
1738  * dst.y = src2.?, src0.x, src1.z + src0.y * src1.w
1739  * dst.z = src2.?, src0.x, src1.x + src0.y * src1.y
1740  * dst.z = src2.?, src0.x, src1.z + src0.y * src1.w
1741  *
1742  * Make sure that src1.zw = src1.xy, then we get a classic dp2add
1743  *
1744  * .xyxy and other swizzles that we could get with this are not valid in
1745  * plain ARBfp, but luckily the NV extension grammar lifts this limitation.
1746  */
1747  struct wined3d_shader_src_param tmp_param = ins->src[1];
1748  DWORD swizzle = tmp_param.swizzle & 0xf; /* Selects .xy */
1749  tmp_param.swizzle = swizzle | (swizzle << 4); /* Creates .xyxy */
1750 
1751  shader_arb_get_src_param(ins, &tmp_param, 1, src_name[1]);
1752 
1753  shader_addline(buffer, "X2D%s %s, %s, %s, %s;\n", shader_arb_get_modifier(ins),
1754  dst_name, src_name[2], src_name[0], src_name[1]);
1755  }
1756  else
1757  {
1758  shader_arb_get_src_param(ins, &ins->src[1], 1, src_name[1]);
1759  /* Emulate a DP2 with a DP3 and 0.0. Don't use the dest as temp register, it could be src[1] or src[2]
1760  * src_name[0] can be TA, but TA is a private temp for modifiers, so it is save to overwrite
1761  */
1762  shader_addline(buffer, "MOV TA, %s;\n", src_name[0]);
1763  shader_addline(buffer, "MOV TA.z, 0.0;\n");
1764  shader_addline(buffer, "DP3 TA, TA, %s;\n", src_name[1]);
1765  shader_addline(buffer, "ADD%s %s, TA, %s;\n", shader_arb_get_modifier(ins), dst_name, src_name[2]);
1766  }
1767 }
1768 
1769 /* Map the opcode 1-to-1 to the GL code */
1770 static void shader_hw_map2gl(const struct wined3d_shader_instruction *ins)
1771 {
1772  struct wined3d_string_buffer *buffer = ins->ctx->buffer;
1773  const char *instruction;
1774  char arguments[256], dst_str[50];
1775  unsigned int i;
1776  const struct wined3d_shader_dst_param *dst = &ins->dst[0];
1777 
1778  switch (ins->handler_idx)
1779  {
1780  case WINED3DSIH_ABS: instruction = "ABS"; break;
1781  case WINED3DSIH_ADD: instruction = "ADD"; break;
1782  case WINED3DSIH_CRS: instruction = "XPD"; break;
1783  case WINED3DSIH_DP3: instruction = "DP3"; break;
1784  case WINED3DSIH_DP4: instruction = "DP4"; break;
1785  case WINED3DSIH_DST: instruction = "DST"; break;
1786  case WINED3DSIH_FRC: instruction = "FRC"; break;
1787  case WINED3DSIH_LIT: instruction = "LIT"; break;
1788  case WINED3DSIH_LRP: instruction = "LRP"; break;
1789  case WINED3DSIH_MAD: instruction = "MAD"; break;
1790  case WINED3DSIH_MAX: instruction = "MAX"; break;
1791  case WINED3DSIH_MIN: instruction = "MIN"; break;
1792  case WINED3DSIH_MOV: instruction = "MOV"; break;
1793  case WINED3DSIH_MUL: instruction = "MUL"; break;
1794  case WINED3DSIH_SGE: instruction = "SGE"; break;
1795  case WINED3DSIH_SLT: instruction = "SLT"; break;
1796  case WINED3DSIH_SUB: instruction = "SUB"; break;
1797  case WINED3DSIH_MOVA:instruction = "ARR"; break;
1798  case WINED3DSIH_DSX: instruction = "DDX"; break;
1799  default: instruction = "";
1800  FIXME("Unhandled opcode %s.\n", debug_d3dshaderinstructionhandler(ins->handler_idx));
1801  break;
1802  }
1803 
1804  /* Note that shader_arb_add_dst_param() adds spaces. */
1805  arguments[0] = '\0';
1806  shader_arb_get_dst_param(ins, dst, dst_str);
1807  for (i = 0; i < ins->src_count; ++i)
1808  {
1809  char operand[100];
1810  strcat(arguments, ", ");
1811  shader_arb_get_src_param(ins, &ins->src[i], i, operand);
1812  strcat(arguments, operand);
1813  }
1814  shader_addline(buffer, "%s%s %s%s;\n", instruction, shader_arb_get_modifier(ins), dst_str, arguments);
1815 }
1816 
/* Handler for instructions that produce no ARB output; deliberately empty. */
static void shader_hw_nop(const struct wined3d_shader_instruction *ins)
{
}
1818 
/* shader_arb_select_component (name taken from the call site in
 * shader_hw_mov).
 *
 * Extracts the 2-bit selector of `component` from `swizzle` and replicates
 * it into all four 2-bit fields of the result (multiplying by 0x55 copies a
 * 2-bit value into bit pairs 0-1, 2-3, 4-5 and 6-7).
 *
 * NOTE(review): the signature line appears to be missing from this
 * listing. */
1820 {
1821  return ((swizzle >> 2 * component) & 0x3) * 0x55;
1822 }
1823 
/* Emits ARB code for "mov" and "mova".
 *
 * Four cases are distinguished:
 *  - mova: on NV2 and later targets it is mapped directly through
 *    shader_hw_map2gl(); otherwise the rounding is emulated, the result is
 *    stored in A0_SHADOW and the cached address register name is cleared;
 *  - a version 1 vertex shader writing the address register: an ARL is
 *    emitted, with the relative offset added first when one is in use on a
 *    plain ARB target;
 *  - a pixel shader writing color output 0: skipped when post processing is
 *    active and the shader is flagged with color0_mov, otherwise mapped
 *    directly;
 *  - everything else: mapped directly through shader_hw_map2gl(). */
1824 static void shader_hw_mov(const struct wined3d_shader_instruction *ins)
1825 {
1826  const struct wined3d_shader *shader = ins->ctx->shader;
1827  const struct wined3d_shader_reg_maps *reg_maps = ins->ctx->reg_maps;
1828  BOOL pshader = shader_is_pshader_version(reg_maps->shader_version.type);
1829  struct shader_arb_ctx_priv *ctx = ins->ctx->backend_data;
1830  const char *zero = arb_get_helper_value(reg_maps->shader_version.type, ARB_ZERO);
1831  const char *one = arb_get_helper_value(reg_maps->shader_version.type, ARB_ONE);
1832  const char *two = arb_get_helper_value(reg_maps->shader_version.type, ARB_TWO);
1833 
1834  struct wined3d_string_buffer *buffer = ins->ctx->buffer;
1835  char src0_param[256];
1836 
1837  if (ins->handler_idx == WINED3DSIH_MOVA)
1838  {
1839  const struct arb_vshader_private *shader_data = shader->backend_data;
1840  char write_mask[6];
 /* NOTE(review): `offset` is used further down in this branch but its
  * declaration appears to be missing from this listing. */
1842 
1843  if(ctx->target_version >= NV2) {
1844  shader_hw_map2gl(ins);
1845  return;
1846  }
1847  shader_arb_get_src_param(ins, &ins->src[0], 0, src0_param);
1848  shader_arb_get_write_mask(ins, &ins->dst[0], write_mask);
1849 
1850  /* This implements the mova formula used in GLSL. The first two instructions
1851  * prepare the sign() part. Note that it is fine to have my_sign(0.0) = 1.0
1852  * in this case:
1853  * mova A0.x, 0.0
1854  *
1855  * A0.x = arl(floor(abs(0.0) + 0.5) * 1.0) = floor(0.5) = 0.0 since arl does a floor
1856  *
1857  * The ARL is performed when A0 is used - the requested component is read from A0_SHADOW into
1858  * A0.x. We can use the overwritten component of A0_shadow as temporary storage for the sign.
1859  */
1860  shader_addline(buffer, "SGE A0_SHADOW%s, %s, %s;\n", write_mask, src0_param, zero);
1861  shader_addline(buffer, "MAD A0_SHADOW%s, A0_SHADOW, %s, -%s;\n", write_mask, two, one);
1862 
1863  shader_addline(buffer, "ABS TA%s, %s;\n", write_mask, src0_param);
1864  shader_addline(buffer, "ADD TA%s, TA, rel_addr_const.x;\n", write_mask);
1865  shader_addline(buffer, "FLR TA%s, TA;\n", write_mask);
1866  if (shader_data->rel_offset)
1867  {
1868  shader_addline(buffer, "ADD TA%s, TA, %s;\n", write_mask, offset);
1869  }
1870  shader_addline(buffer, "MUL A0_SHADOW%s, TA, A0_SHADOW;\n", write_mask);
1871 
 /* Forget which register A0 was last loaded from, so the next relative
  * access reloads it. */
1872  ((struct shader_arb_ctx_priv *)ins->ctx->backend_data)->addr_reg[0] = '\0';
1873  }
1874  else if (reg_maps->shader_version.major == 1
1875  && !shader_is_pshader_version(reg_maps->shader_version.type)
1876  && ins->dst[0].reg.type == WINED3DSPR_ADDR)
1877  {
1878  const struct arb_vshader_private *shader_data = shader->backend_data;
1879  src0_param[0] = '\0';
1880 
1881  if (shader_data->rel_offset && ctx->target_version == ARB)
1882  {
 /* NOTE(review): `offset` is used below but its declaration appears
  * to be missing from this listing. */
1884  shader_arb_get_src_param(ins, &ins->src[0], 0, src0_param);
1885  shader_addline(buffer, "ADD TA.x, %s, %s;\n", src0_param, offset);
1886  shader_addline(buffer, "ARL A0.x, TA.x;\n");
1887  }
1888  else
1889  {
1890  /* Apple's ARB_vertex_program implementation does not accept an ARL source argument
1891  * with more than one component. Thus replicate the first source argument over all
1892  * 4 components. For example, .xyzw -> .x (or better: .xxxx), .zwxy -> .z, etc) */
1893  struct wined3d_shader_src_param tmp_src = ins->src[0];
1894  tmp_src.swizzle = shader_arb_select_component(tmp_src.swizzle, 0);
1895  shader_arb_get_src_param(ins, &tmp_src, 0, src0_param);
1896  shader_addline(buffer, "ARL A0.x, %s;\n", src0_param);
1897  }
1898  }
1899  else if (ins->dst[0].reg.type == WINED3DSPR_COLOROUT && !ins->dst[0].reg.idx[0].offset && pshader)
1900  {
1901  if (ctx->ps_post_process && shader->u.ps.color0_mov)
1902  {
1903  shader_addline(buffer, "#mov handled in srgb write or fog code\n");
1904  return;
1905  }
1906  shader_hw_map2gl(ins);
1907  }
1908  else
1909  {
1910  shader_hw_map2gl(ins);
1911  }
1912 }
1913 
/* Emits ARB code for the pixel shader "texkill" instruction.
 *
 * Shader version 2 and later: KIL is applied to the register itself or to a
 * copy in TA in which the components outside the write mask are replaced by
 * 0 so they cannot trigger the kill.
 * Shader version 1.x: the register is copied to TA with the fourth component
 * forced to 1, and KIL is applied to TA. For minor versions up to 3 the
 * texture coordinate input with the destination's index is used as the
 * source. */
1914 static void pshader_hw_texkill(const struct wined3d_shader_instruction *ins)
1915 {
1916  const struct wined3d_shader_dst_param *dst = &ins->dst[0];
1917  struct wined3d_string_buffer *buffer = ins->ctx->buffer;
1918  char reg_dest[40];
1919 
1920  /* No swizzles are allowed in d3d's texkill. PS 1.x ignores the 4th component as documented,
1921  * but >= 2.0 honors it (undocumented, but tested by the d3d9 testsuite)
1922  */
1923  shader_arb_get_dst_param(ins, dst, reg_dest);
1924 
1925  if (ins->ctx->reg_maps->shader_version.major >= 2)
1926  {
1927  const char *kilsrc = "TA";
1928  BOOL is_color;
1929 
 /* Overwrites reg_dest with the bare register name, without write mask. */
1930  shader_arb_get_register_name(ins, &dst->reg, reg_dest, &is_color);
 /* NOTE(review): the `if` condition that opens the following block
  * appears to be missing from this listing; the `else` branch below
  * handles the case where a SWZ copy into TA is required. */
1932  {
1933  kilsrc = reg_dest;
1934  }
1935  else
1936  {
1937  /* Sigh. KIL doesn't support swizzles/writemasks. KIL passes a writemask, but ".xy" for example
1938  * is not valid as a swizzle in ARB (needs ".xyyy"). Use SWZ to load the register properly, and set
1939  * masked out components to 0(won't kill)
1940  */
1941  char x = '0', y = '0', z = '0', w = '0';
1942  if(dst->write_mask & WINED3DSP_WRITEMASK_0) x = 'x';
1943  if(dst->write_mask & WINED3DSP_WRITEMASK_1) y = 'y';
1944  if(dst->write_mask & WINED3DSP_WRITEMASK_2) z = 'z';
1945  if(dst->write_mask & WINED3DSP_WRITEMASK_3) w = 'w';
1946  shader_addline(buffer, "SWZ TA, %s, %c, %c, %c, %c;\n", reg_dest, x, y, z, w);
1947  }
1948  shader_addline(buffer, "KIL %s;\n", kilsrc);
1949  }
1950  else
1951  {
1952  /* ARB fp doesn't like swizzles on the parameter of the KIL instruction. To mask the 4th component,
1953  * copy the register into our general purpose TMP variable, overwrite .w and pass TMP to KIL
1954  *
1955  * ps_1_3 shaders use the texcoord incarnation of the Tx register. ps_1_4 shaders can use the same,
1956  * or pass in any temporary register(in shader phase 2)
1957  */
1958  if (ins->ctx->reg_maps->shader_version.minor <= 3)
1959  sprintf(reg_dest, "fragment.texcoord[%u]", dst->reg.idx[0].offset);
1960  else
1961  shader_arb_get_dst_param(ins, dst, reg_dest);
1962  shader_addline(buffer, "SWZ TA, %s, x, y, z, 1;\n", reg_dest);
1963  shader_addline(buffer, "KIL TA;\n");
1964  }
1965 }
1966 
1967 static void pshader_hw_tex(const struct wined3d_shader_instruction *ins)
1968 {
1969  struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
1970  const struct wined3d_shader_dst_param *dst = &ins->dst[0];
1971  DWORD shader_version = WINED3D_SHADER_VERSION(ins->ctx->reg_maps->shader_version.major,
1972  ins->ctx->reg_maps->shader_version.minor);
1973  struct wined3d_shader_src_param src;
1974 
1975  char reg_dest[40];
1976  char reg_coord[40];
1977  DWORD reg_sampler_code;
1978  WORD myflags = 0;
1979  BOOL swizzle_coord = FALSE;
1980 
1981  /* All versions have a destination register */
1982  shader_arb_get_dst_param(ins, dst, reg_dest);
1983 
1984  /* 1.0-1.4: Use destination register number as texture code.
1985  2.0+: Use provided sampler number as texture code. */
1986  if (shader_version < WINED3D_SHADER_VERSION(2,0))
1987  reg_sampler_code = dst->reg.idx[0].offset;
1988  else
1989  reg_sampler_code = ins->src[1].reg.idx[0].offset;
1990 
1991  /* 1.0-1.3: Use the texcoord varying.
1992  1.4+: Use provided coordinate source register. */
1993  if (shader_version < WINED3D_SHADER_VERSION(1,4))
1994  sprintf(reg_coord, "fragment.texcoord[%u]", reg_sampler_code);
1995  else {
1996  /* TEX is the only instruction that can handle DW and DZ natively */
1997  src = ins->src[0];
2000  shader_arb_get_src_param(ins, &src, 0, reg_coord);
2001  }
2002 
2003  /* projection flag:
2004  * 1.1, 1.2, 1.3: Use WINED3D_TSS_TEXTURETRANSFORMFLAGS
2005  * 1.4: Use WINED3DSPSM_DZ or WINED3DSPSM_DW on src[0]
2006  * 2.0+: Use WINED3DSI_TEXLD_PROJECT on the opcode
2007  */
2008  if (shader_version < WINED3D_SHADER_VERSION(1,4))
2009  {
2010  DWORD flags = 0;
2011  if (reg_sampler_code < MAX_TEXTURES)
2012  flags = priv->cur_ps_args->super.tex_transform >> reg_sampler_code * WINED3D_PSARGS_TEXTRANSFORM_SHIFT;
2013  if (flags & WINED3D_PSARGS_PROJECTED)
2014  {
2015  myflags |= TEX_PROJ;
2016  if ((flags & ~WINED3D_PSARGS_PROJECTED) == WINED3D_TTFF_COUNT3)
2017  swizzle_coord = TRUE;
2018  }
2019  }
2020  else if (shader_version < WINED3D_SHADER_VERSION(2,0))
2021  {
2022  enum wined3d_shader_src_modifier src_mod = ins->src[0].modifiers;
2023  if (src_mod == WINED3DSPSM_DZ)
2024  {
2025  swizzle_coord = TRUE;
2026  myflags |= TEX_PROJ;
2027  } else if(src_mod == WINED3DSPSM_DW) {
2028  myflags |= TEX_PROJ;
2029  }
2030  } else {
2031  if (ins->flags & WINED3DSI_TEXLD_PROJECT) myflags |= TEX_PROJ;
2032  if (ins->flags & WINED3DSI_TEXLD_BIAS) myflags |= TEX_BIAS;
2033  }
2034 
2035  if (swizzle_coord)
2036  {
2037  /* TXP cannot handle DZ natively, so move the z coordinate to .w.
2038  * reg_coord is a read-only varying register, so we need a temp reg */
2039  shader_addline(ins->ctx->buffer, "SWZ TA, %s, x, y, z, z;\n", reg_coord);
2040  strcpy(reg_coord, "TA");
2041  }
2042 
2043  shader_hw_sample(ins, reg_sampler_code, reg_dest, reg_coord, myflags, NULL, NULL);
2044 }
2045 
2046 static void pshader_hw_texcoord(const struct wined3d_shader_instruction *ins)
2047 {
2048  const struct wined3d_shader_dst_param *dst = &ins->dst[0];
2049  struct wined3d_string_buffer *buffer = ins->ctx->buffer;
2050  DWORD shader_version = WINED3D_SHADER_VERSION(ins->ctx->reg_maps->shader_version.major,
2051  ins->ctx->reg_maps->shader_version.minor);
2052  char dst_str[50];
2053 
2054  if (shader_version < WINED3D_SHADER_VERSION(1,4))
2055  {
2056  DWORD reg = dst->reg.idx[0].offset;
2057 
2058  shader_arb_get_dst_param(ins, &ins->dst[0], dst_str);
2059  shader_addline(buffer, "MOV_SAT %s, fragment.texcoord[%u];\n", dst_str, reg);
2060  } else {
2061  char reg_src[40];
2062 
2063  shader_arb_get_src_param(ins, &ins->src[0], 0, reg_src);
2064  shader_arb_get_dst_param(ins, &ins->dst[0], dst_str);
2065  shader_addline(buffer, "MOV %s, %s;\n", dst_str, reg_src);
2066  }
2067 }
2068 
2069 static void pshader_hw_texreg2ar(const struct wined3d_shader_instruction *ins)
2070 {
2071  struct wined3d_string_buffer *buffer = ins->ctx->buffer;
2072  DWORD flags = 0;
2073 
2074  DWORD reg1 = ins->dst[0].reg.idx[0].offset;
2075  char dst_str[50];
2076  char src_str[50];
2077 
2078  /* Note that texreg2ar treats Tx as a temporary register, not as a varying */
2079  shader_arb_get_dst_param(ins, &ins->dst[0], dst_str);
2080  shader_arb_get_src_param(ins, &ins->src[0], 0, src_str);
2081  /* Move .x first in case src_str is "TA" */
2082  shader_addline(buffer, "MOV TA.y, %s.x;\n", src_str);
2083  shader_addline(buffer, "MOV TA.x, %s.w;\n", src_str);
2084  if (reg1 < MAX_TEXTURES)
2085  {
2086  struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
2087  flags = priv->cur_ps_args->super.tex_transform >> reg1 * WINED3D_PSARGS_TEXTRANSFORM_SHIFT;
2088  }
2089  shader_hw_sample(ins, reg1, dst_str, "TA", flags & WINED3D_PSARGS_PROJECTED ? TEX_PROJ : 0, NULL, NULL);
2090 }
2091 
2092 static void pshader_hw_texreg2gb(const struct wined3d_shader_instruction *ins)
2093 {
2094  struct wined3d_string_buffer *buffer = ins->ctx->buffer;
2095 
2096  DWORD reg1 = ins->dst[0].reg.idx[0].offset;
2097  char dst_str[50];
2098  char src_str[50];
2099 
2100  /* Note that texreg2gb treats Tx as a temporary register, not as a varying */
2101  shader_arb_get_dst_param(ins, &ins->dst[0], dst_str);
2102  shader_arb_get_src_param(ins, &ins->src[0], 0, src_str);
2103  shader_addline(buffer, "MOV TA.x, %s.y;\n", src_str);
2104  shader_addline(buffer, "MOV TA.y, %s.z;\n", src_str);
2105  shader_hw_sample(ins, reg1, dst_str, "TA", 0, NULL, NULL);
2106 }
2107 
2109 {
2110  DWORD reg1 = ins->dst[0].reg.idx[0].offset;
2111  char dst_str[50];
2112  char src_str[50];
2113 
2114  /* Note that texreg2rg treats Tx as a temporary register, not as a varying */
2115  shader_arb_get_dst_param(ins, &ins->dst[0], dst_str);
2116  shader_arb_get_src_param(ins, &ins->src[0], 0, src_str);
2117  shader_hw_sample(ins, reg1, dst_str, src_str, 0, NULL, NULL);
2118 }
2119 
2120 static void pshader_hw_texbem(const struct wined3d_shader_instruction *ins)
2121 {
2122  struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
2123  const struct wined3d_shader_dst_param *dst = &ins->dst[0];
2124  struct wined3d_string_buffer *buffer = ins->ctx->buffer;
2125  char reg_coord[40], dst_reg[50], src_reg[50];
2126  DWORD reg_dest_code;
2127 
2128  /* All versions have a destination register. The Tx where the texture coordinates come
2129  * from is the varying incarnation of the texture register
2130  */
2131  reg_dest_code = dst->reg.idx[0].offset;
2132  shader_arb_get_dst_param(ins, &ins->dst[0], dst_reg);
2133  shader_arb_get_src_param(ins, &ins->src[0], 0, src_reg);
2134  sprintf(reg_coord, "fragment.texcoord[%u]", reg_dest_code);
2135 
2136  /* Sampling the perturbation map in Tsrc was done already, including the signedness correction if needed
2137  * The Tx in which the perturbation map is stored is the tempreg incarnation of the texture register
2138  *
2139  * GL_NV_fragment_program_option could handle this in one instruction via X2D:
2140  * X2D TA.xy, fragment.texcoord, T%u, bumpenvmat%u.xzyw
2141  *
2142  * However, the NV extensions are never enabled for <= 2.0 shaders because of the performance penalty that
2143  * comes with it, and texbem is an 1.x only instruction. No 1.x instruction forces us to enable the NV
2144  * extension.
2145  */
2146  shader_addline(buffer, "SWZ TB, bumpenvmat%d, x, z, 0, 0;\n", reg_dest_code);
2147  shader_addline(buffer, "DP3 TA.x, TB, %s;\n", src_reg);
2148  shader_addline(buffer, "SWZ TB, bumpenvmat%d, y, w, 0, 0;\n", reg_dest_code);
2149  shader_addline(buffer, "DP3 TA.y, TB, %s;\n", src_reg);
2150 
2151  /* with projective textures, texbem only divides the static texture coord, not the displacement,
2152  * so we can't let the GL handle this.
2153  */
2154  if ((priv->cur_ps_args->super.tex_transform >> reg_dest_code * WINED3D_PSARGS_TEXTRANSFORM_SHIFT)
2156  {
2157  shader_addline(buffer, "RCP TB.w, %s.w;\n", reg_coord);
2158  shader_addline(buffer, "MUL TB.xy, %s, TB.w;\n", reg_coord);
2159  shader_addline(buffer, "ADD TA.xy, TA, TB;\n");
2160  } else {
2161  shader_addline(buffer, "ADD TA.xy, TA, %s;\n", reg_coord);
2162  }
2163 
2164  shader_hw_sample(ins, reg_dest_code, dst_reg, "TA", 0, NULL, NULL);
2165 
2166  if (ins->handler_idx == WINED3DSIH_TEXBEML)
2167  {
2168  /* No src swizzles are allowed, so this is ok */
2169  shader_addline(buffer, "MAD TA, %s.z, luminance%d.x, luminance%d.y;\n",
2170  src_reg, reg_dest_code, reg_dest_code);
2171  shader_addline(buffer, "MUL %s, %s, TA;\n", dst_reg, dst_reg);
2172  }
2173 }
2174 
2176 {
2177  DWORD reg = ins->dst[0].reg.idx[0].offset;
2178  struct wined3d_string_buffer *buffer = ins->ctx->buffer;
2179  char src0_name[50], dst_name[50];
2180  BOOL is_color;
2181  struct wined3d_shader_register tmp_reg = ins->dst[0].reg;
2182 
2183  shader_arb_get_src_param(ins, &ins->src[0], 0, src0_name);
2184  /* The next instruction will be a texm3x2tex or texm3x2depth that writes to the uninitialized
2185  * T<reg+1> register. Use this register to store the calculated vector
2186  */
2187  tmp_reg.idx[0].offset = reg + 1;
2188  shader_arb_get_register_name(ins, &tmp_reg, dst_name, &is_color);
2189  shader_addline(buffer, "DP3 %s.x, fragment.texcoord[%u], %s;\n", dst_name, reg, src0_name);
2190 }
2191 
2193 {
2194  struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
2195  DWORD flags;
2196  DWORD reg = ins->dst[0].reg.idx[0].offset;
2197  struct wined3d_string_buffer *buffer = ins->ctx->buffer;
2198  char dst_str[50];
2199  char src0_name[50];
2200  char dst_reg[50];
2201  BOOL is_color;
2202 
2203  /* We know that we're writing to the uninitialized T<reg> register, so use it for temporary storage */
2204  shader_arb_get_register_name(ins, &ins->dst[0].reg, dst_reg, &is_color);
2205 
2206  shader_arb_get_dst_param(ins, &ins->dst[0], dst_str);
2207  shader_arb_get_src_param(ins, &ins->src[0], 0, src0_name);
2208  shader_addline(buffer, "DP3 %s.y, fragment.texcoord[%u], %s;\n", dst_reg, reg, src0_name);
2209  flags = reg < MAX_TEXTURES ? priv->cur_ps_args->super.tex_transform >> reg * WINED3D_PSARGS_TEXTRANSFORM_SHIFT : 0;
2210  shader_hw_sample(ins, reg, dst_str, dst_reg, flags & WINED3D_PSARGS_PROJECTED ? TEX_PROJ : 0, NULL, NULL);
2211 }
2212 
2214 {
2215  struct wined3d_shader_tex_mx *tex_mx = ins->ctx->tex_mx;
2216  DWORD reg = ins->dst[0].reg.idx[0].offset;
2217  struct wined3d_string_buffer *buffer = ins->ctx->buffer;
2218  char src0_name[50], dst_name[50];
2219  struct wined3d_shader_register tmp_reg = ins->dst[0].reg;
2220  BOOL is_color;
2221 
2222  /* There are always 2 texm3x3pad instructions followed by one texm3x3[tex,vspec, ...] instruction, with
2223  * incrementing ins->dst[0].register_idx numbers. So the pad instruction already knows the final destination
2224  * register, and this register is uninitialized(otherwise the assembler complains that it is 'redeclared')
2225  */
2226  tmp_reg.idx[0].offset = reg + 2 - tex_mx->current_row;
2227  shader_arb_get_register_name(ins, &tmp_reg, dst_name, &is_color);
2228 
2229  shader_arb_get_src_param(ins, &ins->src[0], 0, src0_name);
2230  shader_addline(buffer, "DP3 %s.%c, fragment.texcoord[%u], %s;\n",
2231  dst_name, 'x' + tex_mx->current_row, reg, src0_name);
2232  tex_mx->texcoord_w[tex_mx->current_row++] = reg;
2233 }
2234 
2236 {
2237  struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
2238  struct wined3d_shader_tex_mx *tex_mx = ins->ctx->tex_mx;
2239  DWORD flags;
2240  DWORD reg = ins->dst[0].reg.idx[0].offset;
2241  struct wined3d_string_buffer *buffer = ins->ctx->buffer;
2242  char dst_str[50];
2243  char src0_name[50], dst_name[50];
2244  BOOL is_color;
2245 
2246  shader_arb_get_register_name(ins, &ins->dst[0].reg, dst_name, &is_color);
2247  shader_arb_get_src_param(ins, &ins->src[0], 0, src0_name);
2248  shader_addline(buffer, "DP3 %s.z, fragment.texcoord[%u], %s;\n", dst_name, reg, src0_name);
2249 
2250  /* Sample the texture using the calculated coordinates */
2251  shader_arb_get_dst_param(ins, &ins->dst[0], dst_str);
2252  flags = reg < MAX_TEXTURES ? priv->cur_ps_args->super.tex_transform >> reg * WINED3D_PSARGS_TEXTRANSFORM_SHIFT : 0;
2253  shader_hw_sample(ins, reg, dst_str, dst_name, flags & WINED3D_PSARGS_PROJECTED ? TEX_PROJ : 0, NULL, NULL);
2254  tex_mx->current_row = 0;
2255 }
2256 
2258 {
2259  struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
2260  struct wined3d_shader_tex_mx *tex_mx = ins->ctx->tex_mx;
2261  DWORD flags;
2262  DWORD reg = ins->dst[0].reg.idx[0].offset;
2263  struct wined3d_string_buffer *buffer = ins->ctx->buffer;
2264  char dst_str[50];
2265  char src0_name[50];
2266  char dst_reg[50];
2267  BOOL is_color;
2268 
2269  /* Get the dst reg without writemask strings. We know this register is uninitialized, so we can use all
2270  * components for temporary data storage
2271  */
2272  shader_arb_get_register_name(ins, &ins->dst[0].reg, dst_reg, &is_color);
2273  shader_arb_get_src_param(ins, &ins->src[0], 0, src0_name);
2274  shader_addline(buffer, "DP3 %s.z, fragment.texcoord[%u], %s;\n", dst_reg, reg, src0_name);
2275 
2276  /* Construct the eye-ray vector from w coordinates */
2277  shader_addline(buffer, "MOV TB.x, fragment.texcoord[%u].w;\n", tex_mx->texcoord_w[0]);
2278  shader_addline(buffer, "MOV TB.y, fragment.texcoord[%u].w;\n", tex_mx->texcoord_w[1]);
2279  shader_addline(buffer, "MOV TB.z, fragment.texcoord[%u].w;\n", reg);
2280 
2281  /* Calculate reflection vector
2282  */
2283  shader_addline(buffer, "DP3 %s.w, %s, TB;\n", dst_reg, dst_reg);
2284  /* The .w is ignored when sampling, so I can use TB.w to calculate dot(N, N) */
2285  shader_addline(buffer, "DP3 TB.w, %s, %s;\n", dst_reg, dst_reg);
2286  shader_addline(buffer, "RCP TB.w, TB.w;\n");
2287  shader_addline(buffer, "MUL %s.w, %s.w, TB.w;\n", dst_reg, dst_reg);
2288  shader_addline(buffer, "MUL %s, %s.w, %s;\n", dst_reg, dst_reg, dst_reg);
2289  shader_addline(buffer, "MAD %s, coefmul.x, %s, -TB;\n", dst_reg, dst_reg);
2290 
2291  /* Sample the texture using the calculated coordinates */
2292  shader_arb_get_dst_param(ins, &ins->dst[0], dst_str);
2293  flags = reg < MAX_TEXTURES ? priv->cur_ps_args->super.tex_transform >> reg * WINED3D_PSARGS_TEXTRANSFORM_SHIFT : 0;
2294  shader_hw_sample(ins, reg, dst_str, dst_reg, flags & WINED3D_PSARGS_PROJECTED ? TEX_PROJ : 0, NULL, NULL);
2295  tex_mx->current_row = 0;
2296 }
2297 
2299 {
2300  struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
2301  struct wined3d_shader_tex_mx *tex_mx = ins->ctx->tex_mx;
2302  DWORD flags;
2303  DWORD reg = ins->dst[0].reg.idx[0].offset;
2304  struct wined3d_string_buffer *buffer = ins->ctx->buffer;
2305  char dst_str[50];
2306  char src0_name[50];
2307  char src1_name[50];
2308  char dst_reg[50];
2309  BOOL is_color;
2310 
2311  shader_arb_get_src_param(ins, &ins->src[0], 0, src0_name);
2312  shader_arb_get_src_param(ins, &ins->src[0], 1, src1_name);
2313  shader_arb_get_register_name(ins, &ins->dst[0].reg, dst_reg, &is_color);
2314  /* Note: dst_reg.xy is input here, generated by two texm3x3pad instructions */
2315  shader_addline(buffer, "DP3 %s.z, fragment.texcoord[%u], %s;\n", dst_reg, reg, src0_name);
2316 
2317  /* Calculate reflection vector.
2318  *
2319  * dot(N, E)
2320  * dst_reg.xyz = 2 * --------- * N - E
2321  * dot(N, N)
2322  *
2323  * Which normalizes the normal vector
2324  */
2325  shader_addline(buffer, "DP3 %s.w, %s, %s;\n", dst_reg, dst_reg, src1_name);
2326  shader_addline(buffer, "DP3 TC.w, %s, %s;\n", dst_reg, dst_reg);
2327  shader_addline(buffer, "RCP TC.w, TC.w;\n");
2328  shader_addline(buffer, "MUL %s.w, %s.w, TC.w;\n", dst_reg, dst_reg);
2329  shader_addline(buffer, "MUL %s, %s.w, %s;\n", dst_reg, dst_reg, dst_reg);
2330  shader_addline(buffer, "MAD %s, coefmul.x, %s, -%s;\n", dst_reg, dst_reg, src1_name);
2331 
2332  /* Sample the texture using the calculated coordinates */
2333  shader_arb_get_dst_param(ins, &ins->dst[0], dst_str);
2334  flags = reg < MAX_TEXTURES ? priv->cur_ps_args->super.tex_transform >> reg * WINED3D_PSARGS_TEXTRANSFORM_SHIFT : 0;
2335  shader_hw_sample(ins, reg, dst_str, dst_reg, flags & WINED3D_PSARGS_PROJECTED ? TEX_PROJ : 0, NULL, NULL);
2336  tex_mx->current_row = 0;
2337 }
2338 
2339 static void pshader_hw_texdepth(const struct wined3d_shader_instruction *ins)
2340 {
2341  const struct wined3d_shader_dst_param *dst = &ins->dst[0];
2342  struct wined3d_string_buffer *buffer = ins->ctx->buffer;
2343  char dst_name[50];
2344  const char *zero = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_ZERO);
2345  const char *one = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_ONE);
2346 
2347  /* texdepth has an implicit destination, the fragment depth value. It's only parameter,
2348  * which is essentially an input, is the destination register because it is the first
2349  * parameter. According to the msdn, this must be register r5, but let's keep it more flexible
2350  * here(writemasks/swizzles are not valid on texdepth)
2351  */
2352  shader_arb_get_dst_param(ins, dst, dst_name);
2353 
2354  /* According to the msdn, the source register(must be r5) is unusable after
2355  * the texdepth instruction, so we're free to modify it
2356  */
2357  shader_addline(buffer, "MIN %s.y, %s.y, %s;\n", dst_name, dst_name, one);
2358 
2359  /* How to deal with the special case dst_name.g == 0? if r != 0, then
2360  * the r * (1 / 0) will give infinity, which is clamped to 1.0, the correct
2361  * result. But if r = 0.0, then 0 * inf = 0, which is incorrect.
2362  */
2363  shader_addline(buffer, "RCP %s.y, %s.y;\n", dst_name, dst_name);
2364  shader_addline(buffer, "MUL TA.x, %s.x, %s.y;\n", dst_name, dst_name);
2365  shader_addline(buffer, "MIN TA.x, TA.x, %s;\n", one);
2366  shader_addline(buffer, "MAX result.depth, TA.x, %s;\n", zero);
2367 }
2368 
2372 static void pshader_hw_texdp3tex(const struct wined3d_shader_instruction *ins)
2373 {
2374  struct wined3d_string_buffer *buffer = ins->ctx->buffer;
2375  DWORD sampler_idx = ins->dst[0].reg.idx[0].offset;
2376  char src0[50];
2377  char dst_str[50];
2378 
2379  shader_arb_get_src_param(ins, &ins->src[0], 0, src0);
2380  shader_addline(buffer, "MOV TB, 0.0;\n");
2381  shader_addline(buffer, "DP3 TB.x, fragment.texcoord[%u], %s;\n", sampler_idx, src0);
2382 
2383  shader_arb_get_dst_param(ins, &ins->dst[0], dst_str);
2384  shader_hw_sample(ins, sampler_idx, dst_str, "TB", 0 /* Only one coord, can't be projected */, NULL, NULL);
2385 }
2386 
2389 static void pshader_hw_texdp3(const struct wined3d_shader_instruction *ins)
2390 {
2391  const struct wined3d_shader_dst_param *dst = &ins->dst[0];
2392  char src0[50];
2393  char dst_str[50];
2394  struct wined3d_string_buffer *buffer = ins->ctx->buffer;
2395 
2396  /* Handle output register */
2397  shader_arb_get_dst_param(ins, dst, dst_str);
2398  shader_arb_get_src_param(ins, &ins->src[0], 0, src0);
2399  shader_addline(buffer, "DP3 %s, fragment.texcoord[%u], %s;\n", dst_str, dst->reg.idx[0].offset, src0);
2400 }
2401 
2404 static void pshader_hw_texm3x3(const struct wined3d_shader_instruction *ins)
2405 {
2406  const struct wined3d_shader_dst_param *dst = &ins->dst[0];
2407  struct wined3d_string_buffer *buffer = ins->ctx->buffer;
2408  char dst_str[50], dst_name[50];
2409  char src0[50];
2410  BOOL is_color;
2411 
2412  shader_arb_get_dst_param(ins, dst, dst_str);
2413  shader_arb_get_src_param(ins, &ins->src[0], 0, src0);
2414  shader_arb_get_register_name(ins, &ins->dst[0].reg, dst_name, &is_color);
2415  shader_addline(buffer, "DP3 %s.z, fragment.texcoord[%u], %s;\n", dst_name, dst->reg.idx[0].offset, src0);
2416  shader_addline(buffer, "MOV %s, %s;\n", dst_str, dst_name);
2417 }
2418 
2425 {
2426  struct wined3d_string_buffer *buffer = ins->ctx->buffer;
2427  const struct wined3d_shader_dst_param *dst = &ins->dst[0];
2428  char src0[50], dst_name[50];
2429  BOOL is_color;
2430  const char *zero = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_ZERO);
2431  const char *one = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_ONE);
2432 
2433  shader_arb_get_src_param(ins, &ins->src[0], 0, src0);
2434  shader_arb_get_register_name(ins, &ins->dst[0].reg, dst_name, &is_color);
2435  shader_addline(buffer, "DP3 %s.y, fragment.texcoord[%u], %s;\n", dst_name, dst->reg.idx[0].offset, src0);
2436 
2437  /* How to deal with the special case dst_name.g == 0? if r != 0, then
2438  * the r * (1 / 0) will give infinity, which is clamped to 1.0, the correct
2439  * result. But if r = 0.0, then 0 * inf = 0, which is incorrect.
2440  */
2441  shader_addline(buffer, "RCP %s.y, %s.y;\n", dst_name, dst_name);
2442  shader_addline(buffer, "MUL %s.x, %s.x, %s.y;\n", dst_name, dst_name, dst_name);
2443  shader_addline(buffer, "MIN %s.x, %s.x, %s;\n", dst_name, dst_name, one);
2444  shader_addline(buffer, "MAX result.depth, %s.x, %s;\n", dst_name, zero);
2445 }
2446 
2449 static void shader_hw_mnxn(const struct wined3d_shader_instruction *ins)
2450 {
2451  int i;
2452  int nComponents = 0;
2453  struct wined3d_shader_dst_param tmp_dst = {{0}};
2454  struct wined3d_shader_src_param tmp_src[2] = {{{0}}};
2455  struct wined3d_shader_instruction tmp_ins;
2456 
2457  memset(&tmp_ins, 0, sizeof(tmp_ins));
2458 
2459  /* Set constants for the temporary argument */
2460  tmp_ins.ctx = ins->ctx;
2461  tmp_ins.dst_count = 1;
2462  tmp_ins.dst = &tmp_dst;
2463  tmp_ins.src_count = 2;
2464  tmp_ins.src = tmp_src;
2465 
2466  switch(ins->handler_idx)
2467  {
2468  case WINED3DSIH_M4x4:
2469  nComponents = 4;
2470  tmp_ins.handler_idx = WINED3DSIH_DP4;
2471  break;
2472  case WINED3DSIH_M4x3:
2473  nComponents = 3;
2474  tmp_ins.handler_idx = WINED3DSIH_DP4;
2475  break;
2476  case WINED3DSIH_M3x4:
2477  nComponents = 4;
2478  tmp_ins.handler_idx = WINED3DSIH_DP3;
2479  break;
2480  case WINED3DSIH_M3x3:
2481  nComponents = 3;
2482  tmp_ins.handler_idx = WINED3DSIH_DP3;
2483  break;
2484  case WINED3DSIH_M3x2:
2485  nComponents = 2;
2486  tmp_ins.handler_idx = WINED3DSIH_DP3;
2487  break;
2488  default:
2489  FIXME("Unhandled opcode %s.\n", debug_d3dshaderinstructionhandler(ins->handler_idx));
2490  break;
2491  }
2492 
2493  tmp_dst = ins->dst[0];
2494  tmp_src[0] = ins->src[0];
2495  tmp_src[1] = ins->src[1];
2496  for (i = 0; i < nComponents; ++i)
2497  {
2498  tmp_dst.write_mask = WINED3DSP_WRITEMASK_0 << i;
2499  shader_hw_map2gl(&tmp_ins);
2500  ++tmp_src[1].reg.idx[0].offset;
2501  }
2502 }
2503 
2504 static DWORD abs_modifier(DWORD mod, BOOL *need_abs)
2505 {
2506  *need_abs = FALSE;
2507 
2508  switch(mod)
2509  {
2510  case WINED3DSPSM_NONE: return WINED3DSPSM_ABS;
2511  case WINED3DSPSM_NEG: return WINED3DSPSM_ABS;
2512  case WINED3DSPSM_BIAS: *need_abs = TRUE; return WINED3DSPSM_BIAS;
2513  case WINED3DSPSM_BIASNEG: *need_abs = TRUE; return WINED3DSPSM_BIASNEG;
2514  case WINED3DSPSM_SIGN: *need_abs = TRUE; return WINED3DSPSM_SIGN;
2515  case WINED3DSPSM_SIGNNEG: *need_abs = TRUE; return WINED3DSPSM_SIGNNEG;
2516  case WINED3DSPSM_COMP: *need_abs = TRUE; return WINED3DSPSM_COMP;
2517  case WINED3DSPSM_X2: *need_abs = TRUE; return WINED3DSPSM_X2;
2518  case WINED3DSPSM_X2NEG: *need_abs = TRUE; return WINED3DSPSM_X2NEG;
2519  case WINED3DSPSM_DZ: *need_abs = TRUE; return WINED3DSPSM_DZ;
2520  case WINED3DSPSM_DW: *need_abs = TRUE; return WINED3DSPSM_DW;
2521  case WINED3DSPSM_ABS: return WINED3DSPSM_ABS;
2522  case WINED3DSPSM_ABSNEG: return WINED3DSPSM_ABS;
2523  }
2524  FIXME("Unknown modifier %u\n", mod);
2525  return mod;
2526 }
2527 
2528 static void shader_hw_scalar_op(const struct wined3d_shader_instruction *ins)
2529 {
2530  struct wined3d_string_buffer *buffer = ins->ctx->buffer;
2531  const char *instruction;
2532  struct wined3d_shader_src_param src0_copy = ins->src[0];
2533  BOOL need_abs = FALSE;
2534 
2535  char dst[50];
2536  char src[50];
2537 
2538  switch(ins->handler_idx)
2539  {
2540  case WINED3DSIH_RSQ: instruction = "RSQ"; break;
2541  case WINED3DSIH_RCP: instruction = "RCP"; break;
2542  case WINED3DSIH_EXPP:
2543  if (ins->ctx->reg_maps->shader_version.major < 2)
2544  {
2545  instruction = "EXP";
2546  break;
2547  }
2548  /* Drop through. */
2549  case WINED3DSIH_EXP:
2550  instruction = "EX2";
2551  break;
2552  case WINED3DSIH_LOG:
2553  case WINED3DSIH_LOGP:
2554  /* The precision requirements suggest that LOGP matches ARBvp's LOG
2555  * instruction, but notice that the output of those instructions is
2556  * different. */
2557  src0_copy.modifiers = abs_modifier(src0_copy.modifiers, &need_abs);
2558  instruction = "LG2";
2559  break;
2560  default: instruction = "";
2561  FIXME("Unhandled opcode %s.\n", debug_d3dshaderinstructionhandler(ins->handler_idx));
2562  break;
2563  }
2564 
2565  /* Dx sdk says .x is used if no swizzle is given, but our test shows that
2566  * .w is used. */
2567  src0_copy.swizzle = shader_arb_select_component(src0_copy.swizzle, 3);
2568 
2569  shader_arb_get_dst_param(ins, &ins->dst[0], dst); /* Destination */
2570  shader_arb_get_src_param(ins, &src0_copy, 0, src);
2571 
2572  if(need_abs)
2573  {
2574  shader_addline(buffer, "ABS TA.w, %s;\n", src);
2575  shader_addline(buffer, "%s%s %s, TA.w;\n", instruction, shader_arb_get_modifier(ins), dst);
2576  }
2577  else
2578  {
2579  shader_addline(buffer, "%s%s %s, %s;\n", instruction, shader_arb_get_modifier(ins), dst, src);
2580  }
2581 
2582 }
2583 
2584 static void shader_hw_nrm(const struct wined3d_shader_instruction *ins)
2585 {
2586  struct wined3d_string_buffer *buffer = ins->ctx->buffer;
2587  char dst_name[50];
2588  char src_name[50];
2589  struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
2590  BOOL pshader = shader_is_pshader_version(ins->ctx->reg_maps->shader_version.type);
2591  const char *zero = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_ZERO);
2592 
2593  shader_arb_get_dst_param(ins, &ins->dst[0], dst_name);
2594  shader_arb_get_src_param(ins, &ins->src[0], 1 /* Use TB */, src_name);
2595 
2596  /* In D3D, NRM of a vector with length zero returns zero. Catch this situation, as
2597  * otherwise NRM or RSQ would return NaN */
2598  if(pshader && priv->target_version >= NV3)
2599  {
2600  /* GL_NV_fragment_program2's NRM needs protection against length zero vectors too
2601  *
2602  * TODO: Find out if DP3+NRM+MOV is really faster than DP3+RSQ+MUL
2603  */
2604  shader_addline(buffer, "DP3C TA, %s, %s;\n", src_name, src_name);
2605  shader_addline(buffer, "NRM%s %s, %s;\n", shader_arb_get_modifier(ins), dst_name, src_name);
2606  shader_addline(buffer, "MOV %s (EQ), %s;\n", dst_name, zero);
2607  }
2608  else if(priv->target_version >= NV2)
2609  {
2610  shader_addline(buffer, "DP3C TA.x, %s, %s;\n", src_name, src_name);
2611  shader_addline(buffer, "RSQ TA.x (NE), TA.x;\n");
2612  shader_addline(buffer, "MUL%s %s, %s, TA.x;\n", shader_arb_get_modifier(ins), dst_name,
2613  src_name);
2614  }
2615  else
2616  {
2617  const char *one = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_ONE);
2618 
2619  shader_addline(buffer, "DP3 TA.x, %s, %s;\n", src_name, src_name);
2620  /* Pass any non-zero value to RSQ if the input vector has a length of zero. The
2621  * RSQ result doesn't matter, as long as multiplying it by 0 returns 0.
2622  */
2623  shader_addline(buffer, "SGE TA.y, -TA.x, %s;\n", zero);
2624  shader_addline(buffer, "MAD TA.x, %s, TA.y, TA.x;\n", one);
2625 
2626  shader_addline(buffer, "RSQ TA.x, TA.x;\n");
2627  /* dst.w = src[0].w * 1 / (src.x^2 + src.y^2 + src.z^2)^(1/2) according to msdn*/
2628  shader_addline(buffer, "MUL%s %s, %s, TA.x;\n", shader_arb_get_modifier(ins), dst_name,
2629  src_name);
2630  }
2631 }
2632 
2633 static void shader_hw_lrp(const struct wined3d_shader_instruction *ins)
2634 {
2635  struct wined3d_string_buffer *buffer = ins->ctx->buffer;
2636  char dst_name[50];
2637  char src_name[3][50];
2638 
2639  /* ARB_fragment_program has a convenient LRP instruction */
2640  if(shader_is_pshader_version(ins->ctx->reg_maps->shader_version.type)) {
2641  shader_hw_map2gl(ins);
2642  return;
2643  }
2644 
2645  shader_arb_get_dst_param(ins, &ins->dst[0], dst_name);
2646  shader_arb_get_src_param(ins, &ins->src[0], 0, src_name[0]);
2647  shader_arb_get_src_param(ins, &ins->src[1], 1, src_name[1]);
2648  shader_arb_get_src_param(ins, &ins->src[2], 2, src_name[2]);
2649 
2650  shader_addline(buffer, "SUB TA, %s, %s;\n", src_name[1], src_name[2]);
2651  shader_addline(buffer, "MAD%s %s, %s, TA, %s;\n", shader_arb_get_modifier(ins),
2652  dst_name, src_name[0], src_name[2]);
2653 }
2654 
2655 static void shader_hw_sincos(const struct wined3d_shader_instruction *ins)
2656 {
2657  /* This instruction exists in ARB, but the d3d instruction takes two extra parameters which
2658  * must contain fixed constants. So we need a separate function to filter those constants and
2659  * can't use map2gl
2660  */
2661  struct wined3d_string_buffer *buffer = ins->ctx->buffer;
2662  struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
2663  const struct wined3d_shader_dst_param *dst = &ins->dst[0];
2664  char dst_name[50];
2665  char src_name0[50], src_name1[50], src_name2[50];
2666  BOOL is_color;
2667 
2668  shader_arb_get_src_param(ins, &ins->src[0], 0, src_name0);
2669  if(shader_is_pshader_version(ins->ctx->reg_maps->shader_version.type)) {
2670  shader_arb_get_dst_param(ins, &ins->dst[0], dst_name);
2671  /* No modifiers are supported on SCS */
2672  shader_addline(buffer, "SCS %s, %s;\n", dst_name, src_name0);
2673 
2674  if(ins->dst[0].modifiers & WINED3DSPDM_SATURATE)
2675  {
2676  shader_arb_get_register_name(ins, &dst->reg, src_name0, &is_color);
2677  shader_addline(buffer, "MOV_SAT %s, %s;\n", dst_name, src_name0);
2678  }
2679  } else if(priv->target_version >= NV2) {
2680  shader_arb_get_register_name(ins, &dst->reg, dst_name, &is_color);
2681 
2682  /* Sincos writemask must be .x, .y or .xy */
2684  shader_addline(buffer, "COS%s %s.x, %s;\n", shader_arb_get_modifier(ins), dst_name, src_name0);
2686  shader_addline(buffer, "SIN%s %s.y, %s;\n", shader_arb_get_modifier(ins), dst_name, src_name0);
2687  } else {
2688  /* Approximate sine and cosine with a taylor series, as per math textbook. The application passes 8
2689  * helper constants(D3DSINCOSCONST1 and D3DSINCOSCONST2) in src1 and src2.
2690  *
2691  * sin(x) = x - x^3/3! + x^5/5! - x^7/7! + ...
2692  * cos(x) = 1 - x^2/2! + x^4/4! - x^6/6! + ...
2693  *
2694  * The constants we get are:
2695  *
2696  * +1 +1, -1 -1 +1 +1 -1 -1
2697  * ---- , ---- , ---- , ----- , ----- , ----- , ------
2698  * 1!*2 2!*4 3!*8 4!*16 5!*32 6!*64 7!*128
2699  *
2700  * If used with x^2, x^3, x^4 etc they calculate sin(x/2) and cos(x/2):
2701  *
2702  * (x/2)^2 = x^2 / 4
2703  * (x/2)^3 = x^3 / 8
2704  * (x/2)^4 = x^4 / 16
2705  * (x/2)^5 = x^5 / 32
2706  * etc
2707  *
2708  * To get the final result:
2709  * sin(x) = 2 * sin(x/2) * cos(x/2)
2710  * cos(x) = cos(x/2)^2 - sin(x/2)^2
2711  * (from sin(x+y) and cos(x+y) rules)
2712  *
2713  * As per MSDN, dst.z is undefined after the operation, and so is
2714  * dst.x and dst.y if they're masked out by the writemask. Ie
2715  * sincos dst.y, src1, c0, c1
2716  * returns the sine in dst.y. dst.x and dst.z are undefined, dst.w is not touched. The assembler
2717  * vsa.exe also stops with an error if the dest register is the same register as the source
2718  * register. This means we can use dest.xyz as temporary storage. The assembler vsa.exe output also
2719  * indicates that sincos consumes 8 instruction slots in vs_2_0(and, strangely, in vs_3_0).
2720  */
2721  shader_arb_get_src_param(ins, &ins->src[1], 1, src_name1);
2722  shader_arb_get_src_param(ins, &ins->src[2], 2, src_name2);
2723  shader_arb_get_register_name(ins, &dst->reg, dst_name, &is_color);
2724 
2725  shader_addline(buffer, "MUL %s.x, %s, %s;\n", dst_name, src_name0, src_name0); /* x ^ 2 */
2726  shader_addline(buffer, "MUL TA.y, %s.x, %s;\n", dst_name, src_name0); /* x ^ 3 */
2727  shader_addline(buffer, "MUL %s.y, TA.y, %s;\n", dst_name, src_name0); /* x ^ 4 */
2728  shader_addline(buffer, "MUL TA.z, %s.y, %s;\n", dst_name, src_name0); /* x ^ 5 */
2729  shader_addline(buffer, "MUL %s.z, TA.z, %s;\n", dst_name, src_name0); /* x ^ 6 */
2730  shader_addline(buffer, "MUL TA.w, %s.z, %s;\n", dst_name, src_name0); /* x ^ 7 */
2731 
2732  /* sin(x/2)
2733  *
2734  * Unfortunately we don't get the constants in a DP4-capable form. Is there a way to
2735  * properly merge that with MULs in the code above?
2736  * The swizzles .yz and xw however fit into the .yzxw swizzle added to ps_2_0. Maybe
2737  * we can merge the sine and cosine MAD rows to calculate them together.
2738  */
2739  shader_addline(buffer, "MUL TA.x, %s, %s.w;\n", src_name0, src_name2); /* x^1, +1/(1!*2) */
2740  shader_addline(buffer, "MAD TA.x, TA.y, %s.x, TA.x;\n", src_name2); /* -1/(3!*8) */
2741  shader_addline(buffer, "MAD TA.x, TA.z, %s.w, TA.x;\n", src_name1); /* +1/(5!*32) */
2742  shader_addline(buffer, "MAD TA.x, TA.w, %s.x, TA.x;\n", src_name1); /* -1/(7!*128) */
2743 
2744  /* cos(x/2) */
2745  shader_addline(buffer, "MAD TA.y, %s.x, %s.y, %s.z;\n", dst_name, src_name2, src_name2); /* -1/(2!*4), +1.0 */
2746  shader_addline(buffer, "MAD TA.y, %s.y, %s.z, TA.y;\n", dst_name, src_name1); /* +1/(4!*16) */
2747  shader_addline(buffer, "MAD TA.y, %s.z, %s.y, TA.y;\n", dst_name, src_name1); /* -1/(6!*64) */
2748 
2749  if(dst->write_mask & WINED3DSP_WRITEMASK_0) {
2750  /* cos x */
2751  shader_addline(buffer, "MUL TA.z, TA.y, TA.y;\n");
2752  shader_addline(buffer, "MAD %s.x, -TA.x, TA.x, TA.z;\n", dst_name);
2753  }
2754  if(dst->write_mask & WINED3DSP_WRITEMASK_1) {
2755  /* sin x */
2756  shader_addline(buffer, "MUL %s.y, TA.x, TA.y;\n", dst_name);
2757  shader_addline(buffer, "ADD %s.y, %s.y, %s.y;\n", dst_name, dst_name, dst_name);
2758  }
2759  }
2760 }
2761 
2762 static void shader_hw_sgn(const struct wined3d_shader_instruction *ins)
2763 {
2764  struct wined3d_string_buffer *buffer = ins->ctx->buffer;
2765  char dst_name[50];
2766  char src_name[50];
2767  struct shader_arb_ctx_priv *ctx = ins->ctx->backend_data;
2768 
2769  shader_arb_get_dst_param(ins, &ins->dst[0], dst_name);
2770  shader_arb_get_src_param(ins, &ins->src[0], 0, src_name);
2771 
2772  /* SGN is only valid in vertex shaders */
2773  if(ctx->target_version >= NV2) {
2774  shader_addline(buffer, "SSG%s %s, %s;\n", shader_arb_get_modifier(ins), dst_name, src_name);
2775  return;
2776  }
2777 
2778  /* If SRC > 0.0, -SRC < SRC = TRUE, otherwise false.
2779  * if SRC < 0.0, SRC < -SRC = TRUE. If neither is true, src = 0.0
2780  */
2781  if(ins->dst[0].modifiers & WINED3DSPDM_SATURATE) {
2782  shader_addline(buffer, "SLT %s, -%s, %s;\n", dst_name, src_name, src_name);
2783  } else {
2784  /* src contains TA? Write to the dest first. This won't overwrite our destination.
2785  * Then use TA, and calculate the final result
2786  *
2787  * Not reading from TA? Store the first result in TA to avoid overwriting the
2788  * destination if src reg = dst reg
2789  */
2790  if(strstr(src_name, "TA"))
2791  {
2792  shader_addline(buffer, "SLT %s, %s, -%s;\n", dst_name, src_name, src_name);
2793  shader_addline(buffer, "SLT TA, -%s, %s;\n", src_name, src_name);
2794  shader_addline(buffer, "ADD %s, %s, -TA;\n", dst_name, dst_name);
2795  }
2796  else
2797  {
2798  shader_addline(buffer, "SLT TA, -%s, %s;\n", src_name, src_name);
2799  shader_addline(buffer, "SLT %s, %s, -%s;\n", dst_name, src_name, src_name);
2800  shader_addline(buffer, "ADD %s, TA, -%s;\n", dst_name, dst_name);
2801  }
2802  }
2803 }
2804 
2805 static void shader_hw_dsy(const struct wined3d_shader_instruction *ins)
2806 {
2807  struct wined3d_string_buffer *buffer = ins->ctx->buffer;
2808  char src[50];
2809  char dst[50];
2810  char dst_name[50];
2811  BOOL is_color;
2812 
2813  shader_arb_get_dst_param(ins, &ins->dst[0], dst);
2814  shader_arb_get_src_param(ins, &ins->src[0], 0, src);
2815  shader_arb_get_register_name(ins, &ins->dst[0].reg, dst_name, &is_color);
2816 
2817  shader_addline(buffer, "DDY %s, %s;\n", dst, src);
2818  shader_addline(buffer, "MUL%s %s, %s, ycorrection.y;\n", shader_arb_get_modifier(ins), dst, dst_name);
2819 }
2820 
2821 static void shader_hw_pow(const struct wined3d_shader_instruction *ins)
2822 {
2823  struct wined3d_string_buffer *buffer = ins->ctx->buffer;
2824  char src0[50], src1[50], dst[50];
2825  struct wined3d_shader_src_param src0_copy = ins->src[0];
2826  BOOL need_abs = FALSE;
2827  struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
2828  const char *one = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_ONE);
2829 
2830  /* POW operates on the absolute value of the input */
2831  src0_copy.modifiers = abs_modifier(src0_copy.modifiers, &need_abs);
2832 
2833  shader_arb_get_dst_param(ins, &ins->dst[0], dst);
2834  shader_arb_get_src_param(ins, &src0_copy, 0, src0);
2835  shader_arb_get_src_param(ins, &ins->src[1], 1, src1);
2836 
2837  if (need_abs)
2838  shader_addline(buffer, "ABS TA.x, %s;\n", src0);
2839  else
2840  shader_addline(buffer, "MOV TA.x, %s;\n", src0);
2841 
2842  if (priv->target_version >= NV2)
2843  {
2844  shader_addline(buffer, "MOVC TA.y, %s;\n", src1);
2845  shader_addline(buffer, "POW%s %s, TA.x, TA.y;\n", shader_arb_get_modifier(ins), dst);
2846  shader_addline(buffer, "MOV %s (EQ.y), %s;\n", dst, one);
2847  }
2848  else
2849  {
2850  const char *zero = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_ZERO);
2851  const char *flt_eps = arb_get_helper_value(ins->ctx->reg_maps->shader_version.type, ARB_EPS);
2852 
2853  shader_addline(buffer, "ABS TA.y, %s;\n", src1);
2854  shader_addline(buffer, "SGE TA.y, -TA.y, %s;\n", zero);
2855  /* Possibly add flt_eps to avoid getting float special values */
2856  shader_addline(buffer, "MAD TA.z, TA.y, %s, %s;\n", flt_eps, src1);
2857  shader_addline(buffer, "POW%s TA.x, TA.x, TA.z;\n", shader_arb_get_modifier(ins));
2858  shader_addline(buffer, "MAD TA.x, -TA.x, TA.y, TA.x;\n");
2859  shader_addline(buffer, "MAD %s, TA.y, %s, TA.x;\n", dst, one);
2860  }
2861 }
2862 
2863 static void shader_hw_loop(const struct wined3d_shader_instruction *ins)
2864 {
2865  struct wined3d_string_buffer *buffer = ins->ctx->buffer;
2866  char src_name[50];
2867  BOOL vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type);
2868 
2869  /* src0 is aL */
2870  shader_arb_get_src_param(ins, &ins->src[1], 0, src_name);
2871 
2872  if(vshader)
2873  {
2874  struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
2875  struct list *e = list_head(&priv->control_frames);
2876  struct control_frame *control_frame = LIST_ENTRY(e, struct control_frame, entry);
2877 
2878  if(priv->loop_depth > 1) shader_addline(buffer, "PUSHA aL;\n");
2879  /* The constant loader makes sure to load -1 into iX.w */
2880  shader_addline(buffer, "ARLC aL, %s.xywz;\n", src_name);
2881  shader_addline(buffer, "BRA loop_%u_end (LE.x);\n", control_frame->no.loop);
2882  shader_addline(buffer, "loop_%u_start:\n", control_frame->no.loop);
2883  }
2884  else
2885  {
2886  shader_addline(buffer, "LOOP %s;\n", src_name);
2887  }
2888 }
2889 
2890 static void shader_hw_rep(const struct wined3d_shader_instruction *ins)
2891 {
2892  struct wined3d_string_buffer *buffer = ins->ctx->buffer;
2893  char src_name[50];
2894  BOOL vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type);
2895 
2896  shader_arb_get_src_param(ins, &ins->src[0], 0, src_name);
2897 
2898  /* The constant loader makes sure to load -1 into iX.w */
2899  if(vshader)
2900  {
2901  struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
2902  struct list *e = list_head(&priv->control_frames);
2903  struct control_frame *control_frame = LIST_ENTRY(e, struct control_frame, entry);
2904 
2905  if(priv->loop_depth > 1) shader_addline(buffer, "PUSHA aL;\n");
2906 
2907  shader_addline(buffer, "ARLC aL, %s.xywz;\n", src_name);
2908  shader_addline(buffer, "BRA loop_%u_end (LE.x);\n", control_frame->no.loop);
2909  shader_addline(buffer, "loop_%u_start:\n", control_frame->no.loop);
2910  }
2911  else
2912  {
2913  shader_addline(buffer, "REP %s;\n", src_name);
2914  }
2915 }
2916 
2917 static void shader_hw_endloop(const struct wined3d_shader_instruction *ins)
2918 {
2919  struct wined3d_string_buffer *buffer = ins->ctx->buffer;
2920  BOOL vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type);
2921 
2922  if(vshader)
2923  {
2924  struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
2925  struct list *e = list_head(&priv->control_frames);
2926  struct control_frame *control_frame = LIST_ENTRY(e, struct control_frame, entry);
2927 
2928  shader_addline(buffer, "ARAC aL.xy, aL;\n");
2929  shader_addline(buffer, "BRA loop_%u_start (GT.x);\n", control_frame->no.loop);
2930  shader_addline(buffer, "loop_%u_end:\n", control_frame->no.loop);
2931 
2932  if(priv->loop_depth > 1) shader_addline(buffer, "POPA aL;\n");
2933  }
2934  else
2935  {
2936  shader_addline(buffer, "ENDLOOP;\n");
2937  }
2938 }
2939 
2940 static void shader_hw_endrep(const struct wined3d_shader_instruction *ins)
2941 {
2942  struct wined3d_string_buffer *buffer = ins->ctx->buffer;
2943  BOOL vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type);
2944 
2945  if(vshader)
2946  {
2947  struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
2948  struct list *e = list_head(&priv->control_frames);
2949  struct control_frame *control_frame = LIST_ENTRY(e, struct control_frame, entry);
2950 
2951  shader_addline(buffer, "ARAC aL.xy, aL;\n");
2952  shader_addline(buffer, "BRA loop_%u_start (GT.x);\n", control_frame->no.loop);
2953  shader_addline(buffer, "loop_%u_end:\n", control_frame->no.loop);
2954 
2955  if(priv->loop_depth > 1) shader_addline(buffer, "POPA aL;\n");
2956  }
2957  else
2958  {
2959  shader_addline(buffer, "ENDREP;\n");
2960  }
2961 }
2962 
2963 static const struct control_frame *find_last_loop(const struct shader_arb_ctx_priv *priv)
2964 {
2965  struct control_frame *control_frame;
2966 
2967  LIST_FOR_EACH_ENTRY(control_frame, &priv->control_frames, struct control_frame, entry)
2968  {
2969  if(control_frame->type == LOOP || control_frame->type == REP) return control_frame;
2970  }
2971  ERR("Could not find loop for break\n");
2972  return NULL;
2973 }
2974 
2975 static void shader_hw_break(const struct wined3d_shader_instruction *ins)
2976 {
2977  struct wined3d_string_buffer *buffer = ins->ctx->buffer;
2978  const struct control_frame *control_frame = find_last_loop(ins->ctx->backend_data);
2979  BOOL vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type);
2980 
2981  if(vshader)
2982  {
2983  shader_addline(buffer, "BRA loop_%u_end;\n", control_frame->no.loop);
2984  }
2985  else
2986  {
2987  shader_addline(buffer, "BRK;\n");
2988  }
2989 }
2990 
2991 static const char *get_compare(enum wined3d_shader_rel_op op)
2992 {
2993  switch (op)
2994  {
2995  case WINED3D_SHADER_REL_OP_GT: return "GT";
2996  case WINED3D_SHADER_REL_OP_EQ: return "EQ";
2997  case WINED3D_SHADER_REL_OP_GE: return "GE";
2998  case WINED3D_SHADER_REL_OP_LT: return "LT";
2999  case WINED3D_SHADER_REL_OP_NE: return "NE";
3000  case WINED3D_SHADER_REL_OP_LE: return "LE";
3001  default:
3002  FIXME("Unrecognized operator %#x.\n", op);
3003  return "(\?\?)";
3004  }
3005 }
3006 
3008 {
3009  switch (op)
3010  {
3017  default:
3018  FIXME("Unrecognized operator %#x.\n", op);
3019  return -1;
3020  }
3021 }
3022 
3023 static void shader_hw_breakc(const struct wined3d_shader_instruction *ins)
3024 {
3025  struct wined3d_string_buffer *buffer = ins->ctx->buffer;
3026  BOOL vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type);
3027  const struct control_frame *control_frame = find_last_loop(ins->ctx->backend_data);
3028  char src_name0[50];
3029  char src_name1[50];
3030  const char *comp = get_compare(ins->flags);
3031 
3032  shader_arb_get_src_param(ins, &ins->src[0], 0, src_name0);
3033  shader_arb_get_src_param(ins, &ins->src[1], 1, src_name1);
3034 
3035  if(vshader)
3036  {
3037  /* SUBC CC, src0, src1" works only in pixel shaders, so use TA to throw
3038  * away the subtraction result
3039  */
3040  shader_addline(buffer, "SUBC TA, %s, %s;\n", src_name0, src_name1);
3041  shader_addline(buffer, "BRA loop_%u_end (%s.x);\n", control_frame->no.loop, comp);
3042  }
3043  else
3044  {
3045  shader_addline(buffer, "SUBC TA, %s, %s;\n", src_name0, src_name1);
3046  shader_addline(buffer, "BRK (%s.x);\n", comp);
3047  }
3048 }
3049 
3050 static void shader_hw_ifc(const struct wined3d_shader_instruction *ins)
3051 {
3052  struct wined3d_string_buffer *buffer = ins->ctx->buffer;
3053  struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
3054  struct list *e = list_head(&priv->control_frames);
3055  struct control_frame *control_frame = LIST_ENTRY(e, struct control_frame, entry);
3056  const char *comp;
3057  char src_name0[50];
3058  char src_name1[50];
3059  BOOL vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type);
3060 
3061  shader_arb_get_src_param(ins, &ins->src[0], 0, src_name0);
3062  shader_arb_get_src_param(ins, &ins->src[1], 1, src_name1);
3063 
3064  if(vshader)
3065  {
3066  /* Invert the flag. We jump to the else label if the condition is NOT true */
3067  comp = get_compare(invert_compare(ins->flags));
3068  shader_addline(buffer, "SUBC TA, %s, %s;\n", src_name0, src_name1);
3069  shader_addline(buffer, "BRA ifc_%u_else (%s.x);\n", control_frame->no.ifc, comp);
3070  }
3071  else
3072  {
3073  comp = get_compare(ins->flags);
3074  shader_addline(buffer, "SUBC TA, %s, %s;\n", src_name0, src_name1);
3075  shader_addline(buffer, "IF %s.x;\n", comp);
3076  }
3077 }
3078 
3079 static void shader_hw_else(const struct wined3d_shader_instruction *ins)
3080 {
3081  struct wined3d_string_buffer *buffer = ins->ctx->buffer;
3082  struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
3083  struct list *e = list_head(&priv->control_frames);
3084  struct control_frame *control_frame = LIST_ENTRY(e, struct control_frame, entry);
3085  BOOL vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type);
3086 
3087  if(vshader)
3088  {
3089  shader_addline(buffer, "BRA ifc_%u_endif;\n", control_frame->no.ifc);
3090  shader_addline(buffer, "ifc_%u_else:\n", control_frame->no.ifc);
3091  control_frame->had_else = TRUE;
3092  }
3093  else
3094  {
3095  shader_addline(buffer, "ELSE;\n");
3096  }
3097 }
3098 
3099 static void shader_hw_endif(const struct wined3d_shader_instruction *ins)
3100 {
3101  struct wined3d_string_buffer *buffer = ins->ctx->buffer;
3102  struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
3103  struct list *e = list_head(&priv->control_frames);
3104  struct control_frame *control_frame = LIST_ENTRY(e, struct control_frame, entry);
3105  BOOL vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type);
3106 
3107  if(vshader)
3108  {
3109  if(control_frame->had_else)
3110  {
3111  shader_addline(buffer, "ifc_%u_endif:\n", control_frame->no.ifc);
3112  }
3113  else
3114  {
3115  shader_addline(buffer, "#No else branch. else is endif\n");
3116  shader_addline(buffer, "ifc_%u_else:\n", control_frame->no.ifc);
3117  }
3118  }
3119  else
3120  {
3121  shader_addline(buffer, "ENDIF;\n");
3122  }
3123 }
3124 
3125 static void shader_hw_texldd(const struct wined3d_shader_instruction *ins)
3126 {
3127  DWORD sampler_idx = ins->src[1].reg.idx[0].offset;
3128  char reg_dest[40];
3129  char reg_src[3][40];
3130  WORD flags = TEX_DERIV;
3131 
3132  shader_arb_get_dst_param(ins, &ins->dst[0], reg_dest);
3133  shader_arb_get_src_param(ins, &ins->src[0], 0, reg_src[0]);
3134  shader_arb_get_src_param(ins, &ins->src[2], 1, reg_src[1]);
3135  shader_arb_get_src_param(ins, &ins->src[3], 2, reg_src[2]);
3136 
3137  if (ins->flags & WINED3DSI_TEXLD_PROJECT) flags |= TEX_PROJ;
3138  if (ins->flags & WINED3DSI_TEXLD_BIAS) flags |= TEX_BIAS;
3139 
3140  shader_hw_sample(ins, sampler_idx, reg_dest, reg_src[0], flags, reg_src[1], reg_src[2]);
3141 }
3142 
3143 static void shader_hw_texldl(const struct wined3d_shader_instruction *ins)
3144 {
3145  DWORD sampler_idx = ins->src[1].reg.idx[0].offset;
3146  char reg_dest[40];
3147  char reg_coord[40];
3148  WORD flags = TEX_LOD;
3149 
3150  shader_arb_get_dst_param(ins, &ins->dst[0], reg_dest);
3151  shader_arb_get_src_param(ins, &ins->src[0], 0, reg_coord);
3152 
3153  if (ins->flags & WINED3DSI_TEXLD_PROJECT) flags |= TEX_PROJ;
3154  if (ins->flags & WINED3DSI_TEXLD_BIAS) flags |= TEX_BIAS;
3155 
3156  shader_hw_sample(ins, sampler_idx, reg_dest, reg_coord, flags, NULL, NULL);
3157 }
3158 
3159 static void shader_hw_label(const struct wined3d_shader_instruction *ins)
3160 {
3161  struct wined3d_string_buffer *buffer = ins->ctx->buffer;
3162  struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
3163 
3164  priv->in_main_func = FALSE;
3165  /* Call instructions activate the NV extensions, not labels and rets. If there is an uncalled
3166  * subroutine, don't generate a label that will make GL complain
3167  */
3168  if(priv->target_version == ARB) return;
3169 
3170  shader_addline(buffer, "l%u:\n", ins->src[0].reg.idx[0].offset);
3171 }
3172 
3173 static void vshader_add_footer(struct shader_arb_ctx_priv *priv_ctx,
3174  const struct arb_vshader_private *shader_data, const struct arb_vs_compile_args *args,
3175  const struct wined3d_shader_reg_maps *reg_maps, const struct wined3d_gl_info *gl_info,
3176  struct wined3d_string_buffer *buffer)
3177 {
3178  unsigned int i;
3179 
3180  /* The D3DRS_FOGTABLEMODE render state defines if the shader-generated fog coord is used
3181  * or if the fragment depth is used. If the fragment depth is used(FOGTABLEMODE != NONE),
3182  * the fog frag coord is thrown away. If the fog frag coord is used, but not written by
3183  * the shader, it is set to 0.0(fully fogged, since start = 1.0, end = 0.0)
3184  */
3185  if (args->super.fog_src == VS_FOG_Z)
3186  {
3187  shader_addline(buffer, "MOV result.fogcoord, TMP_OUT.z;\n");
3188  }
3189  else
3190  {
3191  if (!reg_maps->fog)
3192  {
3193  /* posFixup.x is always 1.0, so we can safely use it */
3194  shader_addline(buffer, "ADD result.fogcoord, posFixup.x, -posFixup.x;\n");
3195  }
3196  else
3197  {
3198  /* Clamp fogcoord */
3199  const char *zero = arb_get_helper_value(reg_maps->shader_version.type, ARB_ZERO);
3200  const char *one = arb_get_helper_value(reg_maps->shader_version.type, ARB_ONE);
3201 
3202  shader_addline(buffer, "MIN TMP_FOGCOORD.x, TMP_FOGCOORD.x, %s;\n", one);
3203  shader_addline(buffer, "MAX result.fogcoord.x, TMP_FOGCOORD.x, %s;\n", zero);
3204  }
3205  }
3206 
3207  /* Clipplanes are always stored without y inversion */
3208  if (use_nv_clip(gl_info) && priv_ctx->target_version >= NV2)
3209  {
3210  if (args->super.clip_enabled)
3211  {
3212  for (i = 0; i < priv_ctx->vs_clipplanes; i++)
3213  {
3214  shader_addline(buffer, "DP4 result.clip[%u].x, TMP_OUT, state.clip[%u].plane;\n", i, i);
3215  }
3216  }
3217  }
3218  else if (args->clip.boolclip.clip_texcoord)
3219  {
3220  static const char component[4] = {'x', 'y', 'z', 'w'};
3221  unsigned int cur_clip = 0;
3223 
3224  for (i = 0; i < gl_info->limits.user_clip_distances; ++i)
3225  {
3226  if (args->clip.boolclip.clipplane_mask & (1u << i))
3227  {
3228  shader_addline(buffer, "DP4 TA.%c, TMP_OUT, state.clip[%u].plane;\n",
3229  component[cur_clip++], i);
3230  }
3231  }
3232  switch (cur_clip)
3233  {
3234  case 0:
3235  shader_addline(buffer, "MOV TA, %s;\n", zero);
3236  break;
3237  case 1:
3238  shader_addline(buffer, "MOV TA.yzw, %s;\n", zero);
3239  break;
3240  case 2:
3241  shader_addline(buffer, "MOV TA.zw, %s;\n", zero);
3242  break;
3243  case 3:
3244  shader_addline(buffer, "MOV TA.w, %s;\n", zero);
3245  break;
3246  }
3247  shader_addline(buffer, "MOV result.texcoord[%u], TA;\n",
3248  args->clip.boolclip.clip_texcoord - 1);
3249  }
3250 
3251  /* Write the final position.
3252  *
3253  * OpenGL coordinates specify the center of the pixel while d3d coords specify
3254  * the corner. The offsets are stored in z and w in posFixup. posFixup.y contains
3255  * 1.0 or -1.0 to turn the rendering upside down for offscreen rendering. PosFixup.x
3256  * contains 1.0 to allow a mad, but arb vs swizzles are too restricted for that.
3257  */
3258  if (!gl_info->supported[ARB_CLIP_CONTROL])
3259  {
3260  shader_addline(buffer, "MUL TA, posFixup, TMP_OUT.w;\n");
3261  shader_addline(buffer, "ADD TMP_OUT.x, TMP_OUT.x, TA.z;\n");
3262  shader_addline(buffer, "MAD TMP_OUT.y, TMP_OUT.y, posFixup.y, TA.w;\n");
3263 
3264  /* Z coord [0;1]->[-1;1] mapping, see comment in
3265  * get_projection_matrix() in utils.c. */
3266  if (need_helper_const(shader_data, reg_maps, gl_info))
3267  {
3269  shader_addline(buffer, "MAD TMP_OUT.z, TMP_OUT.z, %s, -TMP_OUT.w;\n", two);
3270  }
3271  else
3272  {
3273  shader_addline(buffer, "ADD TMP_OUT.z, TMP_OUT.z, TMP_OUT.z;\n");
3274  shader_addline(buffer, "ADD TMP_OUT.z, TMP_OUT.z, -TMP_OUT.w;\n");
3275  }
3276  }
3277 
3278  shader_addline(buffer, "MOV result.position, TMP_OUT;\n");
3279 
3280  priv_ctx->footer_written = TRUE;
3281 }
3282 
3283 static void shader_hw_ret(const struct wined3d_shader_instruction *ins)
3284 {
3285  struct wined3d_string_buffer *buffer = ins->ctx->buffer;
3286  struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
3287  const struct wined3d_shader *shader = ins->ctx->shader;
3288  BOOL vshader = shader_is_vshader_version(ins->ctx->reg_maps->shader_version.type);
3289 
3290  if(priv->target_version == ARB) return;
3291 
3292  if(vshader)
3293  {
3294  if (priv->in_main_func) vshader_add_footer(priv, shader->backend_data,
3295  priv->cur_vs_args, ins->ctx->reg_maps, ins->ctx->gl_info, buffer);
3296  }
3297 
3298  shader_addline(buffer, "RET;\n");
3299 }
3300 
3301 static void shader_hw_call(const struct wined3d_shader_instruction *ins)
3302 {
3303  struct wined3d_string_buffer *buffer = ins->ctx->buffer;
3304  shader_addline(buffer, "CAL l%u;\n", ins->src[0].reg.idx[0].offset);
3305 }
3306 
3307 static BOOL shader_arb_compile(const struct wined3d_gl_info *gl_info, GLenum target, const char *src)
3308 {
3309  const char *ptr, *line;
3310  GLint native, pos;
3311 
3312  if (TRACE_ON(d3d_shader))
3313  {
3314  ptr = src;
3315  while ((line = get_line(&ptr))) TRACE_(d3d_shader)(" %.*s", (int)(ptr - line), line);
3316  }
3317 
3318  GL_EXTCALL(glProgramStringARB(target, GL_PROGRAM_FORMAT_ASCII_ARB, strlen(src), src));
3319  checkGLcall("glProgramStringARB()");
3320 
3321  if (FIXME_ON(d3d_shader))
3322  {
3323  gl_info->gl_ops.gl.p_glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &pos);
3324  if (pos != -1)
3325  {
3326  FIXME_(d3d_shader)("Program error at position %d: %s\n\n", pos,
3327  debugstr_a((const char *)gl_info->gl_ops.gl.p_glGetString(GL_PROGRAM_ERROR_STRING_ARB)));
3328  ptr = src;
3329  while ((line = get_line(&ptr))) FIXME_(d3d_shader)(" %.*s", (int)(ptr - line), line);
3330  FIXME_(d3d_shader)("\n");
3331 
3332  return FALSE;
3333  }
3334  }
3335 
3336  if (WARN_ON(d3d_perf))
3337  {
3338  GL_EXTCALL(glGetProgramivARB(target, GL_PROGRAM_UNDER_NATIVE_LIMITS_ARB, &native));
3339  checkGLcall("glGetProgramivARB()");
3340  if (!native)
3341  WARN_(d3d_perf)("Program exceeds native resource limits.\n");
3342  }
3343 
3344  return TRUE;
3345 }
3346 
3347 static void arbfp_add_sRGB_correction(struct wined3d_string_buffer *buffer, const char *fragcolor,
3348  const char *tmp1, const char *tmp2, const char *tmp3, const char *tmp4, BOOL condcode)
3349 {
3350  /* Perform sRGB write correction. See GLX_EXT_framebuffer_sRGB */
3351 
3352  if(condcode)
3353  {
3354  /* Sigh. MOVC CC doesn't work, so use one of the temps as dummy dest */
3355  shader_addline(buffer, "SUBC %s, %s.x, srgb_consts1.x;\n", tmp1, fragcolor);
3356  /* Calculate the > 0.0031308 case */
3357  shader_addline(buffer, "POW %s.x (GE), %s.x, srgb_consts0.x;\n", fragcolor, fragcolor);
3358  shader_addline(buffer, "POW %s.y (GE), %s.y, srgb_consts0.x;\n", fragcolor, fragcolor);
3359  shader_addline(buffer, "POW %s.z (GE), %s.z, srgb_consts0.x;\n", fragcolor, fragcolor);
3360  shader_addline(buffer, "MUL %s.xyz (GE), %s, srgb_consts0.y;\n", fragcolor, fragcolor);
3361  shader_addline(buffer, "SUB %s.xyz (GE), %s, srgb_consts0.z;\n", fragcolor, fragcolor);
3362  /* Calculate the < case */
3363  shader_addline(buffer, "MUL %s.xyz (LT), srgb_consts0.w, %s;\n", fragcolor, fragcolor);
3364  }
3365  else
3366  {
3367  /* Calculate the > 0.0031308 case */
3368  shader_addline(buffer, "POW %s.x, %s.x, srgb_consts0.x;\n", tmp1, fragcolor);
3369  shader_addline(buffer, "POW %s.y, %s.y, srgb_consts0.x;\n", tmp1, fragcolor);
3370  shader_addline(buffer, "POW %s.z, %s.z, srgb_consts0.x;\n", tmp1, fragcolor);
3371  shader_addline(buffer, "MUL %s, %s, srgb_consts0.y;\n", tmp1, tmp1);
3372  shader_addline(buffer, "SUB %s, %s, srgb_consts0.z;\n", tmp1, tmp1);
3373  /* Calculate the < case */
3374  shader_addline(buffer, "MUL %s, srgb_consts0.w, %s;\n", tmp2, fragcolor);
3375  /* Get 1.0 / 0.0 masks for > 0.0031308 and < 0.0031308 */
3376  shader_addline(buffer, "SLT %s, srgb_consts1.x, %s;\n", tmp3, fragcolor);
3377  shader_addline(buffer, "SGE %s, srgb_consts1.x, %s;\n", tmp4, fragcolor);
3378  /* Store the components > 0.0031308 in the destination */
3379  shader_addline(buffer, "MUL %s.xyz, %s, %s;\n", fragcolor, tmp1, tmp3);
3380  /* Add the components that are < 0.0031308 */
3381  shader_addline(buffer, "MAD %s.xyz, %s, %s, %s;\n", fragcolor, tmp2, tmp4, fragcolor);
3382  /* Move everything into result.color at once. Nvidia hardware cannot handle partial
3383  * result.color writes(.rgb first, then .a), or handle overwriting already written
3384  * components. The assembler uses a temporary register in this case, which is usually
3385  * not allocated from one of our registers that were used earlier.
3386  */
3387  }
3388  /* [0.0;1.0] clamping. Not needed, this is done implicitly */
3389 }
3390 
3392 {
3393  const struct wined3d_shader_lconst *constant;
3394 
3395  LIST_FOR_EACH_ENTRY(constant, &shader->constantsI, struct wined3d_shader_lconst, entry)
3396  {
3397  if (constant->idx == idx)
3398  {
3399  return constant->value;
3400  }
3401  }
3402  return NULL;
3403 }
3404 
3405 static void init_ps_input(const struct wined3d_shader *shader,
3406  const struct arb_ps_compile_args *args, struct shader_arb_ctx_priv *priv)
3407 {
3408  static const char * const texcoords[8] =
3409  {
3410  "fragment.texcoord[0]", "fragment.texcoord[1]", "fragment.texcoord[2]", "fragment.texcoord[3]",
3411  "fragment.texcoord[4]", "fragment.texcoord[5]", "fragment.texcoord[6]", "fragment.texcoord[7]"
3412  };
3413  unsigned int i;
3415  const char *semantic_name;
3417 
3418  if (args->super.vp_mode == WINED3D_VP_MODE_SHADER)
3419  {
3420  /* That one is easy. The vertex shaders provide v0-v7 in
3421  * fragment.texcoord and v8 and v9 in fragment.color. */
3422  for (i = 0; i < 8; ++i)
3423  {
3424  priv->ps_input[i] = texcoords[i];
3425  }
3426  priv->ps_input[8] = "fragment.color.primary";
3427  priv->ps_input[9] = "fragment.color.secondary";
3428  return;
3429  }
3430 
3431  /* The fragment shader has to collect the varyings on its own. In any case
3432  * properly load color0 and color1. In the case of pre-transformed
3433  * vertices also load texture coordinates. Set other attributes to 0.0.
3434  *
3435  * For fixed-function this behavior is correct, according to the tests.
3436  * For pre-transformed we'd either need a replacement shader that can load
3437  * other attributes like BINORMAL, or load the texture coordinate
3438  * attribute pointers to match the fragment shader signature. */
3439  for (i = 0; i < shader->input_signature.element_count; ++i)
3440  {
3441  input = &shader->input_signature.elements[i];
3442  if (!(semantic_name = input->semantic_name))
3443  continue;
3444  semantic_idx = input->semantic_idx;
3445 
3446  if (shader_match_semantic(semantic_name, WINED3D_DECL_USAGE_COLOR))
3447  {
3448  if (!semantic_idx)
3449  priv->ps_input[input->register_idx] = "fragment.color.primary";
3450  else if (semantic_idx == 1)
3451  priv->ps_input[input->register_idx] = "fragment.color.secondary";
3452  else
3453  priv->ps_input[input->register_idx] = "0.0";
3454  }
3455  else if (args->super.vp_mode == WINED3D_VP_MODE_FF)
3456  {
3457  priv->ps_input[input->register_idx] = "0.0";
3458  }
3459  else if (shader_match_semantic(semantic_name, WINED3D_DECL_USAGE_TEXCOORD))
3460  {
3461  if (semantic_idx < 8)
3462  priv->ps_input[input->register_idx] = texcoords[semantic_idx];
3463  else
3464  priv->ps_input[input->register_idx] = "0.0";
3465  }
3466  else if (shader_match_semantic(semantic_name, WINED3D_DECL_USAGE_FOG))
3467  {
3468  if (!semantic_idx)
3469  priv->ps_input[input->register_idx] = "fragment.fogcoord";
3470  else
3471  priv->ps_input[input->register_idx] = "0.0";
3472  }
3473  else
3474  {
3475  priv->ps_input[input->register_idx] = "0.0";
3476  }
3477 
3478  TRACE("v%u, semantic %s%u is %s\n", input->register_idx,
3479  semantic_name, semantic_idx, priv->ps_input[input->register_idx]);
3480  }
3481 }
3482 
/* Append linear fog blending to the program text.
 *
 * The fog factor is computed in tmp.x from state.fog.params and
 * fragment.fogcoord.x, saturated, and then used to LRP between fragcolor and
 * state.fog.color. Only the rgb components of fragcolor are written.
 *
 * NOTE(review): the first line of the signature was missing from this copy of
 * the file. The function name and the type of "buffer" are reconstructed and
 * must be confirmed against the original source and the callers. */
static void arbfp_add_linear_fog(struct wined3d_string_buffer *buffer,
        const char *fragcolor, const char *tmp)
{
    shader_addline(buffer, "SUB %s.x, state.fog.params.z, fragment.fogcoord.x;\n", tmp);
    shader_addline(buffer, "MUL_SAT %s.x, %s.x, state.fog.params.w;\n", tmp, tmp);
    shader_addline(buffer, "LRP %s.rgb, %s.x, %s, state.fog.color;\n", fragcolor, tmp, fragcolor);
}
3490 
3491 /* Context activation is done by the caller. */
3493  const struct wined3d_gl_info *gl_info, struct wined3d_string_buffer *buffer,
3494  const struct arb_ps_compile_args *args, struct arb_ps_compiled_shader *compiled)
3495 {
 /* Writes the text of an ARB fragment program for "shader" into "buffer",
  * generates a GL program name, binds it and passes the text to
  * shader_arb_compile(). Bookkeeping about the constants that were declared
  * (bump env matrices, luminance, integer constants, ycorrection, NP2 fixup)
  * is recorded in "compiled".
  * Returns the GL program name, or 0 when shader_generate_code() or
  * shader_arb_compile() fails.
  * NOTE(review): the line carrying the function name and its first parameter,
  * and a few statement lines further down, are absent from this listing. */
3496  const struct wined3d_shader_reg_maps *reg_maps = &shader->reg_maps;
3497  GLuint retval;
3498  char fragcolor[16];
3499  DWORD next_local = 0;
3500  struct shader_arb_ctx_priv priv_ctx;
3501  BOOL dcl_td = FALSE;
3502  BOOL want_nv_prog = FALSE;
3503  struct arb_pshader_private *shader_priv = shader->backend_data;
3504  DWORD map;
3505  BOOL custom_linear_fog = FALSE;
3506 
3507  char srgbtmp[4][4];
3508  char ftoa_tmp[17];
3509  unsigned int i, found = 0;
3510 
 /* Collect up to four of the shader's own temporaries ("R<n>") to hand to the
  * sRGB correction code below. The register used for the color0 move and, for
  * shader versions below 2, register 0 are left out. */
3511  for (i = 0, map = reg_maps->temporary; map; map >>= 1, ++i)
3512  {
3513  if (!(map & 1)
3514  || (shader->u.ps.color0_mov && i == shader->u.ps.color0_reg)
3515  || (reg_maps->shader_version.major < 2 && !i))
3516  continue;
3517 
3518  sprintf(srgbtmp[found], "R%u", i);
3519  ++found;
3520  if (found == 4) break;
3521  }
3522 
 /* Fill the slots that could not be served by an R register with the helper
  * temporaries TA..TD. TD is only needed (and only declared, see dcl_td) when
  * no R register was found at all. */
3523  switch(found) {
3524  case 0:
3525  sprintf(srgbtmp[0], "TA");
3526  sprintf(srgbtmp[1], "TB");
3527  sprintf(srgbtmp[2], "TC");
3528  sprintf(srgbtmp[3], "TD");
3529  dcl_td = TRUE;
3530  break;
3531  case 1:
3532  sprintf(srgbtmp[1], "TA");
3533  sprintf(srgbtmp[2], "TB");
3534  sprintf(srgbtmp[3], "TC");
3535  break;
3536  case 2:
3537  sprintf(srgbtmp[2], "TA");
3538  sprintf(srgbtmp[3], "TB");
3539  break;
3540  case 3:
3541  sprintf(srgbtmp[3], "TA");
3542  break;
3543  case 4:
3544  break;
3545  }
3546 
3547  /* Create the hw ARB shader */
3548  memset(&priv_ctx, 0, sizeof(priv_ctx));
3549  priv_ctx.cur_ps_args = args;
3550  priv_ctx.compiled_fprog = compiled;
3551  priv_ctx.cur_np2fixup_info = &compiled->np2fixup_info;
3552  init_ps_input(shader, args, &priv_ctx);
3553  list_init(&priv_ctx.control_frames);
 /* Post-processing of the final colour is needed for sRGB correction; the
  * custom linear fog path below switches it on as well. */
3554  priv_ctx.ps_post_process = args->super.srgb_correction;
3555 
3556  /* Avoid enabling NV_fragment_program* if we do not need it.
3557  *
3558  * Enabling GL_NV_fragment_program_option causes the driver to occupy a temporary register,
3559  * and it slows down the shader execution noticeably(about 5%). Usually our instruction emulation
3560  * is faster than what we gain from using higher native instructions. There are some things though
3561  * that cannot be emulated. In that case enable the extensions.
3562  * If the extension is enabled, instruction handlers that support both ways will use it.
3563  *
3564  * Testing shows no performance difference between OPTION NV_fragment_program2 and NV_fragment_program.
3565  * So enable the best we can get.
3566  */
3567  if(reg_maps->usesdsx || reg_maps->usesdsy || reg_maps->loop_depth > 0 || reg_maps->usestexldd ||
3568  reg_maps->usestexldl || reg_maps->usesfacing || reg_maps->usesifc || reg_maps->usescall)
3569  {
3570  want_nv_prog = TRUE;
3571  }
3572 
 /* Program header plus the OPTION line selecting the target dialect. The
  * chosen dialect is remembered in priv_ctx.target_version. */
3573  shader_addline(buffer, "!!ARBfp1.0\n");
3574  if (want_nv_prog && gl_info->supported[NV_FRAGMENT_PROGRAM2])
3575  {
3576  shader_addline(buffer, "OPTION NV_fragment_program2;\n");
3577  priv_ctx.target_version = NV3;
3578  }
3579  else if (want_nv_prog && gl_info->supported[NV_FRAGMENT_PROGRAM_OPTION])
3580  {
3581  shader_addline(buffer, "OPTION NV_fragment_program;\n");
3582  priv_ctx.target_version = NV2;
3583  } else {
3584  if(want_nv_prog)
3585  {
3586  /* This is an error - either we're advertising the wrong shader version, or aren't enforcing some
3587  * limits properly
3588  */
3589  ERR("The shader requires instructions that are not available in plain GL_ARB_fragment_program\n");
3590  ERR("Try GLSL\n");
3591  }
3592  priv_ctx.target_version = ARB;
3593  }
3594 
 /* More than render target 0 is written. */
3595  if (reg_maps->rt_mask > 1)
3596  {
3597  shader_addline(buffer, "OPTION ARB_draw_buffers;\n");
3598  }
3599 
 /* Fog is only set up here for shader versions below 3. Each fog mode maps
  * to one ARB_fog_* OPTION, except that linear fog is emulated in the shader
  * itself when the WINED3D_QUIRK_BROKEN_ARB_FOG quirk is set.
  * NOTE(review): the case labels of this switch are absent from this listing. */
3600  if (reg_maps->shader_version.major < 3)
3601  {
3602  switch (args->super.fog)
3603  {
3605  break;
3607  if (gl_info->quirks & WINED3D_QUIRK_BROKEN_ARB_FOG)
3608  {
3609  custom_linear_fog = TRUE;
3610  priv_ctx.ps_post_process = TRUE;
3611  break;
3612  }
3613  shader_addline(buffer, "OPTION ARB_fog_linear;\n");
3614  break;
3616  shader_addline(buffer, "OPTION ARB_fog_exp;\n");
3617  break;
3619  shader_addline(buffer, "OPTION ARB_fog_exp2;\n");
3620  break;
3621  }
3622  }
3623 
3624  /* For now always declare the temps. At least the Nvidia assembler optimizes completely
3625  * unused temps away(but occupies them for the whole shader if they're used once). Always
3626  * declaring them avoids tricky bookkeeping work
3627  */
3628  shader_addline(buffer, "TEMP TA;\n"); /* Used for modifiers */
3629  shader_addline(buffer, "TEMP TB;\n"); /* Used for modifiers */
3630  shader_addline(buffer, "TEMP TC;\n"); /* Used for modifiers */
3631  if(dcl_td) shader_addline(buffer, "TEMP TD;\n"); /* Used for sRGB writing */
3632  shader_addline(buffer, "PARAM coefdiv = { 0.5, 0.25, 0.125, 0.0625 };\n");
3633  shader_addline(buffer, "PARAM coefmul = { 2, 4, 8, 16 };\n");
3634  wined3d_ftoa(eps, ftoa_tmp);
3635  shader_addline(buffer, "PARAM ps_helper_const = { 0.0, 1.0, %s, 0.0 };\n", ftoa_tmp);
3636 
 /* Pick the register the shader body writes its final colour to:
  *  - version < 2: always R0;
  *  - post-processing needed: the color0 move register if there is one,
  *    otherwise a dedicated TMP_COLOR temporary;
  *  - otherwise result.color directly.
  * Anything other than result.color is copied to result.color at the end. */
3637  if (reg_maps->shader_version.major < 2)
3638  {
3639  strcpy(fragcolor, "R0");
3640  }
3641  else
3642  {
3643  if (priv_ctx.ps_post_process)
3644  {
3645  if (shader->u.ps.color0_mov)
3646  {
3647  sprintf(fragcolor, "R%u", shader->u.ps.color0_reg);
3648  }
3649  else
3650  {
3651  shader_addline(buffer, "TEMP TMP_COLOR;\n");
3652  strcpy(fragcolor, "TMP_COLOR");
3653  }
3654  } else {
3655  strcpy(fragcolor, "result.color");
3656  }
3657  }
3658 
 /* NOTE(review): the lines emitting the values of srgb_consts0/1 between the
  * "PARAM ... = " and ";" pieces are absent from this listing. */
3659  if (args->super.srgb_correction)
3660  {
3661  shader_addline(buffer, "PARAM srgb_consts0 = ");
3663  shader_addline(buffer, ";\n");
3664  shader_addline(buffer, "PARAM srgb_consts1 = ");
3666  shader_addline(buffer, ";\n");
3667  }
3668 
3669  /* Base Declarations */
3670  shader_generate_arb_declarations(shader, reg_maps, buffer, gl_info, NULL, &priv_ctx);
3671 
 /* One program.local slot per texture unit flagged in reg_maps->bumpmat, plus
  * a second one when the unit is also flagged in reg_maps->luminanceparams.
  * The slot numbers and texture units are recorded in "compiled". */
3672  for (i = 0, map = reg_maps->bumpmat; map; map >>= 1, ++i)
3673  {
3674  unsigned char bump_const;
3675 
3676  if (!(map & 1)) continue;
3677 
3678  bump_const = compiled->numbumpenvmatconsts;
3679  compiled->bumpenvmatconst[bump_const].const_num = WINED3D_CONST_NUM_UNUSED;
3680  compiled->bumpenvmatconst[bump_const].texunit = i;
3681  compiled->luminanceconst[bump_const].const_num = WINED3D_CONST_NUM_UNUSED;
3682  compiled->luminanceconst[bump_const].texunit = i;
3683 
3684  /* We can fit the constants into the constant limit for sure because texbem, texbeml, bem and beml are only supported
3685  * in 1.x shaders, and GL_ARB_fragment_program has a constant limit of 24 constants. So in the worst case we're loading
3686  * 8 shader constants, 8 bump matrices and 8 luminance parameters and are perfectly fine. (No NP2 fixup on bumpmapped
3687  * textures due to conditional NP2 restrictions)
3688  *
3689  * Use local constants to load the bump env parameters, not program.env. This avoids collisions with d3d constants of
3690  * shaders in newer shader models. Since the bump env parameters have to share their space with NP2 fixup constants,
3691  * their location is shader dependent anyway and they cannot be loaded globally.
3692  */
3693  compiled->bumpenvmatconst[bump_const].const_num = next_local++;
3694  shader_addline(buffer, "PARAM bumpenvmat%d = program.local[%d];\n",
3695  i, compiled->bumpenvmatconst[bump_const].const_num);
3696  compiled->numbumpenvmatconsts = bump_const + 1;
3697 
3698  if (!(reg_maps->luminanceparams & (1u << i)))
3699  continue;
3700 
3701  compiled->luminanceconst[bump_const].const_num = next_local++;
3702  shader_addline(buffer, "PARAM luminance%d = program.local[%d];\n",
3703  i, compiled->luminanceconst[bump_const].const_num);
3704  }
3705 
 /* Integer constants are only declared for the NV targets. When
  * find_loop_control_values() returns values for a constant they are emitted
  * as a literal PARAM; otherwise the constant is bound to a program.local slot
  * whose number is recorded in compiled->int_consts[]. */
3706  for (i = 0; i < WINED3D_MAX_CONSTS_I; ++i)
3707  {
3708  compiled->int_consts[i] = WINED3D_CONST_NUM_UNUSED;
3709  if (reg_maps->integer_constants & (1u << i) && priv_ctx.target_version >= NV2)
3710  {
3711  const DWORD *control_values = find_loop_control_values(shader, i);
3712 
3713  if(control_values)
3714  {
3715  shader_addline(buffer, "PARAM I%u = {%u, %u, %u, -1};\n", i,
3716  control_values[0], control_values[1], control_values[2]);
3717  }
3718  else
3719  {
3720  compiled->int_consts[i] = next_local;
3721  compiled->num_int_consts++;
3722  shader_addline(buffer, "PARAM I%u = program.local[%u];\n", i, next_local++);
3723  }
3724  }
3725  }
3726 
 /* A "ycorrection" local parameter is declared when the shader reads vpos or
  * uses dsy; vpos itself is computed right here at the top of the program.
  * NOTE(review): the statement inside the else branch is absent from this
  * listing. */
3727  if(reg_maps->vpos || reg_maps->usesdsy)
3728  {
3729  compiled->ycorrection = next_local;
3730  shader_addline(buffer, "PARAM ycorrection = program.local[%u];\n", next_local++);
3731 
3732  if(reg_maps->vpos)
3733  {
3734  shader_addline(buffer, "TEMP vpos;\n");
3735  /* ycorrection.x: Backbuffer height(onscreen) or 0(offscreen).
3736  * ycorrection.y: -1.0(onscreen), 1.0(offscreen)
3737  * ycorrection.z: 1.0
3738  * ycorrection.w: 0.0
3739  */
3740  shader_addline(buffer, "MAD vpos, fragment.position, ycorrection.zyww, ycorrection.wxww;\n");
3741  shader_addline(buffer, "FLR vpos.xy, vpos;\n");
3742  }
3743  }
3744  else
3745  {
3747  }
3748 
3749  /* Load constants to fixup NP2 texcoords if there are still free constants left:
3750  * Constants (texture dimensions) for the NP2 fixup are loaded as local program parameters. This will consume
3751  * at most 8 (MAX_FRAGMENT_SAMPLERS / 2) parameters, which is highly unlikely, since the application had to
3752  * use 16 NP2 textures at the same time. In case that we run out of constants the fixup is simply not
3753  * applied / activated. This will probably result in wrong rendering of the texture, but will save us from
3754  * shader compilation errors and the subsequent errors when drawing with this shader. */
3755  if (priv_ctx.cur_ps_args->super.np2_fixup) {
3756  unsigned char cur_fixup_sampler = 0;
3757 
3758  struct arb_ps_np2fixup_info* const fixup = priv_ctx.cur_np2fixup_info;
3759  const WORD map = priv_ctx.cur_ps_args->super.np2_fixup;
3760  const UINT max_lconsts = gl_info->limits.arb_ps_local_constants;
3761 
3762  fixup->offset = next_local;
3763  fixup->super.active = 0;
3764 
 /* Two samplers share one constant, hence the ">> 1" when checking the
  * limit and the rounding up when computing num_consts below. */
3765  for (i = 0; i < MAX_FRAGMENT_SAMPLERS; ++i)
3766  {
3767  if (!(map & (1u << i)))
3768  continue;
3769 
3770  if (fixup->offset + (cur_fixup_sampler >> 1) < max_lconsts)
3771  {
3772  fixup->super.active |= (1u << i);
3773  fixup->super.idx[i] = cur_fixup_sampler++;
3774  }
3775  else
3776  {
3777  FIXME("No free constant found to load NP2 fixup data into shader. "
3778  "Sampling from this texture will probably look wrong.\n");
3779  break;
3780  }
3781  }
3782 
3783  fixup->super.num_consts = (cur_fixup_sampler + 1) >> 1;
 /* NOTE(review): the declaration below binds to program.env, while the
  * offset is taken from next_local, the limit checked is the local constant
  * limit and the comment above speaks of local parameters - confirm against
  * the code that uploads these constants. */
3784  if (fixup->super.num_consts) {
3785  shader_addline(buffer, "PARAM np2fixup[%u] = { program.env[%u..%u] };\n",
3786  fixup->super.num_consts, fixup->offset, fixup->super.num_consts + fixup->offset - 1);
3787  }
3788  }
3789 
 /* Clip plane emulation: kill the fragment based on the texcoord set aside
  * for that purpose (~0U means none was reserved). */
3790  if (shader_priv->clipplane_emulation != ~0U && args->clip)
3791  {
3792  shader_addline(buffer, "KIL fragment.texcoord[%u];\n", shader_priv->clipplane_emulation);
3793  }
3794 
3795  /* Base Shader Body */
3796  if (FAILED(shader_generate_code(shader, buffer, reg_maps, &priv_ctx, NULL, NULL)))
3797  return 0;
3798 
 /* Post-processing on the final colour: sRGB correction first, then the
  * emulated linear fog (which uses TA as scratch), then the copy to
  * result.color if the body did not write there directly. */
3799  if(args->super.srgb_correction) {
3800  arbfp_add_sRGB_correction(buffer, fragcolor, srgbtmp[0], srgbtmp[1], srgbtmp[2], srgbtmp[3],
3801  priv_ctx.target_version >= NV2);
3802  }
3803 
3804  if (custom_linear_fog)
3805  arbfp_add_linear_fog(buffer, fragcolor, "TA");
3806 
3807  if(strcmp(fragcolor, "result.color")) {
3808  shader_addline(buffer, "MOV result.color, %s;\n", fragcolor);
3809  }
3810  shader_addline(buffer, "END\n");
3811 
3812  /* TODO: change to resource.glObjectHandle or something like that */
3813  GL_EXTCALL(glGenProgramsARB(1, &retval));
3814 
3815  TRACE("Creating a hw pixel shader, prg=%d\n", retval);
3816  GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, retval));
3817 
3818  TRACE("Created hw pixel shader, prg=%d\n", retval);
 /* NOTE(review): on compile failure the name obtained from glGenProgramsARB()
  * above is not released before returning 0. */
3819  if (!shader_arb_compile(gl_info, GL_FRAGMENT_PROGRAM_ARB, buffer->buffer))
3820  return 0;
3821 
3822  return retval;
3823 }
3824 
3825 static int compare_sig(const struct wined3d_shader_signature *sig1, const struct wined3d_shader_signature *sig2)
3826 {
3827  unsigned int i;
3828  int ret;
3829 
3830  if (sig1->element_count != sig2->element_count)
3831  return sig1->element_count < sig2->element_count ? -1 : 1;
3832 
3833  for (i = 0; i < sig1->element_count; ++i)
3834  {
3835  const struct wined3d_shader_signature_element *e1, *e2;
3836 
3837  e1 = &sig1->elements[i];
3838  e2 = &sig2->elements[i];
3839 
3840  if (!e1->semantic_name || !e2->semantic_name)
3841  {
3842  /* Compare pointers, not contents. One string is NULL (element
3843  * does not exist), the other one is not NULL. */
3844  if (e1->semantic_name != e2->semantic_name)
3845  return e1->semantic_name < e2->semantic_name ? -1 : 1;
3846  continue;
3847  }
3848 
3849  if ((ret = strcmp(e1->semantic_name, e2->semantic_name)))
3850  return ret;
3851  if (e1->semantic_idx != e2->semantic_idx)
3852  return e1->semantic_idx < e2->semantic_idx ? -1 : 1;
3853  if (e1->sysval_semantic != e2->sysval_semantic)
3854  return e1->sysval_semantic < e2->sysval_semantic ? -1 : 1;
3855  if (e1->component_type != e2->component_type)
3856  return e1->component_type < e2->component_type ? -1 : 1;
3857  if (e1->register_idx != e2->register_idx)
3858  return e1->register_idx < e2->register_idx ? -1 : 1;
3859  if (e1->mask != e2->mask)
3860  return e1->mask < e2->mask ? -1 : 1;
3861  }
3862  return 0;
3863 }
3864 
3865 static void clone_sig(struct wined3d_shader_signature *new, const struct wined3d_shader_signature *sig)
3866 {
3867  unsigned int i;
3868  char *name;
3869 
3870  new->element_count = sig->element_count;
3871  new->elements = heap_calloc(new->element_count, sizeof(*new->elements));
3872  for (i = 0; i < sig->element_count; ++i)
3873  {
3874  new->elements[i] = sig->elements[i];
3875 
3876  if (!new->elements[i].semantic_name)
3877  continue;
3878 
3879  /* Clone the semantic string */
3880  name = heap_alloc(strlen(sig->elements[i].semantic_name) + 1);
3881  strcpy(name, sig->elements[i].semantic_name);
3882  new->elements[i].semantic_name = name;
3883  }
3884 }
3885 
3887 {
 /* Looks "sig" up in priv->signature_tree and returns the index stored with
  * it. If it is not in the tree yet, a new ps_signature holding a clone of
  * "sig" is created, given the next number from priv->ps_sig_number and
  * inserted into the tree.
  * NOTE(review): the line with the function name and parameter list is not
  * part of this listing; the visible caller passes (priv, &shader->input_signature)
  * under the name find_input_signature. */
3888  struct wine_rb_entry *entry = wine_rb_get(&priv->signature_tree, sig);
3889  struct ps_signature *found_sig;
3890 
3891  if (entry)
3892  {
3893  found_sig = WINE_RB_ENTRY_VALUE(entry, struct ps_signature, entry);
3894  TRACE("Found existing signature %u\n", found_sig->idx);
3895  return found_sig->idx;
3896  }
 /* NOTE(review): the result of heap_alloc_zero() is used without a NULL check. */
3897  found_sig = heap_alloc_zero(sizeof(*found_sig));
3898  clone_sig(&found_sig->sig, sig);
3899  found_sig->idx = priv->ps_sig_number++;
3900  TRACE("New signature stored and assigned number %u\n", found_sig->idx);
 /* NOTE(review): when the insertion fails the entry is only logged; found_sig
  * is not released here and its index is still returned. */
3901  if(wine_rb_put(&priv->signature_tree, sig, &found_sig->entry) == -1)
3902  {
3903  ERR("Failed to insert program entry.\n");
3904  }
3905  return found_sig->idx;
3906 }
3907 
/* Decides, for a vertex shader, which register name each output is written to.
 *
 * Fills priv_ctx->texcrd_output[], color_output[], fog_output and vs_output[]
 * with the names of ARB result registers or temporaries. Without a pixel shader
 * input signature (ps_input_sig == NULL) outputs go to the builtin varyings;
 * with one, outputs are matched to the pixel shader's declared inputs by
 * semantic name and index, and anything unmatched is written to the "TA"
 * temporary to discard it. compiled->need_color_unclamp is set when a declared
 * varying ends up in result.color.primary or result.color.secondary.
 *
 * NOTE(review): several "if (...)" / "else if (...)" lines that select on the
 * semantic are absent from this listing; the TRACE text inside each branch
 * indicates which output the branch handles. */
3908 static void init_output_registers(const struct wined3d_shader *shader,
3909  const struct wined3d_shader_signature *ps_input_sig,
3910  struct shader_arb_ctx_priv *priv_ctx, struct arb_vs_compiled_shader *compiled)
3911 {
3912  unsigned int i, j;
3913  static const char * const texcoords[8] =
3914  {
3915  "result.texcoord[0]", "result.texcoord[1]", "result.texcoord[2]", "result.texcoord[3]",
3916  "result.texcoord[4]", "result.texcoord[5]", "result.texcoord[6]", "result.texcoord[7]"
3917  };
3918  /* Write generic input varyings 0 to 7 to result.texcoord[], varying 8 to result.color.primary
3919  * and varying 9 to result.color.secondary
3920  */
 /* NOTE(review): only ten entries are initialised and the table is indexed
  * with input->register_idx without a range check below - assumes the
  * register index is always below 10, confirm. */
3921  static const char * const decl_idx_to_string[MAX_REG_INPUT] =
3922  {
3923  "result.texcoord[0]", "result.texcoord[1]", "result.texcoord[2]", "result.texcoord[3]",
3924  "result.texcoord[4]", "result.texcoord[5]", "result.texcoord[6]", "result.texcoord[7]",
3925  "result.color.primary", "result.color.secondary"
3926  };
3927 
3928  if (!ps_input_sig)
3929  {
3930  TRACE("Pixel shader uses builtin varyings\n");
3931  /* Map builtins to builtins */
3932  for(i = 0; i < 8; i++)
3933  {
3934  priv_ctx->texcrd_output[i] = texcoords[i];
3935  }
3936  priv_ctx->color_output[0] = "result.color.primary";
3937  priv_ctx->color_output[1] = "result.color.secondary";
3938  priv_ctx->fog_output = "TMP_FOGCOORD";
3939 
3940  /* Map declared regs to builtins. Use "TA" to /dev/null unread output */
3941  for (i = 0; i < shader->output_signature.element_count; ++i)
3942  {
3943  const struct wined3d_shader_signature_element *output = &shader->output_signature.elements[i];
3944 
3945  if (!output->semantic_name)
3946  continue;
3947 
 /* In each branch only the first semantic index (or the first two for
  * colours, the first eight for texcoords) has a builtin target; higher
  * indices are discarded into TA. */
3949  {
3950  TRACE("o%u is TMP_OUT\n", output->register_idx);
3951  if (!output->semantic_idx)
3952  priv_ctx->vs_output[output->register_idx] = "TMP_OUT";
3953  else
3954  priv_ctx->vs_output[output->register_idx] = "TA";
3955  }
3957  {
3958  TRACE("o%u is result.pointsize\n", output->register_idx);
3959  if (!output->semantic_idx)
3960  priv_ctx->vs_output[output->register_idx] = "result.pointsize";
3961  else
3962  priv_ctx->vs_output[output->register_idx] = "TA";
3963  }
3965  {
3966  TRACE("o%u is result.color.?, idx %u\n", output->register_idx, output->semantic_idx);
3967  if (!output->semantic_idx)
3968  priv_ctx->vs_output[output->register_idx] = "result.color.primary";
3969  else if (output->semantic_idx == 1)
3970  priv_ctx->vs_output[output->register_idx] = "result.color.secondary";
3971  else priv_ctx->vs_output[output->register_idx] = "TA";
3972  }
3974  {
3975  TRACE("o%u is result.texcoord[%u]\n", output->register_idx, output->semantic_idx);
3976  if (output->semantic_idx >= 8)
3977  priv_ctx->vs_output[output->register_idx] = "TA";
3978  else
3979  priv_ctx->vs_output[output->register_idx] = texcoords[output->semantic_idx];
3980  }
3982  {
3983  TRACE("o%u is result.fogcoord\n", output->register_idx);
3984  if (output->semantic_idx > 0)
3985  priv_ctx->vs_output[output->register_idx] = "TA";
3986  else
3987  priv_ctx->vs_output[output->register_idx] = "result.fogcoord";
3988  }
3989  else
3990  {
3991  priv_ctx->vs_output[output->register_idx] = "TA";
3992  }
3993  }
3994  return;
3995  }
3996 
3997  TRACE("Pixel shader uses declared varyings\n");
3998 
3999  /* Map builtin to declared. /dev/null the results by default to the TA temp reg */
4000  for(i = 0; i < 8; i++)
4001  {
4002  priv_ctx->texcrd_output[i] = "TA";
4003  }
4004  priv_ctx->color_output[0] = "TA";
4005  priv_ctx->color_output[1] = "TA";
4006  priv_ctx->fog_output = "TA";
4007 
 /* First pass over the pixel shader inputs: redirect the builtin texcoord,
  * colour and fog outputs to the varying the pixel shader declared for them. */
4008  for (i = 0; i < ps_input_sig->element_count; ++i)
4009  {
4010  const struct wined3d_shader_signature_element *input = &ps_input_sig->elements[i];
4011 
4012  if (!input->semantic_name)
4013  continue;
4014 
4015  /* If a declared input register is not written by builtin arguments, don't write to it.
4016  * GL_NV_vertex_program makes sure the input defaults to 0.0, which is correct with D3D
4017  *
4018  * Don't care about POSITION and PSIZE here - this is a builtin vertex shader, position goes
4019  * to TMP_OUT in any case
4020  */
4022  {
4023  if (input->semantic_idx < 8)
4024  priv_ctx->texcrd_output[input->semantic_idx] = decl_idx_to_string[input->register_idx];
4025  }
4027  {
4028  if (input->semantic_idx < 2)
4029  priv_ctx->color_output[input->semantic_idx] = decl_idx_to_string[input->register_idx];
4030  }
4032  {
4033  if (!input->semantic_idx)
4034  priv_ctx->fog_output = decl_idx_to_string[input->register_idx];
4035  }
4036  else
4037  {
4038  continue;
4039  }
4040 
 /* The varying landed in one of the colour results, so the compiled shader
  * is flagged as needing colour unclamping. */
4041  if (!strcmp(decl_idx_to_string[input->register_idx], "result.color.primary")
4042  || !strcmp(decl_idx_to_string[input->register_idx], "result.color.secondary"))
4043  {
4044  compiled->need_color_unclamp = TRUE;
4045  }
4046  }
4047 
4048  /* Map declared to declared */
4049  for (i = 0; i < shader->output_signature.element_count; ++i)
4050  {
4051  const struct wined3d_shader_signature_element *output = &shader->output_signature.elements[i];
4052 
4053  /* Write unread output to TA to throw them away */
4054  priv_ctx->vs_output[output->register_idx] = "TA";
4055 
4056  if (!output->semantic_name)
4057  continue;
4058 
 /* Two outputs are routed to fixed targets (TMP_OUT and result.pointsize)
  * without consulting the pixel shader signature. */
4060  {
4061  priv_ctx->vs_output[output->register_idx] = "TMP_OUT";
4062  continue;
4063  }
4065  {
4066  priv_ctx->vs_output[output->register_idx] = "result.pointsize";
4067  continue;
4068  }
4069 
 /* Every other output is matched against the pixel shader inputs by
  * semantic name and index; the loop does not stop at the first match, so
  * the last matching input wins. */
4070  for (j = 0; j < ps_input_sig->element_count; ++j)
4071  {
4072  const struct wined3d_shader_signature_element *input = &ps_input_sig->elements[j];
4073 
4074  if (!input->semantic_name)
4075  continue;
4076 
4077  if (!strcmp(input->semantic_name, output->semantic_name)
4078  && input->semantic_idx == output->semantic_idx)
4079  {
4080  priv_ctx->vs_output[output->register_idx] = decl_idx_to_string[input->register_idx];
4081 
4082  if (!strcmp(priv_ctx->vs_output[output->register_idx], "result.color.primary")
4083  || !strcmp(priv_ctx->vs_output[output->register_idx], "result.color.secondary"))
4084  {
4085  compiled->need_color_unclamp = TRUE;
4086  }
4087  }
4088  }
4089  }
4090 }
4091 
4092 /* Context activation is done by the caller. */
4094  const struct wined3d_gl_info *gl_info, struct wined3d_string_buffer *buffer,
4095  const struct arb_vs_compile_args *args, struct arb_vs_compiled_shader *compiled,
4096  const struct wined3d_shader_signature *ps_input_sig)
4097 {
 /* Writes the text of an ARB vertex program for "shader" into "buffer",
  * generates a GL program name, binds it and passes the text to
  * shader_arb_compile(). The program.local slots used for integer constants
  * and for the position fixup are recorded in "compiled".
  * Returns the GL program name on success.
  * NOTE(review): failures return -1 although "ret" is a GLuint and the pixel
  * shader generator in this file returns 0 on failure; the declared return
  * type is on a line that is absent from this listing (as is the function
  * name), so confirm what callers test for. */
4098  const struct arb_vshader_private *shader_data = shader->backend_data;
4099  const struct wined3d_shader_reg_maps *reg_maps = &shader->reg_maps;
4100  struct shader_arb_priv *priv = shader->device->shader_priv;
4101  GLuint ret;
4102  DWORD next_local = 0;
4103  struct shader_arb_ctx_priv priv_ctx;
4104  unsigned int i;
4105 
4106  memset(&priv_ctx, 0, sizeof(priv_ctx));
4107  priv_ctx.cur_vs_args = args;
4108  list_init(&priv_ctx.control_frames);
4109  init_output_registers(shader, ps_input_sig, &priv_ctx, compiled);
4110 
4111  /* Create the hw ARB shader */
4112  shader_addline(buffer, "!!ARBvp1.0\n");
4113 
4114  /* Always enable the NV extension if available. Unlike fragment shaders, there is no
4115  * measurable performance penalty, and we can always make use of it for clipplanes.
4116  */
4117  if (gl_info->supported[NV_VERTEX_PROGRAM3])
4118  {
4119  shader_addline(buffer, "OPTION NV_vertex_program3;\n");
4120  priv_ctx.target_version = NV3;
4121  shader_addline(buffer, "ADDRESS aL;\n");
4122  }
4123  else if (gl_info->supported[NV_VERTEX_PROGRAM2_OPTION])
4124  {
4125  shader_addline(buffer, "OPTION NV_vertex_program2;\n");
4126  priv_ctx.target_version = NV2;
4127  shader_addline(buffer, "ADDRESS aL;\n");
4128  } else {
4129  priv_ctx.target_version = ARB;
4130  }
4131 
 /* Helper temporaries and constants; the two PARAMs are only declared when
  * need_helper_const() / need_rel_addr_const() ask for them. */
4132  shader_addline(buffer, "TEMP TMP_OUT;\n");
4133  if (reg_maps->fog)
4134  shader_addline(buffer, "TEMP TMP_FOGCOORD;\n");
4135  if (need_helper_const(shader_data, reg_maps, gl_info))
4136  {
4137  char ftoa_tmp[17];
4138  wined3d_ftoa(eps, ftoa_tmp);
4139  shader_addline(buffer, "PARAM helper_const = { 0.0, 1.0, 2.0, %s};\n", ftoa_tmp);
4140  }
4141  if (need_rel_addr_const(shader_data, reg_maps, gl_info))
4142  {
4143  shader_addline(buffer, "PARAM rel_addr_const = { 0.5, %d.0, 0.0, 0.0 };\n", shader_data->rel_offset);
4144  shader_addline(buffer, "TEMP A0_SHADOW;\n");
4145  }
4146 
4147  shader_addline(buffer, "TEMP TA;\n");
4148  shader_addline(buffer, "TEMP TB;\n");
4149 
4150  /* Base Declarations */
4151  shader_generate_arb_declarations(shader, reg_maps, buffer, gl_info,
4152  &priv_ctx.vs_clipplanes, &priv_ctx);
4153 
 /* Integer constants are only declared for the NV targets. When
  * find_loop_control_values() returns values for a constant they are emitted
  * as a literal PARAM; otherwise the constant is bound to a program.local slot
  * whose number is recorded in compiled->int_consts[]. */
4154  for (i = 0; i < WINED3D_MAX_CONSTS_I; ++i)
4155  {
4156  compiled->int_consts[i] = WINED3D_CONST_NUM_UNUSED;
4157  if (reg_maps->integer_constants & (1u << i) && priv_ctx.target_version >= NV2)
4158  {
4159  const DWORD *control_values = find_loop_control_values(shader, i);
4160 
4161  if(control_values)
4162  {
4163  shader_addline(buffer, "PARAM I%u = {%u, %u, %u, -1};\n", i,
4164  control_values[0], control_values[1], control_values[2]);
4165  }
4166  else
4167  {
4168  compiled->int_consts[i] = next_local;
4169  compiled->num_int_consts++;
4170  shader_addline(buffer, "PARAM I%u = program.local[%u];\n", i, next_local++);
4171  }
4172  }
4173  }
4174 
4175  /* We need a constant to fixup the final position */
4176  shader_addline(buffer, "PARAM posFixup = program.local[%u];\n", next_local);
4177  compiled->pos_fixup = next_local++;
4178 
4179  /* Initialize output parameters. GL_ARB_vertex_program does not require special initialization values
4180  * for output parameters. D3D in theory does not do that either, but some applications depend on a
4181  * proper initialization of the secondary color, and programs using the fixed function pipeline without
4182  * a replacement shader depend on the texcoord.w being set properly.
4183  *
4184  * GL_NV_vertex_program defines that all output values are initialized to {0.0, 0.0, 0.0, 1.0}. This
4185  * assertion is in effect even when using GL_ARB_vertex_program without any NV specific additions. So
4186  * skip this if NV_vertex_program is supported. Otherwise, initialize the secondary color. For the tex-
4187  * coords, we have a flag in the opengl caps. Many cards do not require the texcoord being set, and
4188  * this can eat a number of instructions, so skip it unless this cap is set as well
4189  */
4190  if (!gl_info->supported[NV_VERTEX_PROGRAM])
4191  {
4192  const char *color_init = arb_get_helper_value(WINED3D_SHADER_TYPE_VERTEX, ARB_0001);
4193  shader_addline(buffer, "MOV result.color.secondary, %s;\n", color_init);
4194 
4195  if (gl_info->quirks & WINED3D_QUIRK_SET_TEXCOORD_W && !priv->ffp_proj_control)
4196  {
 /* NOTE(review): this "int i" shadows the function-level "unsigned int i",
  * and the declaration of "one" used below is on a line that is absent
  * from this listing. */
4197  int i;
4199  for(i = 0; i < MAX_REG_TEXCRD; i++)
4200  {
 /* NOTE(review): unlike every other instruction emitted in this file, the
  * format string below has no ';' before the newline - looks like the
  * generated MOV is left unterminated; confirm against the ARB program
  * grammar. */
4201  if (reg_maps->u.texcoord_mask[i] && reg_maps->u.texcoord_mask[i] != WINED3DSP_WRITEMASK_ALL)
4202  shader_addline(buffer, "MOV result.texcoord[%u].w, %s\n", i, one);
4203  }
4204  }
4205  }
4206 
4207  /* The shader starts with the main function */
4208  priv_ctx.in_main_func = TRUE;
4209  /* Base Shader Body */
4210  if (FAILED(shader_generate_code(shader, buffer, reg_maps, &priv_ctx, NULL, NULL)))
4211  return -1;
4212 
 /* The footer is appended here unless code generation already wrote it. */
4213  if (!priv_ctx.footer_written) vshader_add_footer(&priv_ctx,
4214  shader_data, args, reg_maps, gl_info, buffer);
4215 
4216  shader_addline(buffer, "END\n");
4217 
4218  /* TODO: change to resource.glObjectHandle or something like that */
4219  GL_EXTCALL(glGenProgramsARB(1, &ret));
4220 
4221  TRACE("Creating a hw vertex shader, prg=%d\n", ret);
4222  GL_EXTCALL(glBindProgramARB(GL_VERTEX_PROGRAM_ARB, ret));
4223 
4224  TRACE("Created hw vertex shader, prg=%d\n", ret);
 /* NOTE(review): on compile failure the name obtained from glGenProgramsARB()
  * above is not released before returning. */
4225  if (!shader_arb_compile(gl_info, GL_VERTEX_PROGRAM_ARB, buffer->buffer))
4226  return -1;
4227 
4228  return ret;
4229 }
4230 
4231 /* Context activation is done by the caller. */
4233  const struct arb_ps_compile_args *args)
4234 {
 /* Returns the compiled GL variant of the pixel shader "shader" that matches
  * "args", compiling and caching a new one when none exists yet.
  * The per-shader backend data is created on first use. Cached variants live
  * in shader_data->gl_shaders[] and are matched with a memcmp() over the whole
  * args structure. Returns 0 (a null pointer) when the array cannot be grown
  * or the string buffer cannot be initialised.
  * NOTE(review): the line with the function name, return type and first
  * parameter is absent from this listing. */
4235  struct wined3d_device *device = shader->device;
4236  const struct wined3d_gl_info *gl_info = &device->adapter->gl_info;
4237  const struct wined3d_d3d_info *d3d_info = &device->adapter->d3d_info;
4238  UINT i;
4239  DWORD new_size;
4240  struct arb_ps_compiled_shader *new_array;
4241  struct wined3d_string_buffer buffer;
4242  struct arb_pshader_private *shader_data;
4243  GLuint ret;
4244 
4245  if (!shader->backend_data)
4246  {
4247  struct shader_arb_priv *priv = device->shader_priv;
4248 
 /* NOTE(review): the result of heap_alloc_zero() is used without a NULL check. */
4249  shader->backend_data = heap_alloc_zero(sizeof(*shader_data));
4250  shader_data = shader->backend_data;
4251  shader_data->clamp_consts = shader->reg_maps.shader_version.major == 1;
4252 
 /* Only shader versions 3 and up get an input signature index; ~0U marks
  * "none" for older versions. */
4253  if (shader->reg_maps.shader_version.major < 3)
4254  shader_data->input_signature_idx = ~0U;
4255  else
4256  shader_data->input_signature_idx = find_input_signature(priv, &shader->input_signature);
4257 
4258  TRACE("Shader got assigned input signature index %u\n", shader_data->input_signature_idx);
4259 
 /* NOTE(review): the first line of the statement executed when vertex shader
  * clipping is unavailable is absent from this listing; only its trailing
  * argument is visible. With vs_clipping, no emulation register is used (~0U). */
4260  if (!d3d_info->vs_clipping)
4262  d3d_info->limits.ffp_blend_stages - 1);
4263  else
4264  shader_data->clipplane_emulation = ~0U;
4265  }
4266  shader_data = shader->backend_data;
4267 
4268  /* Usually we have very few GL shaders for each d3d shader(just 1 or maybe 2),
4269  * so a linear search is more performant than a hashmap or a binary search
4270  * (cache coherency etc)
4271  */
4272  for (i = 0; i < shader_data->num_gl_shaders; ++i)
4273  {
4274  if (!memcmp(&shader_data->gl_shaders[i].args, args, sizeof(*args)))
4275  return &shader_data->gl_shaders[i];
4276  }
4277 
4278  TRACE("No matching GL shader found, compiling a new shader\n");
 /* Grow the variant array when it is full: a single entry for the first
  * variant, afterwards by half the current size (at least one entry). New
  * memory is zero-initialised in both cases. */
4279  if(shader_data->shader_array_size == shader_data->num_gl_shaders) {
4280  if (shader_data->num_gl_shaders)
4281  {
4282  new_size = shader_data->shader_array_size + max(1, shader_data->shader_array_size / 2);
4283  new_array = HeapReAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, shader_data->gl_shaders,
4284  new_size * sizeof(*shader_data->gl_shaders));
4285  }
4286  else
4287  {
4288  new_array = heap_alloc_zero(sizeof(*shader_data->gl_shaders));
4289  new_size = 1;
4290  }
4291 
4292  if(!new_array) {
4293  ERR("Out of memory\n");
4294  return 0;
4295  }
4296  shader_data->gl_shaders = new_array;
4297  shader_data->shader_array_size = new_size;
4298  }
4299 
 /* The slot at index num_gl_shaders is filled in before compiling, but it is
  * only counted as a cached variant by the post-increment in the final return. */
4300  shader_data->gl_shaders[shader_data->num_gl_shaders].args = *args;
4301 
4302  pixelshader_update_resource_types(shader, args->super.tex_types);
4303 
4304  if (!string_buffer_init(&buffer))
4305  {
4306  ERR("Failed to initialize shader buffer.\n");
4307  return 0;
4308  }
4309 
4310  ret = shader_arb_generate_pshader(shader, gl_info, &buffer, args,
4311  &shader_data->gl_shaders[shader_data->num_gl_shaders]);
4312  string_buffer_free(&buffer);
 /* NOTE(review): a failed generation (ret == 0) is still stored and the entry
  * is still added to the cache. */
4313  shader_data->gl_shaders[shader_data->num_gl_shaders].prgId = ret;
4314 
4315  return &shader_data->gl_shaders[shader_data->num_gl_shaders++];
4316 }
4317 
4318 static inline BOOL vs_args_equal(const struct arb_vs_compile_args *stored, const struct arb_vs_compile_args *new,
4319  const DWORD use_map, BOOL skip_int) {
4320  if((stored->super.swizzle_map & use_map) != new->super.swizzle_map) return FALSE;
4321  if(stored->super.clip_enabled != new->super.clip_enabled) return FALSE;
4322  if(stored->super.fog_src != new->super.fog_src) return FALSE;
4323  if(stored->clip.boolclip_compare != new->clip.boolclip_compare) return FALSE;
4324  if(stored->ps_signature != new->ps_signature) return FALSE;
4325  if(stored->vertex.samplers_compare != new->vertex.samplers_compare) return FALSE;
4326  if(skip_int) return TRUE;
4327 
4328  return !memcmp(stored->loop_ctrl, new->loop_ctrl, sizeof(stored->loop_ctrl));
4329 }
4330 
4332  const struct wined3d_gl_info *gl_info, DWORD use_map, const struct arb_vs_compile_args *args,
4333  const struct wined3d_shader_signature *ps_input_sig)
4334 {
4335  UINT i;
4336  DWORD new_size;
4337  struct arb_vs_compiled_shader *new_array;
4338  struct wined3d_string_buffer buffer;
4339  struct arb_vshader_private *shader_data;
4340  GLuint ret;
4341 
4342  if (!shader->backend_data)
4343  {
4344  const struct wined3d_shader_reg_maps *reg_maps = &shader->reg_maps;
4345 
4346  shader->backend_data = heap_alloc_zero(sizeof(*shader_data));
4347  shader_data = shader->backend_data;
4348 
4349  if ((gl_info->quirks & WINED3D_QUIRK_ARB_VS_OFFSET_LIMIT)
4350  && reg_maps->min_rel_offset <= reg_maps->max_rel_offset)
4351  {
4352  if (reg_maps->max_rel_offset - reg_maps->min_rel_offset > 127)
4353  {
4354  FIXME("The difference between the minimum and maximum relative offset is > 127.\n");
4355  FIXME("Which this OpenGL implementation does not support. Try using GLSL.\n");
4356  FIXME("Min: %u, Max: %u.\n", reg_maps->min_rel_offset, reg_maps->max_rel_offset);
4357  }
4358  else if (reg_maps->max_rel_offset - reg_maps->min_rel_offset > 63)
4359  shader_data->rel_offset = reg_maps->min_rel_offset + 63;
4360  else if (reg_maps->max_rel_offset > 63)
4361  shader_data->rel_offset = reg_maps->min_rel_offset;
4362  }
4363  }
4364  shader_data = shader->backend_data;
4365 
4366  /* Usually we have very few GL shaders for each d3d shader(just 1 or maybe 2),
4367  * so a linear search is more performant than a hashmap or a binary search
4368  * (cache coherency etc)
4369  */
4370  for(i = 0; i < shader_data->num_gl_shaders; i++) {
4371  if (vs_args_equal(&shader_data->gl_shaders[i].args, args,
4372  use_map, gl_info->supported[NV_VERTEX_PROGRAM2_OPTION]))
4373  {
4374  return &shader_data->gl_shaders[i];
4375  }
4376  }
4377 
4378  TRACE("No matching GL shader found, compiling a new shader\n");
4379