ReactOS  0.4.14-dev-49-gfb4591c
glsl_shader.c
Go to the documentation of this file.
1 /*
2  * GLSL pixel and vertex shader implementation
3  *
4  * Copyright 2006 Jason Green
5  * Copyright 2006-2007 Henri Verbeet
6  * Copyright 2007-2009, 2013 Stefan Dösinger for CodeWeavers
7  * Copyright 2009-2011 Henri Verbeet for CodeWeavers
8  *
9  * This library is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License as published by the Free Software Foundation; either
12  * version 2.1 of the License, or (at your option) any later version.
13  *
14  * This library is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public
20  * License along with this library; if not, write to the Free Software
21  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
22  */
23 
24 /*
25  * D3D shader asm has swizzles on source parameters, and write masks for
26  * destination parameters. GLSL uses swizzles for both. The result of this is
27  * that for example "mov dst.xw, src.zyxw" becomes "dst.xw = src.zw" in GLSL.
28  * I.e., to generate a proper GLSL source swizzle, we need to take the D3D write
29  * mask for the destination parameter into account.
30  */
31 
32 #include "config.h"
33 #include "wine/port.h"
34 
35 #include <limits.h>
36 #include <stdio.h>
37 #ifdef HAVE_FLOAT_H
38 # include <float.h>
39 #endif
40 
41 #include "wined3d_private.h"
42 
43 WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader);
46 
47 #define WINED3D_GLSL_SAMPLE_PROJECTED 0x01
48 #define WINED3D_GLSL_SAMPLE_LOD 0x02
49 #define WINED3D_GLSL_SAMPLE_GRAD 0x04
50 #define WINED3D_GLSL_SAMPLE_LOAD 0x08
51 #define WINED3D_GLSL_SAMPLE_OFFSET 0x10
52 
53 static const struct
54 {
55  unsigned int coord_size;
56  unsigned int resinfo_size;
57  const char *type_part;
58 }
60 {
61  {0, 0, ""}, /* WINED3D_SHADER_RESOURCE_NONE */
62  {1, 1, "Buffer"}, /* WINED3D_SHADER_RESOURCE_BUFFER */
63  {1, 1, "1D"}, /* WINED3D_SHADER_RESOURCE_TEXTURE_1D */
64  {2, 2, "2D"}, /* WINED3D_SHADER_RESOURCE_TEXTURE_2D */
65  {2, 2, ""}, /* WINED3D_SHADER_RESOURCE_TEXTURE_2DMS */
66  {3, 3, "3D"}, /* WINED3D_SHADER_RESOURCE_TEXTURE_3D */
67  {3, 2, "Cube"}, /* WINED3D_SHADER_RESOURCE_TEXTURE_CUBE */
68  {2, 2, ""}, /* WINED3D_SHADER_RESOURCE_TEXTURE_1DARRAY */
69  {3, 3, "2DArray"}, /* WINED3D_SHADER_RESOURCE_TEXTURE_2DARRAY */
70  {3, 3, ""}, /* WINED3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY */
71  {4, 3, ""}, /* WINED3D_SHADER_RESOURCE_TEXTURE_CUBEARRAY */
72 };
73 
75 {
76  char reg_name[150];
77  char mask_str[6];
78 };
79 
81 {
82  char reg_name[150];
83  char param_str[200];
84 };
85 
87 {
89  unsigned int coord_mask;
90  unsigned int deriv_mask;
93  unsigned int offset_size;
95 };
96 
98 {
102 };
103 
105 {
106  unsigned int idx;
107  unsigned int version;
108 };
109 
111 {
114  unsigned int *positions;
115  unsigned int size;
116 };
117 
118 /* GLSL shader private data */
120 {
126  unsigned char *stack;
128 
135 };
136 
138 {
146 
157  struct
158  {
179 };
180 
182 {
185 };
186 
188 {
191 
193 };
194 
196 {
199 
201 };
202 
204 {
225 };
226 
228 {
231 };
232 
233 /* Struct to maintain data about a linked GLSL program */
235 {
245  unsigned int constant_version;
247  DWORD clip_distance_mask : 8; /* MAX_CLIP_DISTANCES, 8 */
249 };
250 
252 {
259 };
260 
267 };
268 
270 {
274 };
275 
277 {
281 };
282 
284 {
287 };
288 
290 {
292 };
293 
295 {
298 };
299 
301 {
304 };
305 
307 {
309 };
310 
312 {
313  union
314  {
321  } gl_shaders;
323 };
324 
326 {
330 };
331 
333 {
337 };
338 
340 {
342  const struct wined3d_gl_info *gl_info;
343 };
344 
345 static void shader_glsl_generate_shader_epilogue(const struct wined3d_shader_context *ctx);
346 
347 static const char *debug_gl_shader_type(GLenum type)
348 {
349  switch (type)
350  {
351 #define WINED3D_TO_STR(u) case u: return #u
358 #undef WINED3D_TO_STR
359  default:
360  return wine_dbg_sprintf("UNKNOWN(%#x)", type);
361  }
362 }
363 
365 {
366  switch (type)
367  {
369  return "vs";
370 
372  return "hs";
373 
375  return "ds";
376 
378  return "gs";
379 
381  return "ps";
382 
384  return "cs";
385 
386  default:
387  FIXME("Unhandled shader type %#x.\n", type);
388  return "unknown";
389  }
390 }
391 
392 static unsigned int shader_glsl_get_version(const struct wined3d_gl_info *gl_info)
393 {
394  if (gl_info->glsl_version >= MAKEDWORD_VERSION(4, 40))
395  return 440;
396  else if (gl_info->glsl_version >= MAKEDWORD_VERSION(1, 50))
397  return 150;
398  else if (gl_info->glsl_version >= MAKEDWORD_VERSION(1, 30))
399  return 130;
400  else
401  return 120;
402 }
403 
405  const struct wined3d_gl_info *gl_info)
406 {
407  shader_addline(buffer, "#version %u\n", shader_glsl_get_version(gl_info));
408 }
409 
411 {
412  char str[4][17];
413 
414  wined3d_ftoa(values[0], str[0]);
415  wined3d_ftoa(values[1], str[1]);
416  wined3d_ftoa(values[2], str[2]);
417  wined3d_ftoa(values[3], str[3]);
418  shader_addline(buffer, "vec4(%s, %s, %s, %s)", str[0], str[1], str[2], str[3]);
419 }
420 
422  const int *values, unsigned int size)
423 {
424  int i;
425 
426  if (!size || size > 4)
427  {
428  ERR("Invalid vector size %u.\n", size);
429  return;
430  }
431 
432  if (size > 1)
433  shader_addline(buffer, "ivec%u(", size);
434 
435  for (i = 0; i < size; ++i)
436  shader_addline(buffer, i ? ", %#x" : "%#x", values[i]);
437 
438  if (size > 1)
439  shader_addline(buffer, ")");
440 }
441 
/* Return the start of the next line of a NUL-terminated text and advance
 * *ptr past that line.
 *
 * On return *ptr points just behind the line's '\n', or at the terminating
 * NUL when the last line has no trailing newline. Callers can therefore use
 * (*ptr - <returned pointer>) as the length of the line, newline included
 * when there is one.
 *
 * Returns NULL, leaving *ptr untouched, once *ptr is at the end of the text. */
static const char *get_info_log_line(const char **ptr)
{
    const char *line = *ptr;
    const char *newline = strchr(line, '\n');

    if (newline)
    {
        *ptr = newline + 1;
        return line;
    }

    /* No newline left: either we are at the end, or this is the final,
     * unterminated line. */
    if (!*line)
        return NULL;

    *ptr = line + strlen(line);
    return line;
}
457 
458 /* Context activation is done by the caller. */
459 void print_glsl_info_log(const struct wined3d_gl_info *gl_info, GLuint id, BOOL program)
460 {
461  int length = 0;
462  char *log;
463 
464  if (!WARN_ON(d3d_shader) && !FIXME_ON(d3d_shader))
465  return;
466 
467  if (program)
468  GL_EXTCALL(glGetProgramiv(id, GL_INFO_LOG_LENGTH, &length));
469  else
470  GL_EXTCALL(glGetShaderiv(id, GL_INFO_LOG_LENGTH, &length));
471 
472  /* A size of 1 is just a null-terminated string, so the log should be bigger than
473  * that if there are errors. */
474  if (length > 1)
475  {
476  const char *ptr, *line;
477 
478  log = heap_alloc(length);
479  /* The info log is supposed to be zero-terminated, but at least some
480  * versions of fglrx don't terminate the string properly. The reported
481  * length does include the terminator, so explicitly set it to zero
482  * here. */
483  log[length - 1] = 0;
484  if (program)
485  GL_EXTCALL(glGetProgramInfoLog(id, length, NULL, log));
486  else
487  GL_EXTCALL(glGetShaderInfoLog(id, length, NULL, log));
488 
489  ptr = log;
490  if (gl_info->quirks & WINED3D_QUIRK_INFO_LOG_SPAM)
491  {
492  WARN("Info log received from GLSL shader #%u:\n", id);
493  while ((line = get_info_log_line(&ptr))) WARN(" %.*s", (int)(ptr - line), line);
494  }
495  else
496  {
497  FIXME("Info log received from GLSL shader #%u:\n", id);
498  while ((line = get_info_log_line(&ptr))) FIXME(" %.*s", (int)(ptr - line), line);
499  }
500  heap_free(log);
501  }
502 }
503 
504 /* Context activation is done by the caller. */
505 static void shader_glsl_compile(const struct wined3d_gl_info *gl_info, GLuint shader, const char *src)
506 {
507  const char *ptr, *line;
508 
509  TRACE("Compiling shader object %u.\n", shader);
510 
511  if (TRACE_ON(d3d_shader))
512  {
513  ptr = src;
514  while ((line = get_info_log_line(&ptr))) TRACE_(d3d_shader)(" %.*s", (int)(ptr - line), line);
515  }
516 
517  GL_EXTCALL(glShaderSource(shader, 1, &src, NULL));
518  checkGLcall("glShaderSource");
519  GL_EXTCALL(glCompileShader(shader));
520  checkGLcall("glCompileShader");
521  print_glsl_info_log(gl_info, shader, FALSE);
522 }
523 
524 /* Context activation is done by the caller. */
526 {
527  GLint i, shader_count, source_size = -1;
528  GLuint *shaders;
529  char *source = NULL;
530 
531  GL_EXTCALL(glGetProgramiv(program, GL_ATTACHED_SHADERS, &shader_count));
532  if (!(shaders = heap_calloc(shader_count, sizeof(*shaders))))
533  {
534  ERR("Failed to allocate shader array memory.\n");
535  return;
536  }
537 
538  GL_EXTCALL(glGetAttachedShaders(program, shader_count, NULL, shaders));
539  for (i = 0; i < shader_count; ++i)
540  {
541  const char *ptr, *line;
542  GLint tmp;
543 
544  GL_EXTCALL(glGetShaderiv(shaders[i], GL_SHADER_SOURCE_LENGTH, &tmp));
545 
546  if (source_size < tmp)
547  {
548  heap_free(source);
549 
550  if (!(source = heap_alloc_zero(tmp)))
551  {
552  ERR("Failed to allocate %d bytes for shader source.\n", tmp);
554  return;
555  }
556  source_size = tmp;
557  }
558 
559  FIXME("Shader %u:\n", shaders[i]);
560  GL_EXTCALL(glGetShaderiv(shaders[i], GL_SHADER_TYPE, &tmp));
561  FIXME(" GL_SHADER_TYPE: %s.\n", debug_gl_shader_type(tmp));
562  GL_EXTCALL(glGetShaderiv(shaders[i], GL_COMPILE_STATUS, &tmp));
563  FIXME(" GL_COMPILE_STATUS: %d.\n", tmp);
564  FIXME("\n");
565 
566  ptr = source;
567  GL_EXTCALL(glGetShaderSource(shaders[i], source_size, NULL, source));
568  while ((line = get_info_log_line(&ptr))) FIXME(" %.*s", (int)(ptr - line), line);
569  FIXME("\n");
570  }
571 
572  heap_free(source);
574 }
575 
576 /* Context activation is done by the caller. */
578 {
579  GLint tmp;
580 
581  if (!TRACE_ON(d3d_shader) && !FIXME_ON(d3d_shader))
582  return;
583 
584  GL_EXTCALL(glGetProgramiv(program, GL_LINK_STATUS, &tmp));
585  if (!tmp)
586  {
587  FIXME("Program %u link status invalid.\n", program);
589  }
590 
591  print_glsl_info_log(gl_info, program, TRUE);
592 }
593 
595 {
596  /* Layout qualifiers were introduced in GLSL 1.40. The Nvidia Legacy GPU
597  * driver (series 340.xx) doesn't parse layout qualifiers in older GLSL
598  * versions. */
599  return shader_glsl_get_version(gl_info) >= 140;
600 }
601 
603 {
605 }
606 
608  struct shader_glsl_priv *priv, GLuint program_id,
609  const struct wined3d_shader_reg_maps *reg_maps)
610 {
611  const char *prefix = shader_glsl_get_prefix(reg_maps->shader_version.type);
612  struct wined3d_string_buffer *name;
613  unsigned int i, base, count;
614  GLuint block_idx;
615 
617  return;
618 
621  for (i = 0; i < count; ++i)
622  {
623  if (!reg_maps->cb_sizes[i])
624  continue;
625 
626  string_buffer_sprintf(name, "block_%s_cb%u", prefix, i);
627  block_idx = GL_EXTCALL(glGetUniformBlockIndex(program_id, name->buffer));
628  GL_EXTCALL(glUniformBlockBinding(program_id, block_idx, base + i));
629  }
630  checkGLcall("glUniformBlockBinding");
632 }
633 
634 /* Context activation is done by the caller. */
635 static void shader_glsl_load_samplers_range(const struct wined3d_gl_info *gl_info,
636  struct shader_glsl_priv *priv, GLuint program_id, const char *prefix,
637  unsigned int base, unsigned int count, const DWORD *tex_unit_map)
638 {
639  struct wined3d_string_buffer *sampler_name = string_buffer_get(&priv->string_buffers);
640  unsigned int i, mapped_unit;
641  GLint name_loc;
642 
643  for (i = 0; i < count; ++i)
644  {
645  string_buffer_sprintf(sampler_name, "%s_sampler%u", prefix, i);
646  name_loc = GL_EXTCALL(glGetUniformLocation(program_id, sampler_name->buffer));
647  if (name_loc == -1)
648  continue;
649 
650  mapped_unit = tex_unit_map ? tex_unit_map[base + i] : base + i;
651  if (mapped_unit == WINED3D_UNMAPPED_STAGE || mapped_unit >= gl_info->limits.combined_samplers)
652  {
653  ERR("Trying to load sampler %s on unsupported unit %u.\n", sampler_name->buffer, mapped_unit);
654  continue;
655  }
656 
657  TRACE("Loading sampler %s on unit %u.\n", sampler_name->buffer, mapped_unit);
658  GL_EXTCALL(glUniform1i(name_loc, mapped_unit));
659  }
660  checkGLcall("Load sampler bindings");
661  string_buffer_release(&priv->string_buffers, sampler_name);
662 }
663 
664 static unsigned int shader_glsl_map_tex_unit(const struct wined3d_context *context,
665  const struct wined3d_shader_version *shader_version, unsigned int sampler_idx)
666 {
667  const DWORD *tex_unit_map;
668  unsigned int base, count;
669 
670  tex_unit_map = context_get_tex_unit_mapping(context, shader_version, &base, &count);
671  if (sampler_idx >= count)
672  return WINED3D_UNMAPPED_STAGE;
673  if (!tex_unit_map)
674  return base + sampler_idx;
675  return tex_unit_map[base + sampler_idx];
676 }
677 
679  const struct wined3d_context *context, const struct wined3d_shader_version *shader_version,
680  unsigned int sampler_idx)
681 {
682  unsigned int mapped_unit = shader_glsl_map_tex_unit(context, shader_version, sampler_idx);
683  if (mapped_unit != WINED3D_UNMAPPED_STAGE)
684  shader_addline(buffer, "layout(binding = %u)\n", mapped_unit);
685  else
686  ERR("Unmapped sampler %u.\n", sampler_idx);
687 }
688 
689 /* Context activation is done by the caller. */
691  struct shader_glsl_priv *priv, GLuint program_id, const struct wined3d_shader_reg_maps *reg_maps)
692 {
693  const struct wined3d_gl_info *gl_info = context->gl_info;
694  const struct wined3d_shader_version *shader_version;
695  const DWORD *tex_unit_map;
696  unsigned int base, count;
697  const char *prefix;
698 
700  return;
701 
702  shader_version = reg_maps ? &reg_maps->shader_version : NULL;
703  prefix = shader_glsl_get_prefix(shader_version ? shader_version->type : WINED3D_SHADER_TYPE_PIXEL);
704  tex_unit_map = context_get_tex_unit_mapping(context, shader_version, &base, &count);
705  shader_glsl_load_samplers_range(gl_info, priv, program_id, prefix, base, count, tex_unit_map);
706 }
707 
708 static void shader_glsl_load_icb(const struct wined3d_gl_info *gl_info, struct shader_glsl_priv *priv,
709  GLuint program_id, const struct wined3d_shader_reg_maps *reg_maps)
710 {
711  const struct wined3d_shader_immediate_constant_buffer *icb = reg_maps->icb;
712 
713  if (icb)
714  {
715  struct wined3d_string_buffer *icb_name = string_buffer_get(&priv->string_buffers);
716  const char *prefix = shader_glsl_get_prefix(reg_maps->shader_version.type);
717  GLint icb_location;
718 
719  string_buffer_sprintf(icb_name, "%s_icb", prefix);
720  icb_location = GL_EXTCALL(glGetUniformLocation(program_id, icb_name->buffer));
721  GL_EXTCALL(glUniform4fv(icb_location, icb->vec4_count, (const GLfloat *)icb->data));
722  checkGLcall("Load immediate constant buffer");
723 
724  string_buffer_release(&priv->string_buffers, icb_name);
725  }
726 }
727 
728 /* Context activation is done by the caller. */
729 static void shader_glsl_load_images(const struct wined3d_gl_info *gl_info, struct shader_glsl_priv *priv,
730  GLuint program_id, const struct wined3d_shader_reg_maps *reg_maps)
731 {
732  const char *prefix = shader_glsl_get_prefix(reg_maps->shader_version.type);
733  struct wined3d_string_buffer *name;
734  GLint location;
735  unsigned int i;
736 
738  return;
739 
741  for (i = 0; i < MAX_UNORDERED_ACCESS_VIEWS; ++i)
742  {
743  if (!reg_maps->uav_resource_info[i].type)
744  continue;
745 
746  string_buffer_sprintf(name, "%s_image%u", prefix, i);
747  location = GL_EXTCALL(glGetUniformLocation(program_id, name->buffer));
748  if (location == -1)
749  continue;
750 
751  TRACE("Loading image %s on unit %u.\n", name->buffer, i);
752  GL_EXTCALL(glUniform1i(location, i));
753  }
754  checkGLcall("Load image bindings");
756 }
757 
758 /* Context activation is done by the caller. */
760  struct shader_glsl_priv *priv, GLuint program_id, const struct wined3d_shader *shader)
761 {
762  const struct wined3d_shader_reg_maps *reg_maps = &shader->reg_maps;
763 
764  shader_glsl_init_uniform_block_bindings(context->gl_info, priv, program_id, reg_maps);
765  shader_glsl_load_icb(context->gl_info, priv, program_id, reg_maps);
766  /* Texture unit mapping is set up to be the same each time the shader
767  * program is used so we can hardcode the sampler uniform values. */
768  shader_glsl_load_samplers(context, priv, program_id, reg_maps);
769 }
770 
771 static void append_transform_feedback_varying(const char **varyings, unsigned int *varying_count,
772  char **strings, unsigned int *strings_length, struct wined3d_string_buffer *buffer)
773 {
774  if (varyings && *strings)
775  {
776  char *ptr = *strings;
777 
778  varyings[*varying_count] = ptr;
779 
780  memcpy(ptr, buffer->buffer, buffer->content_size + 1);
781  ptr += buffer->content_size + 1;
782 
783  *strings = ptr;
784  }
785 
786  *strings_length += buffer->content_size + 1;
787  ++(*varying_count);
788 }
789 
791  unsigned int *varying_count, char **strings, unsigned int *strings_length,
792  struct wined3d_string_buffer *buffer, unsigned int component_count)
793 {
794  unsigned int j;
795 
796  for (j = 0; j < component_count / 4; ++j)
797  {
798  string_buffer_sprintf(buffer, "gl_SkipComponents4");
799  append_transform_feedback_varying(varyings, varying_count, strings, strings_length, buffer);
800  }
801  if (component_count % 4)
802  {
803  string_buffer_sprintf(buffer, "gl_SkipComponents%u", component_count % 4);
804  append_transform_feedback_varying(varyings, varying_count, strings, strings_length, buffer);
805  }
806 }
807 
809  struct wined3d_string_buffer *buffer, const char **varyings, unsigned int *varying_count,
810  char *strings, unsigned int *strings_length, GLenum buffer_mode)
811 {
812  unsigned int i, buffer_idx, count, length, highest_output_slot, stride;
813 
814  count = length = 0;
815  highest_output_slot = 0;
816  for (buffer_idx = 0; buffer_idx < WINED3D_MAX_STREAM_OUTPUT_BUFFERS; ++buffer_idx)
817  {
818  stride = 0;
819 
820  for (i = 0; i < so_desc->element_count; ++i)
821  {
822  const struct wined3d_stream_output_element *e = &so_desc->elements[i];
823 
824  highest_output_slot = max(highest_output_slot, e->output_slot);
825  if (e->output_slot != buffer_idx)
826  continue;
827 
828  if (e->stream_idx)
829  {
830  FIXME("Unhandled stream %u.\n", e->stream_idx);
831  continue;
832  }
833 
834  stride += e->component_count;
835 
836  if (e->register_idx == WINED3D_STREAM_OUTPUT_GAP)
837  {
839  &strings, &length, buffer, e->component_count);
840  continue;
841  }
842 
843  if (e->component_idx || e->component_count != 4)
844  {
846  {
847  FIXME("Unsupported component range %u-%u.\n", e->component_idx, e->component_count);
849  &strings, &length, buffer, e->component_count);
850  continue;
851  }
852 
853  string_buffer_sprintf(buffer, "shader_in_out.reg%u_%u_%u",
854  e->register_idx, e->component_idx, e->component_idx + e->component_count - 1);
856  }
857  else
858  {
859  string_buffer_sprintf(buffer, "shader_in_out.reg%u", e->register_idx);
861  }
862  }
863 
864  if (buffer_idx < so_desc->buffer_stride_count
865  && stride < so_desc->buffer_strides[buffer_idx] / 4)
866  {
867  unsigned int component_count = so_desc->buffer_strides[buffer_idx] / 4 - stride;
870  }
871 
872  if (highest_output_slot <= buffer_idx)
873  break;
874 
875  if (buffer_mode == GL_INTERLEAVED_ATTRIBS)
876  {
877  string_buffer_sprintf(buffer, "gl_NextBuffer");
879  }
880  }
881 
882  if (varying_count)
883  *varying_count = count;
884  if (strings_length)
885  *strings_length = length;
886 }
887 
889  struct shader_glsl_priv *priv, GLuint program_id, const struct wined3d_shader *shader)
890 {
891  const struct wined3d_stream_output_desc *so_desc = &shader->u.gs.so_desc;
892  const struct wined3d_gl_info *gl_info = context->gl_info;
894  unsigned int i, count, length;
895  const char **varyings;
896  char *strings;
897  GLenum mode;
898 
899  if (!so_desc->element_count)
900  return;
901 
902  if (gl_info->supported[ARB_TRANSFORM_FEEDBACK3])
903  {
905  }
906  else
907  {
908  unsigned int element_count[WINED3D_MAX_STREAM_OUTPUT_BUFFERS] = {0};
909 
910  for (i = 0; i < so_desc->element_count; ++i)
911  {
912  if (so_desc->elements[i].register_idx == WINED3D_STREAM_OUTPUT_GAP)
913  {
914  FIXME("ARB_transform_feedback3 is needed for stream output gaps.\n");
915  return;
916  }
917  ++element_count[so_desc->elements[i].output_slot];
918  }
919 
920  if (element_count[0] == so_desc->element_count)
921  {
923  }
924  else
925  {
927  for (i = 0; i < ARRAY_SIZE(element_count); ++i)
928  {
929  if (element_count[i] != 1)
930  break;
931  }
932  for (; i < ARRAY_SIZE(element_count); ++i)
933  {
934  if (element_count[i])
935  {
936  FIXME("Only single element per buffer is allowed in separate mode.\n");
937  return;
938  }
939  }
940  }
941  }
942 
944 
946 
947  if (!(varyings = heap_calloc(count, sizeof(*varyings))))
948  {
949  ERR("Out of memory.\n");
951  return;
952  }
953  if (!(strings = heap_calloc(length, sizeof(*strings))))
954  {
955  ERR("Out of memory.\n");
958  return;
959  }
960 
962  GL_EXTCALL(glTransformFeedbackVaryings(program_id, count, varyings, mode));
963  checkGLcall("glTransformFeedbackVaryings");
964 
968 }
969 
970 /* Context activation is done by the caller. */
971 static inline void walk_constant_heap(const struct wined3d_gl_info *gl_info, const struct wined3d_vec4 *constants,
972  const GLint *constant_locations, const struct constant_heap *heap, unsigned char *stack, DWORD version)
973 {
974  unsigned int start = ~0U, end = 0;
975  int stack_idx = 0;
976  unsigned int heap_idx = 1;
977  unsigned int idx;
978 
979  if (heap->entries[heap_idx].version <= version) return;
980 
981  idx = heap->entries[heap_idx].idx;
982  if (constant_locations[idx] != -1)
983  start = end = idx;
984  stack[stack_idx] = HEAP_NODE_TRAVERSE_LEFT;
985 
986  while (stack_idx >= 0)
987  {
988  /* Note that we fall through to the next case statement. */
989  switch(stack[stack_idx])
990  {
992  {
993  unsigned int left_idx = heap_idx << 1;
994  if (left_idx < heap->size && heap->entries[left_idx].version > version)
995  {
996  heap_idx = left_idx;
997  idx = heap->entries[heap_idx].idx;
998  if (constant_locations[idx] != -1)
999  {
1000  if (start > idx)
1001  start = idx;
1002  if (end < idx)
1003  end = idx;
1004  }
1005 
1006  stack[stack_idx++] = HEAP_NODE_TRAVERSE_RIGHT;
1007  stack[stack_idx] = HEAP_NODE_TRAVERSE_LEFT;
1008  break;
1009  }
1010  }
1011 
1013  {
1014  unsigned int right_idx = (heap_idx << 1) + 1;
1015  if (right_idx < heap->size && heap->entries[right_idx].version > version)
1016  {
1017  heap_idx = right_idx;
1018  idx = heap->entries[heap_idx].idx;
1019  if (constant_locations[idx] != -1)
1020  {
1021  if (start > idx)
1022  start = idx;
1023  if (end < idx)
1024  end = idx;
1025  }
1026 
1027  stack[stack_idx++] = HEAP_NODE_POP;
1028  stack[stack_idx] = HEAP_NODE_TRAVERSE_LEFT;
1029  break;
1030  }
1031  }
1032 
1033  case HEAP_NODE_POP:
1034  heap_idx >>= 1;
1035  --stack_idx;
1036  break;
1037  }
1038  }
1039  if (start <= end)
1040  GL_EXTCALL(glUniform4fv(constant_locations[start], end - start + 1, &constants[start].x));
1041  checkGLcall("walk_constant_heap()");
1042 }
1043 
1044 /* Context activation is done by the caller. */
1045 static inline void apply_clamped_constant(const struct wined3d_gl_info *gl_info,
1046  GLint location, const struct wined3d_vec4 *data)
1047 {
1048  GLfloat clamped_constant[4];
1049 
1050  if (location == -1) return;
1051 
1052  clamped_constant[0] = data->x < -1.0f ? -1.0f : data->x > 1.0f ? 1.0f : data->x;
1053  clamped_constant[1] = data->y < -1.0f ? -1.0f : data->y > 1.0f ? 1.0f : data->y;
1054  clamped_constant[2] = data->z < -1.0f ? -1.0f : data->z > 1.0f ? 1.0f : data->z;
1055  clamped_constant[3] = data->w < -1.0f ? -1.0f : data->w > 1.0f ? 1.0f : data->w;
1056 
1057  GL_EXTCALL(glUniform4fv(location, 1, clamped_constant));
1058 }
1059 
1060 /* Context activation is done by the caller. */
1061 static inline void walk_constant_heap_clamped(const struct wined3d_gl_info *gl_info,
1062  const struct wined3d_vec4 *constants, const GLint *constant_locations,
1063  const struct constant_heap *heap, unsigned char *stack, DWORD version)
1064 {
1065  int stack_idx = 0;
1066  unsigned int heap_idx = 1;
1067  unsigned int idx;
1068 
1069  if (heap->entries[heap_idx].version <= version) return;
1070 
1071  idx = heap->entries[heap_idx].idx;
1072  apply_clamped_constant(gl_info, constant_locations[idx], &constants[idx]);
1073  stack[stack_idx] = HEAP_NODE_TRAVERSE_LEFT;
1074 
1075  while (stack_idx >= 0)
1076  {
1077  /* Note that we fall through to the next case statement. */
1078  switch(stack[stack_idx])
1079  {
1081  {
1082  unsigned int left_idx = heap_idx << 1;
1083  if (left_idx < heap->size && heap->entries[left_idx].version > version)
1084  {
1085  heap_idx = left_idx;
1086  idx = heap->entries[heap_idx].idx;
1087  apply_clamped_constant(gl_info, constant_locations[idx], &constants[idx]);
1088 
1089  stack[stack_idx++] = HEAP_NODE_TRAVERSE_RIGHT;
1090  stack[stack_idx] = HEAP_NODE_TRAVERSE_LEFT;
1091  break;
1092  }
1093  }
1094 
1096  {
1097  unsigned int right_idx = (heap_idx << 1) + 1;
1098  if (right_idx < heap->size && heap->entries[right_idx].version > version)
1099  {
1100  heap_idx = right_idx;
1101  idx = heap->entries[heap_idx].idx;
1102  apply_clamped_constant(gl_info, constant_locations[idx], &constants[idx]);
1103 
1104  stack[stack_idx++] = HEAP_NODE_POP;
1105  stack[stack_idx] = HEAP_NODE_TRAVERSE_LEFT;
1106  break;
1107  }
1108  }
1109 
1110  case HEAP_NODE_POP:
1111  heap_idx >>= 1;
1112  --stack_idx;
1113  break;
1114  }
1115  }
1116  checkGLcall("walk_constant_heap_clamped()");
1117 }
1118 
1119 /* Context activation is done by the caller. */
1120 static void shader_glsl_load_constants_f(const struct wined3d_shader *shader, const struct wined3d_gl_info *gl_info,
1121  const struct wined3d_vec4 *constants, const GLint *constant_locations, const struct constant_heap *heap,
1122  unsigned char *stack, unsigned int version)
1123 {
1124  const struct wined3d_shader_lconst *lconst;
1125 
1126  /* 1.X pshaders have the constants clamped to [-1;1] implicitly. */
1127  if (shader->reg_maps.shader_version.major == 1
1128  && shader->reg_maps.shader_version.type == WINED3D_SHADER_TYPE_PIXEL)
1129  walk_constant_heap_clamped(gl_info, constants, constant_locations, heap, stack, version);
1130  else
1131  walk_constant_heap(gl_info, constants, constant_locations, heap, stack, version);
1132 
1133  if (!shader->load_local_constsF)
1134  {
1135  TRACE("No need to load local float constants for this shader.\n");
1136  return;
1137  }
1138 
1139  /* Immediate constants are clamped to [-1;1] at shader creation time if needed */
1140  LIST_FOR_EACH_ENTRY(lconst, &shader->constantsF, struct wined3d_shader_lconst, entry)
1141  {
1142  GL_EXTCALL(glUniform4fv(constant_locations[lconst->idx], 1, (const GLfloat *)lconst->value));
1143  }
1144  checkGLcall("glUniform4fv()");
1145 }
1146 
1147 /* Context activation is done by the caller. */
1148 static void shader_glsl_load_constants_i(const struct wined3d_shader *shader, const struct wined3d_gl_info *gl_info,
1149  const struct wined3d_ivec4 *constants, const GLint locations[WINED3D_MAX_CONSTS_I], WORD constants_set)
1150 {
1151  unsigned int i;
1152  struct list* ptr;
1153 
1154  for (i = 0; constants_set; constants_set >>= 1, ++i)
1155  {
1156  if (!(constants_set & 1)) continue;
1157 
1158  /* We found this uniform name in the program - go ahead and send the data */
1159  GL_EXTCALL(glUniform4iv(locations[i], 1, &constants[i].x));
1160  }
1161 
1162  /* Load immediate constants */
1163  ptr = list_head(&shader->constantsI);
1164  while (ptr)
1165  {
1166  const struct wined3d_shader_lconst *lconst = LIST_ENTRY(ptr, const struct wined3d_shader_lconst, entry);
1167  unsigned int idx = lconst->idx;
1168  const GLint *values = (const GLint *)lconst->value;
1169 
1170  /* We found this uniform name in the program - go ahead and send the data */
1171  GL_EXTCALL(glUniform4iv(locations[idx], 1, values));
1172  ptr = list_next(&shader->constantsI, ptr);
1173  }
1174  checkGLcall("glUniform4iv()");
1175 }
1176 
1177 /* Context activation is done by the caller. */
1178 static void shader_glsl_load_constantsB(const struct wined3d_shader *shader, const struct wined3d_gl_info *gl_info,
1179  const GLint locations[WINED3D_MAX_CONSTS_B], const BOOL *constants, WORD constants_set)
1180 {
1181  unsigned int i;
1182  struct list* ptr;
1183 
1184  for (i = 0; constants_set; constants_set >>= 1, ++i)
1185  {
1186  if (!(constants_set & 1)) continue;
1187 
1188  GL_EXTCALL(glUniform1iv(locations[i], 1, &constants[i]));
1189  }
1190 
1191  /* Load immediate constants */
1192  ptr = list_head(&shader->constantsB);
1193  while (ptr)
1194  {
1195  const struct wined3d_shader_lconst *lconst = LIST_ENTRY(ptr, const struct wined3d_shader_lconst, entry);
1196  unsigned int idx = lconst->idx;
1197  const GLint *values = (const GLint *)lconst->value;
1198 
1199  GL_EXTCALL(glUniform1iv(locations[idx], 1, values));
1200  ptr = list_next(&shader->constantsB, ptr);
1201  }
1202  checkGLcall("glUniform1iv()");
1203 }
1204 
1206 {
1207  WINE_RB_ENTRY_VALUE(entry, struct glsl_shader_prog_link, program_lookup_entry)->constant_version = 0;
1208 }
1209 
1210 /* Context activation is done by the caller (state handler). */
1212  const struct wined3d_gl_info *gl_info, const struct wined3d_state *state)
1213 {
1214  struct
1215  {
1216  float sx, sy;
1217  }
1218  np2fixup_constants[MAX_FRAGMENT_SAMPLERS];
1219  UINT fixup = ps->np2_fixup_info->active;
1220  UINT i;
1221 
1222  for (i = 0; fixup; fixup >>= 1, ++i)
1223  {
1224  const struct wined3d_texture *tex = state->textures[i];
1225  unsigned char idx = ps->np2_fixup_info->idx[i];
1226 
1227  if (!tex)
1228  {
1229  ERR("Nonexistent texture is flagged for NP2 texcoord fixup.\n");
1230  continue;
1231  }
1232 
1233  np2fixup_constants[idx].sx = tex->pow2_matrix[0];
1234  np2fixup_constants[idx].sy = tex->pow2_matrix[5];
1235  }
1236 
1237  GL_EXTCALL(glUniform4fv(ps->np2_fixup_location, ps->np2_fixup_info->num_consts, &np2fixup_constants[0].sx));
1238 }
1239 
1241  const struct wined3d_state *state, unsigned int tex, struct glsl_shader_prog_link *prog)
1242 {
1243  const struct wined3d_gl_info *gl_info = context->gl_info;
1244  struct wined3d_matrix mat;
1245 
1246  if (tex >= MAX_TEXTURES)
1247  return;
1248  if (prog->vs.texture_matrix_location[tex] == -1)
1249  return;
1250 
1252  GL_EXTCALL(glUniformMatrix4fv(prog->vs.texture_matrix_location[tex], 1, FALSE, &mat._11));
1253  checkGLcall("glUniformMatrix4fv");
1254 }
1255 
1257  const struct wined3d_state *state, struct glsl_shader_prog_link *prog)
1258 {
1259  const struct wined3d_gl_info *gl_info = context->gl_info;
1260 
1261  if (state->render_states[WINED3D_RS_SPECULARENABLE])
1262  {
1263  GL_EXTCALL(glUniform4fv(prog->vs.material_specular_location, 1, &state->material.specular.r));
1264  GL_EXTCALL(glUniform1f(prog->vs.material_shininess_location, state->material.power));
1265  }
1266  else
1267  {
1268  static const float black[] = {0.0f, 0.0f, 0.0f, 0.0f};
1269 
1270  GL_EXTCALL(glUniform4fv(prog->vs.material_specular_location, 1, black));
1271  }
1272  GL_EXTCALL(glUniform4fv(prog->vs.material_ambient_location, 1, &state->material.ambient.r));
1273  GL_EXTCALL(glUniform4fv(prog->vs.material_diffuse_location, 1, &state->material.diffuse.r));
1274  GL_EXTCALL(glUniform4fv(prog->vs.material_emissive_location, 1, &state->material.emissive.r));
1275  checkGLcall("setting FFP material uniforms");
1276 }
1277 
1279  const struct wined3d_state *state, struct glsl_shader_prog_link *prog)
1280 {
1281  const struct wined3d_gl_info *gl_info = context->gl_info;
1282  struct wined3d_color color;
1283 
1285  GL_EXTCALL(glUniform3fv(prog->vs.light_ambient_location, 1, &color.r));
1286  checkGLcall("glUniform3fv");
1287 }
1288 
1289 static void multiply_vector_matrix(struct wined3d_vec4 *dest, const struct wined3d_vec4 *src1,
1290  const struct wined3d_matrix *src2)
1291 {
1292  struct wined3d_vec4 temp;
1293 
1294  temp.x = (src1->x * src2->_11) + (src1->y * src2->_21) + (src1->z * src2->_31) + (src1->w * src2->_41);
1295  temp.y = (src1->x * src2->_12) + (src1->y * src2->_22) + (src1->z * src2->_32) + (src1->w * src2->_42);
1296  temp.z = (src1->x * src2->_13) + (src1->y * src2->_23) + (src1->z * src2->_33) + (src1->w * src2->_43);
1297  temp.w = (src1->x * src2->_14) + (src1->y * src2->_24) + (src1->z * src2->_34) + (src1->w * src2->_44);
1298 
1299  *dest = temp;
1300 }
1301 
1303  const struct wined3d_state *state, unsigned int light, const struct wined3d_light_info *light_info,
1304  struct glsl_shader_prog_link *prog)
1305 {
1306  const struct wined3d_matrix *view = &state->transforms[WINED3D_TS_VIEW];
1307  const struct wined3d_gl_info *gl_info = context->gl_info;
1308  struct wined3d_vec4 vec4;
1309 
1310  GL_EXTCALL(glUniform4fv(prog->vs.light_location[light].diffuse, 1, &light_info->OriginalParms.diffuse.r));
1311  GL_EXTCALL(glUniform4fv(prog->vs.light_location[light].specular, 1, &light_info->OriginalParms.specular.r));
1312  GL_EXTCALL(glUniform4fv(prog->vs.light_location[light].ambient, 1, &light_info->OriginalParms.ambient.r));
1313 
1314  switch (light_info->OriginalParms.type)
1315  {
1316  case WINED3D_LIGHT_POINT:
1317  multiply_vector_matrix(&vec4, &light_info->position, view);
1318  GL_EXTCALL(glUniform4fv(prog->vs.light_location[light].position, 1, &vec4.x));
1319  GL_EXTCALL(glUniform1f(prog->vs.light_location[light].range, light_info->OriginalParms.range));
1320  GL_EXTCALL(glUniform1f(prog->vs.light_location[light].c_att, light_info->OriginalParms.attenuation0));
1321  GL_EXTCALL(glUniform1f(prog->vs.light_location[light].l_att, light_info->OriginalParms.attenuation1));
1322  GL_EXTCALL(glUniform1f(prog->vs.light_location[light].q_att, light_info->OriginalParms.attenuation2));
1323  break;
1324 
1325  case WINED3D_LIGHT_SPOT:
1326  multiply_vector_matrix(&vec4, &light_info->position, view);
1327  GL_EXTCALL(glUniform4fv(prog->vs.light_location[light].position, 1, &vec4.x));
1328 
1329  multiply_vector_matrix(&vec4, &light_info->direction, view);
1330  GL_EXTCALL(glUniform3fv(prog->vs.light_location[light].direction, 1, &vec4.x));
1331 
1332  GL_EXTCALL(glUniform1f(prog->vs.light_location[light].range, light_info->OriginalParms.range));
1333  GL_EXTCALL(glUniform1f(prog->vs.light_location[light].falloff, light_info->OriginalParms.falloff));
1334  GL_EXTCALL(glUniform1f(prog->vs.light_location[light].c_att, light_info->OriginalParms.attenuation0));
1335  GL_EXTCALL(glUniform1f(prog->vs.light_location[light].l_att, light_info->OriginalParms.attenuation1));
1336  GL_EXTCALL(glUniform1f(prog->vs.light_location[light].q_att, light_info->OriginalParms.attenuation2));
1337  GL_EXTCALL(glUniform1f(prog->vs.light_location[light].cos_htheta, cosf(light_info->OriginalParms.theta / 2.0f)));
1338  GL_EXTCALL(glUniform1f(prog->vs.light_location[light].cos_hphi, cosf(light_info->OriginalParms.phi / 2.0f)));
1339  break;
1340 
1342  multiply_vector_matrix(&vec4, &light_info->direction, view);
1343  GL_EXTCALL(glUniform3fv(prog->vs.light_location[light].direction, 1, &vec4.x));
1344  break;
1345 
1347  multiply_vector_matrix(&vec4, &light_info->position, view);
1348  GL_EXTCALL(glUniform4fv(prog->vs.light_location[light].position, 1, &vec4.x));
1349  break;
1350 
1351  default:
1352  FIXME("Unrecognized light type %#x.\n", light_info->OriginalParms.type);
1353  }
1354  checkGLcall("setting FFP lights uniforms");
1355 }
1356 
1358  const struct wined3d_state *state, struct glsl_shader_prog_link *prog)
1359 {
1360  const struct wined3d_gl_info *gl_info = context->gl_info;
1361  float min, max;
1362  float size, att[3];
1363 
1365 
1366  GL_EXTCALL(glUniform1f(prog->vs.pointsize_min_location, min));
1367  checkGLcall("glUniform1f");
1368  GL_EXTCALL(glUniform1f(prog->vs.pointsize_max_location, max));
1369  checkGLcall("glUniform1f");
1370 
1371  get_pointsize(context, state, &size, att);
1372 
1373  GL_EXTCALL(glUniform1f(prog->vs.pointsize_location, size));
1374  checkGLcall("glUniform1f");
1375  GL_EXTCALL(glUniform1f(prog->vs.pointsize_c_att_location, att[0]));
1376  checkGLcall("glUniform1f");
1377  GL_EXTCALL(glUniform1f(prog->vs.pointsize_l_att_location, att[1]));
1378  checkGLcall("glUniform1f");
1379  GL_EXTCALL(glUniform1f(prog->vs.pointsize_q_att_location, att[2]));
1380  checkGLcall("glUniform1f");
1381 }
1382 
1384  const struct wined3d_state *state, struct glsl_shader_prog_link *prog)
1385 {
1386  const struct wined3d_gl_info *gl_info = context->gl_info;
1387  struct wined3d_color color;
1388  float start, end, scale;
1389  union
1390  {
1391  DWORD d;
1392  float f;
1393  } tmpvalue;
1394 
1396  GL_EXTCALL(glUniform4fv(prog->ps.fog_color_location, 1, &color.r));
1397  tmpvalue.d = state->render_states[WINED3D_RS_FOGDENSITY];
1398  GL_EXTCALL(glUniform1f(prog->ps.fog_density_location, tmpvalue.f));
1400  scale = 1.0f / (end - start);
1401  GL_EXTCALL(glUniform1f(prog->ps.fog_end_location, end));
1402  GL_EXTCALL(glUniform1f(prog->ps.fog_scale_location, scale));
1403  checkGLcall("fog emulation uniforms");
1404 }
1405 
1407  const struct wined3d_state *state, unsigned int index, struct glsl_shader_prog_link *prog)
1408 {
1409  const struct wined3d_gl_info *gl_info = context->gl_info;
1410  struct wined3d_matrix matrix;
1411  struct wined3d_vec4 plane;
1412 
1413  plane = state->clip_planes[index];
1414 
1415  /* Clip planes are affected by the view transform in d3d for FFP draws. */
1416  if (!use_vs(state))
1417  {
1418  invert_matrix(&matrix, &state->transforms[WINED3D_TS_VIEW]);
1421  }
1422 
1423  GL_EXTCALL(glUniform4fv(prog->vs.clip_planes_location + index, 1, &plane.x));
1424 }
1425 
1426 /* Context activation is done by the caller (state handler). */
1428  const struct wined3d_gl_info *gl_info, const struct wined3d_state *state)
1429 {
1430  struct wined3d_color float_key[2];
1431  const struct wined3d_texture *texture = state->textures[0];
1432 
1433  wined3d_format_get_float_color_key(texture->resource.format, &texture->async.src_blt_color_key, float_key);
1434  GL_EXTCALL(glUniform4fv(ps->color_key_location, 2, &float_key[0].r));
1435 }
1436 
1437 /* Context activation is done by the caller. */
1438 static void get_normal_matrix(struct wined3d_context *context, struct wined3d_matrix *mat, float *normal)
1439 {
1440  int i, j;
1441 
1442  if (context->d3d_info->wined3d_creation_flags & WINED3D_LEGACY_FFP_LIGHTING)
1444  else
1445  invert_matrix(mat, mat);
1446  /* Tests show that singular modelview matrices are used unchanged as normal
1447  * matrices on D3D3 and older. There seems to be no clearly consistent
1448  * behavior on newer D3D versions so always follow older ddraw behavior. */
1449  for (i = 0; i < 3; ++i)
1450  for (j = 0; j < 3; ++j)
1451  normal[i * 3 + j] = (&mat->_11)[j * 4 + i];
1452 }
1453 
1454 /* Context activation is done by the caller (state handler). */
1455 static void shader_glsl_load_constants(void *shader_priv, struct wined3d_context *context,
1456  const struct wined3d_state *state)
1457 {
1458  const struct glsl_context_data *ctx_data = context->shader_backend_data;
1459  const struct wined3d_shader *vshader = state->shader[WINED3D_SHADER_TYPE_VERTEX];
1460  const struct wined3d_shader *pshader = state->shader[WINED3D_SHADER_TYPE_PIXEL];
1461  const struct wined3d_gl_info *gl_info = context->gl_info;
1462  struct shader_glsl_priv *priv = shader_priv;
1463  float position_fixup[4];
1464  float normal[3 * 3];
1465  DWORD update_mask;
1466 
1467  struct glsl_shader_prog_link *prog = ctx_data->glsl_program;
1469  int i;
1470 
1471  if (!prog) {
1472  /* No GLSL program set - nothing to do. */
1473  return;
1474  }
1475  constant_version = prog->constant_version;
1476  update_mask = context->constant_update_mask & prog->constant_update_mask;
1477 
1478  if (update_mask & WINED3D_SHADER_CONST_VS_F)
1479  shader_glsl_load_constants_f(vshader, gl_info, state->vs_consts_f,
1480  prog->vs.uniform_f_locations, &priv->vconst_heap, priv->stack, constant_version);
1481 
1482  if (update_mask & WINED3D_SHADER_CONST_VS_I)
1483  shader_glsl_load_constants_i(vshader, gl_info, state->vs_consts_i,
1484  prog->vs.uniform_i_locations, vshader->reg_maps.integer_constants);
1485 
1486  if (update_mask & WINED3D_SHADER_CONST_VS_B)
1487  shader_glsl_load_constantsB(vshader, gl_info, prog->vs.uniform_b_locations, state->vs_consts_b,
1488  vshader->reg_maps.boolean_constants);
1489 
1490  if (update_mask & WINED3D_SHADER_CONST_VS_CLIP_PLANES)
1491  {
1492  for (i = 0; i < gl_info->limits.user_clip_distances; ++i)
1494  }
1495 
1496  if (update_mask & WINED3D_SHADER_CONST_VS_POINTSIZE)
1498 
1499  if (update_mask & WINED3D_SHADER_CONST_POS_FIXUP)
1500  {
1501  shader_get_position_fixup(context, state, position_fixup);
1502  if (state->shader[WINED3D_SHADER_TYPE_GEOMETRY])
1503  GL_EXTCALL(glUniform4fv(prog->gs.pos_fixup_location, 1, position_fixup));
1504  else if (state->shader[WINED3D_SHADER_TYPE_DOMAIN])
1505  GL_EXTCALL(glUniform4fv(prog->ds.pos_fixup_location, 1, position_fixup));
1506  else
1507  GL_EXTCALL(glUniform4fv(prog->vs.pos_fixup_location, 1, position_fixup));
1508  checkGLcall("glUniform4fv");
1509  }
1510 
1511  if (update_mask & WINED3D_SHADER_CONST_FFP_MODELVIEW)
1512  {
1513  struct wined3d_matrix mat;
1514 
1516  GL_EXTCALL(glUniformMatrix4fv(prog->vs.modelview_matrix_location[0], 1, FALSE, &mat._11));
1517  checkGLcall("glUniformMatrix4fv");
1518 
1520  GL_EXTCALL(glUniformMatrix3fv(prog->vs.normal_matrix_location[0], 1, FALSE, normal));
1521  checkGLcall("glUniformMatrix3fv");
1522  }
1523 
1524  if (update_mask & WINED3D_SHADER_CONST_FFP_VERTEXBLEND)
1525  {
1526  struct wined3d_matrix mat;
1527 
1528  for (i = 1; i < MAX_VERTEX_INDEX_BLENDS; ++i)
1529  {
1530  if (prog->vs.modelview_matrix_location[i] == -1)
1531  break;
1532  if (!(update_mask & WINED3D_SHADER_CONST_FFP_VERTEXBLEND_INDEX(i)))
1533  continue;
1534 
1536  GL_EXTCALL(glUniformMatrix4fv(prog->vs.modelview_matrix_location[i], 1, FALSE, &mat._11));
1537  checkGLcall("glUniformMatrix4fv");
1538 
1540  GL_EXTCALL(glUniformMatrix3fv(prog->vs.normal_matrix_location[i], 1, FALSE, normal));
1541  checkGLcall("glUniformMatrix3fv");
1542  }
1543  }
1544 
1545  if (update_mask & WINED3D_SHADER_CONST_FFP_PROJ)
1546  {
1547  struct wined3d_matrix projection;
1548 
1549  get_projection_matrix(context, state, &projection);
1550  GL_EXTCALL(glUniformMatrix4fv(prog->vs.projection_matrix_location, 1, FALSE, &projection._11));
1551  checkGLcall("glUniformMatrix4fv");
1552  }
1553 
1554  if (update_mask & WINED3D_SHADER_CONST_FFP_TEXMATRIX)
1555  {
1556  for (i = 0; i < MAX_TEXTURES; ++i)
1558  }
1559 
1560  if (update_mask & WINED3D_SHADER_CONST_FFP_MATERIAL)
1562 
1563  if (update_mask & WINED3D_SHADER_CONST_FFP_LIGHTS)
1564  {
1565  unsigned int point_idx, spot_idx, directional_idx, parallel_point_idx;
1566  DWORD point_count = 0;
1567  DWORD spot_count = 0;
1568  DWORD directional_count = 0;
1569  DWORD parallel_point_count = 0;
1570 
1571  for (i = 0; i < MAX_ACTIVE_LIGHTS; ++i)
1572  {
1573  if (!state->lights[i])
1574  continue;
1575 
1576  switch (state->lights[i]->OriginalParms.type)
1577  {
1578  case WINED3D_LIGHT_POINT:
1579  ++point_count;
1580  break;
1581  case WINED3D_LIGHT_SPOT:
1582  ++spot_count;
1583  break;
1585  ++directional_count;
1586  break;
1588  ++parallel_point_count;
1589  break;
1590  default:
1591  FIXME("Unhandled light type %#x.\n", state->lights[i]->OriginalParms.type);
1592  break;
1593  }
1594  }
1595  point_idx = 0;
1596  spot_idx = point_idx + point_count;
1597  directional_idx = spot_idx + spot_count;
1598  parallel_point_idx = directional_idx + directional_count;
1599 
1601  for (i = 0; i < MAX_ACTIVE_LIGHTS; ++i)
1602  {
1603  const struct wined3d_light_info *light_info = state->lights[i];
1604  unsigned int idx;
1605 
1606  if (!light_info)
1607  continue;
1608 
1609  switch (light_info->OriginalParms.type)
1610  {
1611  case WINED3D_LIGHT_POINT:
1612  idx = point_idx++;
1613  break;
1614  case WINED3D_LIGHT_SPOT:
1615  idx = spot_idx++;
1616  break;
1618  idx = directional_idx++;
1619  break;
1621  idx = parallel_point_idx++;
1622  break;
1623  default:
1624  FIXME("Unhandled light type %#x.\n", light_info->OriginalParms.type);
1625  continue;
1626  }
1628  }
1629  }
1630 
1631  if (update_mask & WINED3D_SHADER_CONST_PS_F)
1632  shader_glsl_load_constants_f(pshader, gl_info, state->ps_consts_f,
1633  prog->ps.uniform_f_locations, &priv->pconst_heap, priv->stack, constant_version);
1634 
1635  if (update_mask & WINED3D_SHADER_CONST_PS_I)
1636  shader_glsl_load_constants_i(pshader, gl_info, state->ps_consts_i,
1637  prog->ps.uniform_i_locations, pshader->reg_maps.integer_constants);
1638 
1639  if (update_mask & WINED3D_SHADER_CONST_PS_B)
1640  shader_glsl_load_constantsB(pshader, gl_info, prog->ps.uniform_b_locations, state->ps_consts_b,
1641  pshader->reg_maps.boolean_constants);
1642 
1643  if (update_mask & WINED3D_SHADER_CONST_PS_BUMP_ENV)
1644  {
1645  for (i = 0; i < MAX_TEXTURES; ++i)
1646  {
1647  if (prog->ps.bumpenv_mat_location[i] == -1)
1648  continue;
1649 
1650  GL_EXTCALL(glUniformMatrix2fv(prog->ps.bumpenv_mat_location[i], 1, 0,
1651  (const GLfloat *)&state->texture_states[i][WINED3D_TSS_BUMPENV_MAT00]));
1652 
1653  if (prog->ps.bumpenv_lum_scale_location[i] != -1)
1654  {
1655  GL_EXTCALL(glUniform1fv(prog->ps.bumpenv_lum_scale_location[i], 1,
1656  (const GLfloat *)&state->texture_states[i][WINED3D_TSS_BUMPENV_LSCALE]));
1657  GL_EXTCALL(glUniform1fv(prog->ps.bumpenv_lum_offset_location[i], 1,
1658  (const GLfloat *)&state->texture_states[i][WINED3D_TSS_BUMPENV_LOFFSET]));
1659  }
1660  }
1661 
1662  checkGLcall("bump env uniforms");
1663  }
1664 
1665  if (update_mask & WINED3D_SHADER_CONST_PS_Y_CORR)
1666  {
1667  const struct wined3d_vec4 correction_params =
1668  {
1669  /* Position is relative to the framebuffer, not the viewport. */
1670  context->render_offscreen ? 0.0f : (float)state->fb->render_targets[0]->height,
1671  context->render_offscreen ? 1.0f : -1.0f,
1672  0.0f,
1673  0.0f,
1674  };
1675 
1676  GL_EXTCALL(glUniform4fv(prog->ps.ycorrection_location, 1, &correction_params.x));
1677  }
1678 
1679  if (update_mask & WINED3D_SHADER_CONST_PS_NP2_FIXUP)
1681  if (update_mask & WINED3D_SHADER_CONST_FFP_COLOR_KEY)
1683 
1684  if (update_mask & WINED3D_SHADER_CONST_FFP_PS)
1685  {
1686  struct wined3d_color color;
1687 
1688  if (prog->ps.tex_factor_location != -1)
1689  {
1691  GL_EXTCALL(glUniform4fv(prog->ps.tex_factor_location, 1, &color.r));
1692  }
1693 
1694  if (state->render_states[WINED3D_RS_SPECULARENABLE])
1695  GL_EXTCALL(glUniform4f(prog->ps.specular_enable_location, 1.0f, 1.0f, 1.0f, 0.0f));
1696  else
1697  GL_EXTCALL(glUniform4f(prog->ps.specular_enable_location, 0.0f, 0.0f, 0.0f, 0.0f));
1698 
1699  for (i = 0; i < MAX_TEXTURES; ++i)
1700  {
1701  if (prog->ps.tss_constant_location[i] == -1)
1702  continue;
1703 
1705  GL_EXTCALL(glUniform4fv(prog->ps.tss_constant_location[i], 1, &color.r));
1706  }
1707 
1708  checkGLcall("fixed function uniforms");
1709  }
1710 
1711  if (update_mask & WINED3D_SHADER_CONST_PS_FOG)
1713 
1714  if (update_mask & WINED3D_SHADER_CONST_PS_ALPHA_TEST)
1715  {
1716  float ref = state->render_states[WINED3D_RS_ALPHAREF] / 255.0f;
1717 
1718  GL_EXTCALL(glUniform1f(prog->ps.alpha_test_ref_location, ref));
1719  checkGLcall("alpha test emulation uniform");
1720  }
1721 
1722  if (priv->next_constant_version == UINT_MAX)
1723  {
1724  TRACE("Max constant version reached, resetting to 0.\n");
1726  priv->next_constant_version = 1;
1727  }
1728  else
1729  {
1730  prog->constant_version = priv->next_constant_version++;
1731  }
1732 }
1733 
1734 static void update_heap_entry(struct constant_heap *heap, unsigned int idx, DWORD new_version)
1735 {
1736  struct constant_entry *entries = heap->entries;
1737  unsigned int *positions = heap->positions;
1738  unsigned int heap_idx, parent_idx;
1739 
1740  if (!heap->contained[idx])
1741  {
1742  heap_idx = heap->size++;
1743  heap->contained[idx] = TRUE;
1744  }
1745  else
1746  {
1747  heap_idx = positions[idx];
1748  }
1749 
1750  while (heap_idx > 1)
1751  {
1752  parent_idx = heap_idx >> 1;
1753 
1754  if (new_version <= entries[parent_idx].version) break;
1755 
1756  entries[heap_idx] = entries[parent_idx];
1757  positions[entries[parent_idx].idx] = heap_idx;
1758  heap_idx = parent_idx;
1759  }
1760 
1761  entries[heap_idx].version = new_version;
1762  entries[heap_idx].idx = idx;
1763  positions[idx] = heap_idx;
1764 }
1765 
1767 {
1768  struct shader_glsl_priv *priv = device->shader_priv;
1769  struct constant_heap *heap = &priv->vconst_heap;
1770  UINT i;
1771 
1772  for (i = start; i < count + start; ++i)
1773  {
1775  }
1776 }
1777 
1779 {
1780  struct shader_glsl_priv *priv = device->shader_priv;
1781  struct constant_heap *heap = &priv->pconst_heap;
1782  UINT i;
1783 
1784  for (i = start; i < count + start; ++i)
1785  {
1787  }
1788 }
1789 
1790 static unsigned int vec4_varyings(DWORD shader_major, const struct wined3d_gl_info *gl_info)
1791 {
1792  unsigned int ret = gl_info->limits.glsl_varyings / 4;
1793  /* 4.0 shaders do not write clip coords because d3d10 does not support user clipplanes */
1794  if(shader_major > 3) return ret;
1795 
1796  /* 3.0 shaders may need an extra varying for the clip coord on some cards(mostly dx10 ones) */
1797  if (gl_info->quirks & WINED3D_QUIRK_GLSL_CLIP_VARYING) ret -= 1;
1798  return ret;
1799 }
1800 
1801 static BOOL needs_legacy_glsl_syntax(const struct wined3d_gl_info *gl_info)
1802 {
1803  return gl_info->glsl_version < MAKEDWORD_VERSION(1, 30);
1804 }
1805 
1807 {
1808  return gl_info->supported[ARB_EXPLICIT_ATTRIB_LOCATION]
1810 }
1811 
1813 {
1814  return shader_glsl_get_version(gl_info) >= 150;
1815 }
1816 
/* Return the storage qualifier used to declare a vertex shader input:
 * "attribute" with legacy GLSL syntax, "in" otherwise. */
static const char *get_attribute_keyword(const struct wined3d_gl_info *gl_info)
{
    if (needs_legacy_glsl_syntax(gl_info))
        return "attribute";

    return "in";
}
1821 
1822 static void PRINTF_ATTR(4, 5) declare_in_varying(const struct wined3d_gl_info *gl_info,
1823  struct wined3d_string_buffer *buffer, BOOL flat, const char *format, ...)
1824 {
1825  va_list args;
1826  int ret;
1827 
1828  shader_addline(buffer, "%s%s ", flat ? "flat " : "",
1829  needs_legacy_glsl_syntax(gl_info) ? "varying" : "in");
1830  for (;;)
1831  {
1832  va_start(args, format);
1834  va_end(args);
1835  if (!ret)
1836  return;
1838  return;
1839  }
1840 }
1841 
1842 static void PRINTF_ATTR(4, 5) declare_out_varying(const struct wined3d_gl_info *gl_info,
1843  struct wined3d_string_buffer *buffer, BOOL flat, const char *format, ...)
1844 {
1845  va_list args;
1846  int ret;
1847 
1848  shader_addline(buffer, "%s%s ", flat ? "flat " : "",
1849  needs_legacy_glsl_syntax(gl_info) ? "varying" : "out");
1850  for (;;)
1851  {
1852  va_start(args, format);
1854  va_end(args);
1855  if (!ret)
1856  return;
1858  return;
1859  }
1860 }
1861 
/* Return the GLSL name prefix of the inter-stage input registers:
 * "shader_in.reg" when interface blocks are in use, otherwise the plain
 * "ps_link" varying array. */
static const char *shader_glsl_shader_input_name(const struct wined3d_gl_info *gl_info)
{
    if (shader_glsl_use_interface_blocks(gl_info))
        return "shader_in.reg";

    return "ps_link";
}
1866 
/* Return the GLSL name prefix of the inter-stage output registers:
 * "shader_out.reg" when interface blocks are in use, otherwise the plain
 * "ps_link" varying array. */
static const char *shader_glsl_shader_output_name(const struct wined3d_gl_info *gl_info)
{
    if (shader_glsl_use_interface_blocks(gl_info))
        return "shader_out.reg";

    return "ps_link";
}
1871 
1873 {
1874  switch (mode)
1875  {
1876  case WINED3DSIM_CONSTANT:
1877  return "flat";
1879  return "noperspective";
1880  default:
1881  FIXME("Unhandled interpolation mode %#x.\n", mode);
1882  case WINED3DSIM_NONE:
1883  case WINED3DSIM_LINEAR:
1884  return "";
1885  }
1886 }
1887 
1889  const DWORD *packed_interpolation_mode, unsigned int register_idx)
1890 {
1891  return wined3d_extract_bits(packed_interpolation_mode,
1893 }
1894 
1895 static void shader_glsl_declare_shader_inputs(const struct wined3d_gl_info *gl_info,
1896  struct wined3d_string_buffer *buffer, unsigned int element_count,
1897  const DWORD *interpolation_mode, BOOL unroll)
1898 {
1900  unsigned int i;
1901 
1902  if (shader_glsl_use_interface_blocks(gl_info))
1903  {
1904  if (unroll)
1905  {
1906  shader_addline(buffer, "in shader_in_out {\n");
1907  for (i = 0; i < element_count; ++i)
1908  {
1909  mode = wined3d_extract_interpolation_mode(interpolation_mode, i);
1911  }
1912  shader_addline(buffer, "} shader_in;\n");
1913  }
1914  else
1915  {
1916  shader_addline(buffer, "in shader_in_out { vec4 reg[%u]; } shader_in;\n", element_count);
1917  }
1918  }
1919  else
1920  {
1921  declare_in_varying(gl_info, buffer, FALSE, "vec4 ps_link[%u];\n", element_count);
1922  }
1923 }
1924 
1925 static void shader_glsl_declare_shader_outputs(const struct wined3d_gl_info *gl_info,
1926  struct wined3d_string_buffer *buffer, unsigned int element_count, BOOL rasterizer_setup,
1927  const DWORD *interpolation_mode)
1928 {
1930  unsigned int i;
1931 
1932  if (shader_glsl_use_interface_blocks(gl_info))
1933  {
1934  if (rasterizer_setup)
1935  {
1936  shader_addline(buffer, "out shader_in_out {\n");
1937  for (i = 0; i < element_count; ++i)
1938  {
1939  const char *interpolation_qualifiers = "";
1941  {
1942  mode = wined3d_extract_interpolation_mode(interpolation_mode, i);
1943  interpolation_qualifiers = shader_glsl_interpolation_qualifiers(mode);
1944  }
1945  shader_addline(buffer, "%s vec4 reg%u;\n", interpolation_qualifiers, i);
1946  }
1947  shader_addline(buffer, "} shader_out;\n");
1948  }
1949  else
1950  {
1951  shader_addline(buffer, "out shader_in_out { vec4 reg[%u]; } shader_out;\n", element_count);
1952  }
1953  }
1954  else
1955  {
1956  declare_out_varying(gl_info, buffer, FALSE, "vec4 ps_link[%u];\n", element_count);
1957  }
1958 }
1959 
1960 static const char *glsl_primitive_type_from_d3d(enum wined3d_primitive_type primitive_type)
1961 {
1962  switch (primitive_type)
1963  {
1964  case WINED3D_PT_POINTLIST:
1965  return "points";
1966 
1967  case WINED3D_PT_LINELIST:
1968  return "lines";
1969 
1970  case WINED3D_PT_LINESTRIP:
1971  return "line_strip";
1972 
1974  return "triangles";
1975 
1977  return "triangle_strip";
1978 
1980  return "lines_adjacency";
1981 
1983  return "triangles_adjacency";
1984 
1985  default:
1986  FIXME("Unhandled primitive type %s.\n", debug_d3dprimitivetype(primitive_type));
1987  return "";
1988  }
1989 }
1990 
1991 static BOOL glsl_is_color_reg_read(const struct wined3d_shader *shader, unsigned int idx)
1992 {
1993  const struct wined3d_shader_signature *input_signature = &shader->input_signature;
1994  const struct wined3d_shader_reg_maps *reg_maps = &shader->reg_maps;
1995  DWORD input_reg_used = shader->u.ps.input_reg_used;
1996  unsigned int i;
1997 
1998  if (reg_maps->shader_version.major < 3)
1999  return input_reg_used & (1u << idx);
2000 
2001  for (i = 0; i < input_signature->element_count; ++i)
2002  {
2003  const struct wined3d_shader_signature_element *input = &input_signature->elements[i];
2004 
2005  if (!(reg_maps->input_registers & (1u << input->register_idx)))
2006  continue;
2007 
2009  && input->semantic_idx == idx)
2010  return input_reg_used & (1u << input->register_idx);
2011  }
2012  return FALSE;
2013 }
2014 
2016  const struct ps_compile_args *ps_args, unsigned int resource_idx, unsigned int sampler_idx)
2017 {
2018  const struct wined3d_shader_version *version = &shader->reg_maps.shader_version;
2019 
2020  if (version->major >= 4)
2021  return shader->reg_maps.sampler_comparison_mode & (1u << sampler_idx);
2022  else
2023  return version->type == WINED3D_SHADER_TYPE_PIXEL && (ps_args->shadow & (1u << resource_idx));
2024 }
2025 
2027  const struct wined3d_gl_info *gl_info, const char *vector_type, const char *scalar_type,
2028  unsigned int index)
2029 {
2030  shader_addline(buffer, "%s %s4 vs_in_%s%u;\n",
2031  get_attribute_keyword(gl_info), vector_type, scalar_type, index);
2032  shader_addline(buffer, "vec4 vs_in%u = %sBitsToFloat(vs_in_%s%u);\n",
2033  index, scalar_type, scalar_type, index);
2034 }
2035 
2037  const struct wined3d_gl_info *gl_info, const struct wined3d_shader_signature_element *e)
2038 {
2039  unsigned int index = e->register_idx;
2040 
2041  if (e->sysval_semantic == WINED3D_SV_VERTEX_ID)
2042  {
2043  shader_addline(buffer, "vec4 vs_in%u = vec4(intBitsToFloat(gl_VertexID), 0.0, 0.0, 0.0);\n",
2044  index);
2045  return;
2046  }
2047  if (e->sysval_semantic == WINED3D_SV_INSTANCE_ID)
2048  {
2049  shader_addline(buffer, "vec4 vs_in%u = vec4(intBitsToFloat(gl_InstanceID), 0.0, 0.0, 0.0);\n",
2050  index);
2051  return;
2052  }
2053  if (e->sysval_semantic && e->sysval_semantic != WINED3D_SV_POSITION)
2054  FIXME("Unhandled sysval semantic %#x.\n", e->sysval_semantic);
2055 
2057  shader_addline(buffer, "layout(location = %u) ", index);
2058 
2059  switch (e->component_type)
2060  {
2061  case WINED3D_TYPE_UINT:
2062  shader_glsl_declare_typed_vertex_attribute(buffer, gl_info, "uvec", "uint", index);
2063  break;
2064  case WINED3D_TYPE_INT:
2065  shader_glsl_declare_typed_vertex_attribute(buffer, gl_info, "ivec", "int", index);
2066  break;
2067 
2068  default:
2069  FIXME("Unhandled type %#x.\n", e->component_type);
2070  /* Fall through. */
2071  case WINED3D_TYPE_UNKNOWN:
2072  case WINED3D_TYPE_FLOAT:
2073  shader_addline(buffer, "%s vec4 vs_in%u;\n", get_attribute_keyword(gl_info), index);
2074  break;
2075  }
2076 }
2077 
2080  struct wined3d_string_buffer *buffer, const struct wined3d_shader *shader,
2081  const struct wined3d_shader_reg_maps *reg_maps, const struct shader_glsl_ctx_priv *ctx_priv)
2082 {
2083  const struct wined3d_shader_version *version = &reg_maps->shader_version;
2084  const struct vs_compile_args *vs_args = ctx_priv->cur_vs_args;
2085  const struct ps_compile_args *ps_args = ctx_priv->cur_ps_args;
2086  const struct wined3d_gl_info *gl_info = context->gl_info;
2087  const struct wined3d_shader_indexable_temp *idx_temp_reg;
2088  unsigned int uniform_block_base, uniform_block_count;
2089  const struct wined3d_shader_lconst *lconst;
2090  const char *prefix;
2091  unsigned int i;
2092  DWORD map;
2093 
2094  prefix = shader_glsl_get_prefix(version->type);
2095 
2096  /* Prototype the subroutines */
2097  for (i = 0, map = reg_maps->labels; map; map >>= 1, ++i)
2098  {
2099  if (map & 1) shader_addline(buffer, "void subroutine%u();\n", i);
2100  }
2101 
2102  /* Declare the constants (aka uniforms) */
2103  if (shader->limits->constant_float > 0)
2104  {
2105  unsigned max_constantsF;
2106 
2107  /* Unless the shader uses indirect addressing, always declare the
2108  * maximum array size and ignore that we need some uniforms privately.
2109  * E.g. if GL supports 256 uniforms, and we need 2 for the pos fixup
2110  * and immediate values, still declare VC[256]. If the shader needs
2111  * more uniforms than we have it won't work in any case. If it uses
2112  * less, the compiler will figure out which uniforms are really used
2113  * and strip them out. This allows a shader to use c255 on a dx9 card,
2114  * as long as it doesn't also use all the other constants.
2115  *
2116  * If the shader uses indirect addressing the compiler must assume
2117  * that all declared uniforms are used. In this case, declare only the
2118  * amount that we're assured to have.
2119  *
2120  * Thus we run into problems in these two cases:
2121  * 1) The shader really uses more uniforms than supported.
2122  * 2) The shader uses indirect addressing, less constants than
2123  * supported, but uses a constant index > #supported consts. */
2124  if (version->type == WINED3D_SHADER_TYPE_PIXEL)
2125  {
2126  /* No indirect addressing here. */
2127  max_constantsF = gl_info->limits.glsl_ps_float_constants;
2128  }
2129  else
2130  {
2131  if (reg_maps->usesrelconstF)
2132  {
2133  /* Subtract the other potential uniforms from the max
2134  * available (bools, ints, and 1 row of projection matrix).
2135  * Subtract another uniform for immediate values, which have
2136  * to be loaded via uniform by the driver as well. The shader
2137  * code only uses 0.5, 2.0, 1.0, 128 and -128 in vertex
2138  * shader code, so one vec4 should be enough. (Unfortunately
2139  * the Nvidia driver doesn't store 128 and -128 in one float).
2140  *
2141  * Writing gl_ClipVertex requires one uniform for each
2142  * clipplane as well. */
2143  max_constantsF = gl_info->limits.glsl_vs_float_constants - 3;
2144  if (vs_args->clip_enabled)
2145  max_constantsF -= gl_info->limits.user_clip_distances;
2146  max_constantsF -= wined3d_popcount(reg_maps->integer_constants);
2147  /* Strictly speaking a bool only uses one scalar, but the nvidia(Linux) compiler doesn't pack them properly,
2148  * so each scalar requires a full vec4. We could work around this by packing the booleans ourselves, but
2149  * for now take this into account when calculating the number of available constants
2150  */
2151  max_constantsF -= wined3d_popcount(reg_maps->boolean_constants);
2152  /* Set by driver quirks in directx.c */
2153  max_constantsF -= gl_info->reserved_glsl_constants;
2154 
2155  if (max_constantsF < shader->limits->constant_float)
2156  {
2157  static unsigned int once;
2158 
2159  if (!once++)
2160  ERR_(winediag)("The hardware does not support enough uniform components to run this shader,"
2161  " it may not render correctly.\n");
2162  else
2163  WARN("The hardware does not support enough uniform components to run this shader.\n");
2164  }
2165  }
2166  else
2167  {
2168  max_constantsF = gl_info->limits.glsl_vs_float_constants;
2169  }
2170  }
2171  max_constantsF = min(shader->limits->constant_float, max_constantsF);
2172  shader_addline(buffer, "uniform vec4 %s_c[%u];\n", prefix, max_constantsF);
2173  }
2174 
2175  /* Always declare the full set of constants, the compiler can remove the
2176  * unused ones because d3d doesn't (yet) support indirect int and bool
2177  * constant addressing. This avoids problems if the app uses e.g. i0 and i9. */
2178  if (shader->limits->constant_int > 0 && reg_maps->integer_constants)
2179  shader_addline(buffer, "uniform ivec4 %s_i[%u];\n", prefix, shader->limits->constant_int);
2180 
2181  if (shader->limits->constant_bool > 0 && reg_maps->boolean_constants)
2182  shader_addline(buffer, "uniform bool %s_b[%u];\n", prefix, shader->limits->constant_bool);
2183 
2184  /* Declare immediate constant buffer */
2185  if (reg_maps->icb)
2186  shader_addline(buffer, "uniform vec4 %s_icb[%u];\n", prefix, reg_maps->icb->vec4_count);
2187 
2188  /* Declare constant buffers */
2190  &uniform_block_base, &uniform_block_count);
2191  for (i = 0; i < min(uniform_block_count, WINED3D_MAX_CBS); ++i)
2192  {
2193  if (reg_maps->cb_sizes[i])
2194  {
2195  shader_addline(buffer, "layout(std140");
2197  shader_addline(buffer, ", binding = %u", uniform_block_base + i);
2198  shader_addline(buffer, ") uniform block_%s_cb%u { vec4 %s_cb%u[%u]; };\n",
2199  prefix, i, prefix, i, reg_maps->cb_sizes[i]);
2200  }
2201  }
2202 
2203  /* Declare texture samplers */
2204  for (i = 0; i < reg_maps->sampler_map.count; ++i)
2205  {
2207  const char *sampler_type_prefix, *sampler_type;
2208  BOOL shadow_sampler, tex_rect;
2209 
2210  entry = &reg_maps->sampler_map.entries[i];
2211 
2212  if (entry->resource_idx >= ARRAY_SIZE(reg_maps->resource_info))
2213  {
2214  ERR("Invalid resource index %u.\n", entry->resource_idx);
2215  continue;
2216  }
2217 
2218  switch (reg_maps->resource_info[entry->resource_idx].data_type)
2219  {
2220  case WINED3D_DATA_FLOAT:
2221  case WINED3D_DATA_UNORM:
2222  case WINED3D_DATA_SNORM:
2223  sampler_type_prefix = "";
2224  break;
2225 
2226  case WINED3D_DATA_INT:
2227  sampler_type_prefix = "i";
2228  break;
2229 
2230  case WINED3D_DATA_UINT:
2231  sampler_type_prefix = "u";
2232  break;
2233 
2234  default:
2235  sampler_type_prefix = "";
2236  ERR("Unhandled resource data type %#x.\n", reg_maps->resource_info[i].data_type);
2237  break;
2238  }
2239 
2240  shadow_sampler = glsl_is_shadow_sampler(shader, ps_args, entry->resource_idx, entry->sampler_idx);
2241  switch (reg_maps->resource_info[entry->resource_idx].type)
2242  {
2244  sampler_type = "samplerBuffer";
2245  break;
2246 
2248  if (shadow_sampler)
2249  sampler_type = "sampler1DShadow";
2250  else
2251  sampler_type = "sampler1D";
2252  break;
2253 
2255  tex_rect = version->type == WINED3D_SHADER_TYPE_PIXEL
2256  && (ps_args->np2_fixup & (1u << entry->resource_idx))
2257  && gl_info->supported[ARB_TEXTURE_RECTANGLE];
2258  if (shadow_sampler)
2259  {
2260  if (tex_rect)
2261  sampler_type = "sampler2DRectShadow";
2262  else
2263  sampler_type = "sampler2DShadow";
2264  }
2265  else
2266  {
2267  if (tex_rect)
2268  sampler_type = "sampler2DRect";
2269  else
2270  sampler_type = "sampler2D";
2271  }
2272  break;
2273 
2275  if (shadow_sampler)
2276  FIXME("Unsupported 3D shadow sampler.\n");
2277  sampler_type = "sampler3D";
2278  break;
2279 
2281  if (shadow_sampler)
2282  sampler_type = "samplerCubeShadow";
2283  else
2284  sampler_type = "samplerCube";
2285  break;
2286 
2288  if (shadow_sampler)
2289  sampler_type = "sampler1DArrayShadow";
2290  else
2291  sampler_type = "sampler1DArray";
2292  break;
2293 
2295  if (shadow_sampler)
2296  sampler_type = "sampler2DArrayShadow";
2297  else
2298  sampler_type = "sampler2DArray";
2299  break;
2300 
2302  if (shadow_sampler)
2303  sampler_type = "samplerCubeArrayShadow";
2304  else
2305  sampler_type = "samplerCubeArray";
2306  break;
2307 
2309  sampler_type = "sampler2DMS";
2310  break;
2311 
2313  sampler_type = "sampler2DMSArray";
2314  break;
2315 
2316  default:
2317  sampler_type = "unsupported_sampler";
2318  FIXME("Unhandled resource type %#x.\n", reg_maps->resource_info[entry->resource_idx].type);
2319  break;
2320  }
2321 
2324  shader_addline(buffer, "uniform %s%s %s_sampler%u;\n",
2325  sampler_type_prefix, sampler_type, prefix, entry->bind_idx);
2326  }
2327 
2328  /* Declare images */
2329  for (i = 0; i < ARRAY_SIZE(reg_maps->uav_resource_info); ++i)
2330  {
2331  const char *image_type_prefix, *image_type, *read_format;
2332 
2333  if (!reg_maps->uav_resource_info[i].type)
2334  continue;
2335 
2336  switch (reg_maps->uav_resource_info[i].data_type)
2337  {
2338  case WINED3D_DATA_FLOAT:
2339  case WINED3D_DATA_UNORM:
2340  case WINED3D_DATA_SNORM:
2341  image_type_prefix = "";
2342  read_format = "r32f";
2343  break;
2344 
2345  case WINED3D_DATA_INT:
2346  image_type_prefix = "i";
2347  read_format = "r32i";
2348  break;
2349 
2350  case WINED3D_DATA_UINT:
2351  image_type_prefix = "u";
2352  read_format = "r32ui";
2353  break;
2354 
2355  default:
2356  image_type_prefix = "";
2357  read_format = "";
2358  ERR("Unhandled resource data type %#x.\n", reg_maps->uav_resource_info[i].data_type);
2359  break;
2360  }
2361 
2362  switch (reg_maps->uav_resource_info[i].type)
2363  {
2365  image_type = "imageBuffer";
2366  break;
2367 
2369  image_type = "image2D";
2370  break;
2371 
2373  image_type = "image3D";
2374  break;
2375 
2377  image_type = "image2DArray";
2378  break;
2379 
2380  default:
2381  image_type = "unsupported_image";
2382  FIXME("Unhandled resource type %#x.\n", reg_maps->uav_resource_info[i].type);
2383  break;
2384  }
2385 
2387  shader_addline(buffer, "layout(binding = %u)\n", i);
2388  if (reg_maps->uav_read_mask & (1u << i))
2389  shader_addline(buffer, "layout(%s) uniform %s%s %s_image%u;\n",
2390  read_format, image_type_prefix, image_type, prefix, i);
2391  else
2392  shader_addline(buffer, "writeonly uniform %s%s %s_image%u;\n",
2393  image_type_prefix, image_type, prefix, i);
2394 
2395  if (reg_maps->uav_counter_mask & (1u << i))
2396  shader_addline(buffer, "layout(binding = %u) uniform atomic_uint %s_counter%u;\n",
2397  i, prefix, i);
2398  }
2399 
2400  /* Declare address variables */
2401  for (i = 0, map = reg_maps->address; map; map >>= 1, ++i)
2402  {
2403  if (map & 1) shader_addline(buffer, "ivec4 A%u;\n", i);
2404  }
2405 
2406  /* Declare output register temporaries */
2407  if (shader->limits->packed_output)
2408  shader_addline(buffer, "vec4 %s_out[%u];\n", prefix, shader->limits->packed_output);
2409 
2410  /* Declare temporary variables */
2411  if (reg_maps->temporary_count)
2412  {
2413  for (i = 0; i < reg_maps->temporary_count; ++i)
2414  shader_addline(buffer, "vec4 R%u;\n", i);
2415  }
2416  else if (version->major < 4)
2417  {
2418  for (i = 0, map = reg_maps->temporary; map; map >>= 1, ++i)
2419  {
2420  if (map & 1)
2421  shader_addline(buffer, "vec4 R%u;\n", i);
2422  }
2423  }
2424 
2425  /* Declare indexable temporary variables */
2426  LIST_FOR_EACH_ENTRY(idx_temp_reg, &reg_maps->indexable_temps, struct wined3d_shader_indexable_temp, entry)
2427  {
2428  if (idx_temp_reg->component_count != 4)
2429  FIXME("Ignoring component count %u.\n", idx_temp_reg->component_count);
2430  shader_addline(buffer, "vec4 X%u[%u];\n", idx_temp_reg->register_idx, idx_temp_reg->register_size);
2431  }
2432 
2433  /* Declare loop registers aLx */
2434  if (version->major < 4)
2435  {
2436  for (i = 0; i < reg_maps->loop_depth; ++i)
2437  {
2438  shader_addline(buffer, "int aL%u;\n", i);
2439  shader_addline(buffer, "int tmpInt%u;\n", i);
2440  }
2441  }
2442 
2443  /* Temporary variables for matrix operations */
2444  shader_addline(buffer, "vec4 tmp0;\n");
2445  shader_addline(buffer, "vec4 tmp1;\n");
2446 
2447  if (!shader->load_local_constsF)
2448  {
2449  LIST_FOR_EACH_ENTRY(lconst, &shader->constantsF, struct wined3d_shader_lconst, entry)
2450  {
2451  shader_addline(buffer, "const vec4 %s_lc%u = ", prefix, lconst->idx);
2452  shader_glsl_append_imm_vec4(buffer, (const float *)lconst->value);
2453  shader_addline(buffer, ";\n");
2454  }
2455  }
2456 }
2457 
2458 /* Prototypes */
2459 static void shader_glsl_add_src_param(const struct wined3d_shader_instruction *ins,
2460  const struct wined3d_shader_src_param *wined3d_src, DWORD mask, struct glsl_src_param *glsl_src);
2461 
/* GLSL multiplier prefixes for destination shift modifiers, indexed by the
 * shift value (0-15). Values 1-7 scale the result up, values 8-15 scale it
 * down; entries without a multiplier string leave the result unscaled. */
static const char * const shift_glsl_tab[] =
{
    /*  0: no shift */ "",
    /*  1: x2       */ "2.0 * ",
    /*  2: x4       */ "4.0 * ",
    /*  3: x8       */ "8.0 * ",
    /*  4: x16      */ "16.0 * ",
    /*  5: x32      */ "32.0 * ",
    /*  6: x64      */ "",
    /*  7: x128     */ "",
    /*  8: d256     */ "",
    /*  9: d128     */ "",
    /* 10: d64      */ "",
    /* 11: d32      */ "",
    /* 12: d16      */ "0.0625 * ",
    /* 13: d8       */ "0.125 * ",
    /* 14: d4       */ "0.25 * ",
    /* 15: d2       */ "0.5 * ",
};
2481 
2482 /* Generate a GLSL parameter that does the input modifier computation and return the input register/mask to use */
2484  const char *in_reg, const char *in_regswizzle, char *out_str)
2485 {
2486  switch (src_modifier)
2487  {
2488  case WINED3DSPSM_DZ: /* Need to handle this in the instructions itself (texld & texcrd). */
2489  case WINED3DSPSM_DW:
2490  case WINED3DSPSM_NONE:
2491  sprintf(out_str, "%s%s", in_reg, in_regswizzle);
2492  break;
2493  case WINED3DSPSM_NEG:
2494  sprintf(out_str, "-%s%s", in_reg, in_regswizzle);
2495  break;
2496  case WINED3DSPSM_NOT:
2497  sprintf(out_str, "!%s%s", in_reg, in_regswizzle);
2498  break;
2499  case WINED3DSPSM_BIAS:
2500  sprintf(out_str, "(%s%s - vec4(0.5)%s)", in_reg, in_regswizzle, in_regswizzle);
2501  break;
2502  case WINED3DSPSM_BIASNEG:
2503  sprintf(out_str, "-(%s%s - vec4(0.5)%s)", in_reg, in_regswizzle, in_regswizzle);
2504  break;
2505  case WINED3DSPSM_SIGN:
2506  sprintf(out_str, "(2.0 * (%s%s - 0.5))", in_reg, in_regswizzle);
2507  break;
2508  case WINED3DSPSM_SIGNNEG:
2509  sprintf(out_str, "-(2.0 * (%s%s - 0.5))", in_reg, in_regswizzle);
2510  break;
2511  case WINED3DSPSM_COMP:
2512  sprintf(out_str, "(1.0 - %s%s)", in_reg, in_regswizzle);
2513  break;
2514  case WINED3DSPSM_X2:
2515  sprintf(out_str, "(2.0 * %s%s)", in_reg, in_regswizzle);
2516  break;
2517  case WINED3DSPSM_X2NEG:
2518  sprintf(out_str, "-(2.0 * %s%s)", in_reg, in_regswizzle);
2519  break;
2520  case WINED3DSPSM_ABS:
2521  sprintf(out_str, "abs(%s%s)", in_reg, in_regswizzle);
2522  break;
2523  case WINED3DSPSM_ABSNEG:
2524  sprintf(out_str, "-abs(%s%s)", in_reg, in_regswizzle);
2525  break;
2526  default:
2527  FIXME("Unhandled modifier %u\n", src_modifier);
2528  sprintf(out_str, "%s%s", in_reg, in_regswizzle);
2529  }
2530 }
2531 
2532 static void shader_glsl_fixup_scalar_register_variable(char *register_name,
2533  const char *glsl_variable, const struct wined3d_gl_info *gl_info)
2534 {
2535  /* The ARB_shading_language_420pack extension allows swizzle operations on
2536  * scalars. */
2538  sprintf(register_name, "%s", glsl_variable);
2539  else
2540  sprintf(register_name, "ivec2(%s, 0)", glsl_variable);
2541 }
2542 
2546  enum wined3d_data_type data_type, char *register_name, BOOL *is_color,
2547  const struct wined3d_shader_instruction *ins)
2548 {
2549  /* oPos, oFog and oPts in D3D */
2550  static const char * const hwrastout_reg_names[] = {"vs_out[10]", "vs_out[11].x", "vs_out[11].y"};
2551 
2552  const struct wined3d_shader *shader = ins->ctx->shader;
2553  const struct wined3d_shader_reg_maps *reg_maps = ins->ctx->reg_maps;
2554  const struct wined3d_shader_version *version = &reg_maps->shader_version;
2555  const struct wined3d_gl_info *gl_info = ins->ctx->gl_info;
2556  const char *prefix = shader_glsl_get_prefix(version->type);
2557  struct glsl_src_param rel_param0, rel_param1;
2558  char imm_str[4][17];
2559 
2560  if (reg->idx[0].offset != ~0U && reg->idx[0].rel_addr)
2561  shader_glsl_add_src_param(ins, reg->idx[0].rel_addr, WINED3DSP_WRITEMASK_0, &rel_param0);
2562  if (reg->idx[1].offset != ~0U && reg->idx[1].rel_addr)
2563  shader_glsl_add_src_param(ins, reg->idx[1].rel_addr, WINED3DSP_WRITEMASK_0, &rel_param1);
2564  *is_color = FALSE;
2565 
2566  switch (reg->type)
2567  {
2568  case WINED3DSPR_TEMP:
2569  sprintf(register_name, "R%u", reg->idx[0].offset);
2570  break;
2571 
2572  case WINED3DSPR_INPUT:
2574  if (version->type == WINED3D_SHADER_TYPE_VERTEX)
2575  {
2576  struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data;
2577 
2578  if (reg->idx[0].rel_addr)
2579  FIXME("VS3 input registers relative addressing.\n");
2580  if (priv->cur_vs_args->swizzle_map & (1u << reg->idx[0].offset))
2581  *is_color = TRUE;
2582  if (reg->idx[0].rel_addr)
2583  {
2584  sprintf(register_name, "%s_in[%s + %u]",
2585  prefix, rel_param0.param_str, reg->idx[0].offset);
2586  }
2587  else
2588  {
2589  sprintf(register_name, "%s_in%u", prefix, reg->idx[0].offset);
2590  }
2591  break;
2592  }
2593 
2594  if (version->type == WINED3D_SHADER_TYPE_HULL
2597  {
2598  if (reg->idx[0].rel_addr)
2599  {
2600  if (reg->idx[1].rel_addr)
2601  sprintf(register_name, "shader_in[%s + %u].reg[%s + %u]",
2602  rel_param0.param_str, reg->idx[0].offset,
2603  rel_param1.param_str, reg->idx[1].offset);
2604  else
2605  sprintf(register_name, "shader_in[%s + %u].reg[%u]",
2606  rel_param0.param_str, reg->idx[0].offset,
2607  reg->idx[1].offset);
2608  }
2609  else if (reg->idx[1].rel_addr)
2610  sprintf(register_name, "shader_in[%u].reg[%s + %u]", reg->idx[0].offset,
2611  rel_param1.param_str, reg->idx[1].offset);
2612  else
2613  sprintf(register_name, "shader_in[%u].reg[%u]",
2614  reg->idx[0].offset, reg->idx[1].offset);
2615  break;
2616  }
2617 
2618  /* pixel shaders >= 3.0 */
2619  if (version->major >= 3)
2620  {
2621  DWORD idx = shader->u.ps.input_reg_map[reg->idx[0].offset];
2622  unsigned int in_count = vec4_varyings(version->major, gl_info);
2623 
2624  if (reg->idx[0].rel_addr)
2625  {
2626  /* Removing a + 0 would be an obvious optimization, but
2627  * OS X doesn't see the NOP operation there. */
2628  if (idx)
2629  {
2630  if (needs_legacy_glsl_syntax(gl_info)
2631  && shader->u.ps.declared_in_count > in_count)
2632  {
2633  sprintf(register_name,
2634  "((%s + %u) > %u ? (%s + %u) > %u ? gl_SecondaryColor : gl_Color : %s_in[%s + %u])",
2635  rel_param0.param_str, idx, in_count - 1, rel_param0.param_str, idx, in_count,
2636  prefix, rel_param0.param_str, idx);
2637  }
2638  else
2639  {
2640  sprintf(register_name, "%s_in[%s + %u]", prefix, rel_param0.param_str, idx);
2641  }
2642  }
2643  else
2644  {
2645  if (needs_legacy_glsl_syntax(gl_info)
2646  && shader->u.ps.declared_in_count > in_count)
2647  {
2648  sprintf(register_name, "((%s) > %u ? (%s) > %u ? gl_SecondaryColor : gl_Color : %s_in[%s])",
2649  rel_param0.param_str, in_count - 1, rel_param0.param_str, in_count,
2650  prefix, rel_param0.param_str);
2651  }
2652  else
2653  {
2654  sprintf(register_name, "%s_in[%s]", prefix, rel_param0.param_str);
2655  }
2656  }
2657  }
2658  else
2659  {
2660  if (idx == in_count) sprintf(register_name, "gl_Color");
2661  else if (idx == in_count + 1) sprintf(register_name, "gl_SecondaryColor");
2662  else sprintf(register_name, "%s_in[%u]", prefix, idx);
2663  }
2664  }
2665  else
2666  {
2667  if (!reg->idx[0].offset)
2668  strcpy(register_name, "ffp_varying_diffuse");
2669  else
2670  strcpy(register_name, "ffp_varying_specular");
2671  break;
2672  }
2673  break;
2674 
2675  case WINED3DSPR_CONST:
2676  {
2677  /* Relative addressing */
2678  if (reg->idx[0].rel_addr)
2679  {
2681  sprintf(register_name, "(%s + %u >= 0 && %s + %u < %u ? %s_c[%s + %u] : vec4(0.0))",
2682  rel_param0.param_str, reg->idx[0].offset,
2683  rel_param0.param_str, reg->idx[0].offset, shader->limits->constant_float,
2684  prefix, rel_param0.param_str, reg->idx[0].offset);
2685  else if (reg->idx[0].offset)
2686  sprintf(register_name, "%s_c[%s + %u]", prefix, rel_param0.param_str, reg->idx[0].offset);
2687  else
2688  sprintf(register_name, "%s_c[%s]", prefix, rel_param0.param_str);
2689  }
2690  else
2691  {
2692  if (shader_constant_is_local(shader, reg->idx[0].offset))
2693  sprintf(register_name, "%s_lc%u", prefix, reg->idx[0].offset);
2694  else
2695  sprintf(register_name, "%s_c[%u]", prefix, reg->idx[0].offset);
2696  }
2697  }
2698  break;
2699 
2700  case WINED3DSPR_CONSTINT:
2701  sprintf(register_name, "%s_i[%u]", prefix, reg->idx[0].offset);
2702  break;
2703 
2704  case WINED3DSPR_CONSTBOOL:
2705  sprintf(register_name, "%s_b[%u]", prefix, reg->idx[0].offset);
2706  break;
2707 
2708  case WINED3DSPR_TEXTURE: /* case WINED3DSPR_ADDR: */
2709  if (version->type == WINED3D_SHADER_TYPE_PIXEL)
2710  sprintf(register_name, "T%u", reg->idx[0].offset);
2711  else
2712  sprintf(register_name, "A%u", reg->idx[0].offset);
2713  break;
2714 
2715  case WINED3DSPR_LOOP:
2716  sprintf(register_name, "aL%u", ins->ctx->state->current_loop_reg - 1);
2717  break;
2718 
2719  case WINED3DSPR_SAMPLER:
2720  sprintf(register_name, "%s_sampler%u", prefix, reg->idx[0].offset);
2721  break;
2722 
2723  case WINED3DSPR_COLOROUT:
2724  /* FIXME: should check dual_buffers when dual blending is enabled */
2725  if (reg->idx[0].offset >= gl_info->limits.buffers)
2726  WARN("Write to render target %u, only %d supported.\n",
2727  reg->idx[0].offset, gl_info->limits.buffers);
2728 
2729  sprintf(register_name, needs_legacy_glsl_syntax(gl_info) ? "gl_FragData[%u]" : "ps_out%u",
2730  reg->idx[0].offset);
2731  break;
2732 
2733  case WINED3DSPR_RASTOUT:
2734  sprintf(register_name, "%s", hwrastout_reg_names[reg->idx[0].offset]);
2735  break;
2736 
2737  case WINED3DSPR_DEPTHOUT:
2738  case WINED3DSPR_DEPTHOUTGE:
2739  case WINED3DSPR_DEPTHOUTLE:
2740  sprintf(register_name, "gl_FragDepth");
2741  break;
2742 
2743  case WINED3DSPR_ATTROUT:
2744  if (!reg->idx[0].offset)
2745  sprintf(register_name, "%s_out[8]", prefix);
2746  else
2747  sprintf(register_name, "%s_out[9]", prefix);
2748  break;
2749 
2750  case WINED3DSPR_TEXCRDOUT:
2751  /* Vertex shaders >= 3.0: WINED3DSPR_OUTPUT */
2752  if (reg->idx[0].rel_addr)
2753  sprintf(register_name, "%s_out[%s + %u]",
2754  prefix, rel_param0.param_str, reg->idx[0].offset);
2755  else
2756  sprintf(register_name, "%s_out[%u]", prefix, reg->idx[0].offset);
2757  break;
2758 
2759  case WINED3DSPR_MISCTYPE:
2760  if (!reg->idx[0].offset)
2761  {
2762  /* vPos */
2763  sprintf(register_name, "vpos");
2764  }
2765  else if (reg->idx[0].offset == 1)
2766  {
2767  /* Note that gl_FrontFacing is a bool, while vFace is
2768  * a float for which the sign determines front/back */
2769  sprintf(register_name, "(gl_FrontFacing ? 1.0 : -1.0)");
2770  }
2771  else
2772  {
2773  FIXME("Unhandled misctype register %u.\n", reg->idx[0].offset);
2774  sprintf(register_name, "unrecognized_register");
2775  }
2776  break;
2777 
2778  case WINED3DSPR_IMMCONST:
2779  switch (reg->immconst_type)
2780  {
2782  switch (data_type)
2783  {
2784  case WINED3D_DATA_FLOAT:
2785  if (gl_info->supported[ARB_SHADER_BIT_ENCODING])
2786  sprintf(register_name, "uintBitsToFloat(%#xu)", reg->u.immconst_data[0]);
2787  else
2788  wined3d_ftoa(*(const float *)reg->u.immconst_data, register_name);
2789  break;
2790  case WINED3D_DATA_INT:
2791  sprintf(register_name, "%#x", reg->u.immconst_data[0]);
2792  break;
2793  case WINED3D_DATA_RESOURCE:
2794  case WINED3D_DATA_SAMPLER:
2795  case WINED3D_DATA_UINT:
2796  sprintf(register_name, "%#xu", reg->u.immconst_data[0]);
2797  break;
2798  default:
2799  sprintf(register_name, "<unhandled data type %#x>", data_type);
2800  break;
2801  }
2802  break;
2803 
2804  case WINED3D_IMMCONST_VEC4:
2805  switch (data_type)
2806  {
2807  case WINED3D_DATA_FLOAT:
2808  if (gl_info->supported[ARB_SHADER_BIT_ENCODING])
2809  {
2810  sprintf(register_name, "uintBitsToFloat(uvec4(%#xu, %#xu, %#xu, %#xu))",
2811  reg->u.immconst_data[0], reg->u.immconst_data[1],
2812  reg->u.immconst_data[2], reg->u.immconst_data[3]);
2813  }
2814  else
2815  {
2816  wined3d_ftoa(*(const float *)&reg->u.immconst_data[0], imm_str[0]);
2817  wined3d_ftoa(*(const float *)&reg->u.immconst_data[1], imm_str[1]);
2818  wined3d_ftoa(*(const float *)&reg->u.immconst_data[2], imm_str[2]);
2819  wined3d_ftoa(*(const float *)&reg->u.immconst_data[3], imm_str[3]);
2820  sprintf(register_name, "vec4(%s, %s, %s, %s)",
2821  imm_str[0], imm_str[1], imm_str[2], imm_str[3]);
2822  }
2823  break;
2824  case WINED3D_DATA_INT:
2825  sprintf(register_name, "ivec4(%#x, %#x, %#x, %#x)",
2826  reg->u.immconst_data[0], reg->u.immconst_data[1],
2827  reg->u.immconst_data[2], reg->u.immconst_data[3]);
2828  break;
2829  case WINED3D_DATA_RESOURCE:
2830  case WINED3D_DATA_SAMPLER:
2831  case WINED3D_DATA_UINT:
2832  sprintf(register_name, "uvec4(%#xu, %#xu, %#xu, %#xu)",
2833  reg->u.immconst_data[0], reg->u.immconst_data[1],
2834  reg->u.immconst_data[2], reg->u.immconst_data[3]);
2835  break;
2836  default:
2837  sprintf(register_name, "<unhandled data type %#x>", data_type);
2838  break;
2839  }
2840  break;
2841 
2842  default:
2843  FIXME("Unhandled immconst type %#x\n", reg->immconst_type);
2844  sprintf(register_name, "<unhandled_immconst_type %#x>", reg->immconst_type);
2845  }
2846  break;
2847 
2849  if (reg->idx[1].rel_addr)
2850  sprintf(register_name, "%s_cb%u[%s + %u]",
2851  prefix, reg->idx[0].offset, rel_param1.param_str, reg->idx[1].offset);
2852  else
2853  sprintf(register_name, "%s_cb%u[%u]", prefix, reg->idx[0].offset, reg->idx[1].offset);
2854  break;
2855 
2857  if (reg->idx[0].rel_addr)
2858  sprintf(register_name, "%s_icb[%s + %u]", prefix, rel_param0.param_str, reg->idx[0].offset);
2859  else
2860  sprintf(register_name, "%s_icb[%u]", prefix, reg->idx[0].offset);
2861  break;
2862 
2863  case WINED3DSPR_PRIMID:
2865  sprintf(register_name, "gl_PrimitiveIDIn");
2866  else
2867  sprintf(register_name, "gl_PrimitiveID");
2868  break;
2869 
2870  case WINED3DSPR_IDXTEMP:
2871  if (reg->idx[1].rel_addr)
2872  sprintf(register_name, "X%u[%s + %u]", reg->idx[0].offset, rel_param1.param_str, reg->idx[1].offset);
2873  else
2874  sprintf(register_name, "X%u[%u]", reg->idx[0].offset, reg->idx[1].offset);
2875  break;
2876 
2879  "int(gl_LocalInvocationIndex)", gl_info);
2880  break;
2881 
2882  case WINED3DSPR_GSINSTID:
2883  case WINED3DSPR_OUTPOINTID:
2885  "gl_InvocationID", gl_info);
2886  break;
2887 
2888  case WINED3DSPR_THREADID:
2889  sprintf(register_name, "ivec3(gl_GlobalInvocationID)");
2890  break;
2891 
2893  sprintf(register_name, "ivec3(gl_WorkGroupID)");
2894  break;
2895 
2897  sprintf(register_name, "ivec3(gl_LocalInvocationID)");
2898  break;
2899 
2900  case WINED3DSPR_FORKINSTID:
2901  case WINED3DSPR_JOININSTID:
2903  "phase_instance_id", gl_info);
2904  break;
2905 
2906  case WINED3DSPR_TESSCOORD:
2907  sprintf(register_name, "gl_TessCoord");
2908  break;
2909 
2911  if (reg->idx[0].rel_addr)
2912  {
2913  if (reg->idx[1].rel_addr)
2914  sprintf(register_name, "shader_out[%s + %u].reg[%s + %u]",
2915  rel_param0.param_str, reg->idx[0].offset,
2916  rel_param1.param_str, reg->idx[1].offset);
2917  else
2918  sprintf(register_name, "shader_out[%s + %u].reg[%u]",
2919  rel_param0.param_str, reg->idx[0].offset,
2920  reg->idx[1].offset);
2921  }
2922  else if (reg->idx[1].rel_addr)
2923  {
2924  sprintf(register_name, "shader_out[%u].reg[%s + %u]",
2925  reg->idx[0].offset, rel_param1.param_str,
2926  reg->idx[1].offset);
2927  }
2928  else
2929  {
2930  sprintf(register_name, "shader_out[%u].reg[%u]",
2931  reg->idx[0].offset, reg->idx[1].offset);
2932  }
2933  break;
2934 
2935  case WINED3DSPR_PATCHCONST:
2936  if (version->type == WINED3D_SHADER_TYPE_HULL)
2937  sprintf(register_name, "hs_out[%u]", reg->idx[0].offset);
2938  else
2939  sprintf(register_name, "vpc[%u]", reg->idx[0].offset);
2940  break;
2941 
2942  default:
2943  FIXME("Unhandled register type %#x.\n", reg->type);
2944  sprintf(register_name, "unrecognized_register");
2945  break;
2946  }
2947 }
2948 
2949 static void shader_glsl_write_mask_to_str(DWORD write_mask, char *str)
2950 {
2951  *str++ = '.';
2952  if (write_mask & WINED3DSP_WRITEMASK_0) *str++ = 'x';
2953  if (write_mask & WINED3DSP_WRITEMASK_1) *str++ = 'y';
2954  if (write_mask & WINED3DSP_WRITEMASK_2) *str++ = 'z';
2955  if (write_mask & WINED3DSP_WRITEMASK_3) *str++ = 'w';
2956  *str = '\0';
2957 }
2958 
2959 /* Get the GLSL write mask for the destination register */
2960 static DWORD shader_glsl_get_write_mask(const struct wined3d_shader_dst_param *param, char *write_mask)
2961 {
2962  DWORD mask = param->write_mask;
2963 
2964  if (shader_is_scalar(&param->reg))
2965  {
2967  *write_mask = '\0';
2968  }
2969  else
2970  {
2971  shader_glsl_write_mask_to_str(mask, write_mask);
2972  }
2973 
2974  return mask;
2975 }
2976 
2977 static unsigned int shader_glsl_get_write_mask_size(DWORD write_mask)
2978 {
2979  unsigned int size = 0;
2980 
2981  if (write_mask & WINED3DSP_WRITEMASK_0) ++size;
2982  if (write_mask & WINED3DSP_WRITEMASK_1) ++size;
2983  if (write_mask & WINED3DSP_WRITEMASK_2) ++size;
2984  if (write_mask & WINED3DSP_WRITEMASK_3) ++size;
2985 
2986  return size;
2987 }
2988 
2990  unsigned int component_idx)
2991 {
2992  /* swizzle bits fields: wwzzyyxx */
2993  return (swizzle >> (2 * component_idx)) & 0x3;
2994 }
2995 
2997 {
2998  /* For registers of type WINED3DDECLTYPE_D3DCOLOR, data is stored as "bgra",
2999  * but addressed as "rgba". To fix this we need to swap the register's x
3000  * and z components. */
3001  const char *swizzle_chars = fixup ? "zyxw" : "xyzw";
3002  unsigned int i;
3003 
3004  *str++ = '.';
3005  for (i = 0; i < 4; ++i)
3006  {
3007  if (mask & (WINED3DSP_WRITEMASK_0 << i))
3008  *str++ = swizzle_chars[shader_glsl_swizzle_get_component(swizzle, i)];
3009  }
3010  *str = '\0';
3011 }
3012 
3014  BOOL fixup, DWORD mask, char *swizzle_str)
3015 {
3016  if (shader_is_scalar(&param->reg))
3017  *swizzle_str = '\0';
3018  else
3019  shader_glsl_swizzle_to_str(param->swizzle, fixup, mask, swizzle_str);
3020 }
3021 
3022 static void shader_glsl_sprintf_cast(struct wined3d_string_buffer *dst_param, const char *src_param,
3023  enum wined3d_data_type dst_data_type, enum wined3d_data_type src_data_type)
3024 {
3025  if (dst_data_type == src_data_type)
3026  {
3027  string_buffer_sprintf(dst_param, "%s", src_param);
3028  return;
3029  }
3030 
3031  if (src_data_type == WINED3D_DATA_FLOAT)
3032  {
3033  switch (dst_data_type)
3034  {
3035  case WINED3D_DATA_INT:
3036  string_buffer_sprintf(dst_param, "floatBitsToInt(%s)", src_param);
3037  return;
3038  case WINED3D_DATA_RESOURCE:
3039  case WINED3D_DATA_SAMPLER:
3040  case WINED3D_DATA_UINT:
3041  string_buffer_sprintf(dst_param, "floatBitsToUint(%s)", src_param);
3042  return;
3043  default:
3044  break;
3045  }
3046  }
3047 
3048  if (src_data_type == WINED3D_DATA_UINT && dst_data_type == WINED3D_DATA_FLOAT)
3049  {
3050  string_buffer_sprintf(dst_param, "uintBitsToFloat(%s)", src_param);
3051  return;
3052  }
3053 
3054  if (src_data_type == WINED3D_DATA_INT && dst_data_type == WINED3D_DATA_FLOAT)
3055  {
3056  string_buffer_sprintf(dst_param, "intBitsToFloat(%s)", src_param);
3057  return;
3058  }
3059 
3060  FIXME("Unhandled cast from %#x to %#x.\n", src_data_type, dst_data_type);
3061  string_buffer_sprintf(dst_param, "%s", src_param);
3062 }
3063 
3064 /* From a given parameter token, generate the corresponding GLSL string.
3065  * Also, return the actual register name and swizzle in case the
3066  * caller needs this information as well. */
3068  const struct wined3d_shader_src_param *wined3d_src, DWORD mask, struct glsl_src_param *glsl_src,
3069  enum wined3d_data_type data_type)
3070 {
3071  struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data;
3073  enum wined3d_data_type param_data_type;
3074  BOOL is_color = FALSE;
3075  char swizzle_str[6];
3076 
3077  glsl_src->reg_name[0] = '\0';
3078  glsl_src->param_str[0] = '\0';
3079  swizzle_str[0] = '\0';
3080 
3081  shader_glsl_get_register_name(&wined3d_src->reg, data_type, glsl_src->reg_name, &is_color, ins);
3082  shader_glsl_get_swizzle(wined3d_src, is_color, mask, swizzle_str);
3083 
3084  switch (wined3d_src->reg.type)
3085  {
3086  case WINED3DSPR_IMMCONST:
3087  param_data_type = data_type;
3088  break;
3089  case WINED3DSPR_FORKINSTID:
3090  case WINED3DSPR_GSINSTID:
3091  case WINED3DSPR_JOININSTID:
3094  case WINED3DSPR_OUTPOINTID:
3095  case WINED3DSPR_PRIMID:
3097  case WINED3DSPR_THREADID:
3098  param_data_type = WINED3D_DATA_INT;
3099  break;
3100  default:
3101  param_data_type = WINED3D_DATA_FLOAT;
3102  break;
3103  }
3104 
3105  shader_glsl_sprintf_cast(reg_name, glsl_src->reg_name, data_type, param_data_type);
3106  shader_glsl_gen_modifier(wined3d_src->modifiers, reg_name->buffer, swizzle_str, glsl_src->param_str);
3107 
3109 }
3110 
3112  const struct wined3d_shader_src_param *wined3d_src, DWORD mask, struct glsl_src_param *glsl_src)
3113 {
3114  shader_glsl_add_src_param_ext(ins, wined3d_src, mask, glsl_src, wined3d_src->reg.data_type);
3115 }
3116 
3117 /* From a given parameter token, generate the corresponding GLSL string.
3118  * Also, return the actual register name and swizzle in case the
3119  * caller needs this information as well. */
3121  const struct wined3d_shader_dst_param *wined3d_dst, struct glsl_dst_param *glsl_dst)
3122 {
3123  BOOL is_color = FALSE;
3124 
3125  glsl_dst->mask_str[0] = '\0';
3126  glsl_dst->reg_name[0] = '\0';
3127 
3128  shader_glsl_get_register_name(&wined3d_dst->reg, wined3d_dst->reg.data_type,
3129  glsl_dst->reg_name, &is_color, ins);
3130  return shader_glsl_get_write_mask(wined3d_dst, glsl_dst->mask_str);
3131 }
3132 
3133 /* Append the destination part of the instruction to the buffer, return the effective write mask */
3135  const struct wined3d_shader_instruction *ins, const struct wined3d_shader_dst_param *dst,
3136  enum wined3d_data_type data_type)
3137 {
3138  struct glsl_dst_param glsl_dst;
3139  DWORD mask;
3140 
3141  if ((mask = shader_glsl_add_dst_param(ins, dst, &glsl_dst)))
3142  {
3143  switch (data_type)
3144  {
3145  case WINED3D_DATA_FLOAT:
3146  shader_addline(buffer, "%s%s = %s(",
3147  glsl_dst.reg_name, glsl_dst.mask_str, shift_glsl_tab[dst->shift]);
3148  break;
3149  case WINED3D_DATA_INT:
3150  shader_addline(buffer, "%s%s = %sintBitsToFloat(",
3151  glsl_dst.reg_name, glsl_dst.mask_str, shift_glsl_tab[dst->shift]);
3152  break;
3153  case WINED3D_DATA_RESOURCE:
3154  case WINED3D_DATA_SAMPLER:
3155  case WINED3D_DATA_UINT:
3156  shader_addline(buffer, "%s%s = %suintBitsToFloat(",
3157  glsl_dst.reg_name, glsl_dst.mask_str, shift_glsl_tab[dst->shift]);
3158  break;
3159  default:
3160  FIXME("Unhandled data type %#x.\n", data_type);
3161  shader_addline(buffer, "%s%s = %s(",
3162  glsl_dst.reg_name, glsl_dst.mask_str, shift_glsl_tab[dst->shift]);
3163  break;
3164  }
3165  }
3166 
3167  return mask;
3168 }
3169 
3170 /* Append the destination part of the instruction to the buffer, return the effective write mask */
3172 {
3173  return shader_glsl_append_dst_ext(buffer, ins, &ins->dst[0], ins->dst[0].reg.data_type);
3174 }
3175 
3178 {
3179  struct glsl_dst_param dst_param;
3180  DWORD modifiers;
3181 
3182  if (!ins->dst_count) return;
3183 
3184  modifiers = ins->dst[0].modifiers;
3185  if (!modifiers) return;
3186 
3187  shader_glsl_add_dst_param(ins, &ins->dst[0], &dst_param);
3188 
3189  if (modifiers & WINED3DSPDM_SATURATE)
3190  {
3191  /* _SAT means to clamp the value of the register to between 0 and 1 */
3192  shader_addline(ins->ctx->buffer, "%s%s = clamp(%s%s, 0.0, 1.0);\n", dst_param.reg_name,
3193  dst_param.mask_str, dst_param.reg_name, dst_param.mask_str);
3194  }
3195 
3196  if (modifiers & WINED3DSPDM_MSAMPCENTROID)
3197  {
3198  FIXME("_centroid modifier not handled\n");
3199  }
3200 
3201  if (modifiers & WINED3DSPDM_PARTIALPRECISION)
3202  {
3203  /* MSDN says this modifier can be safely ignored, so that's what we'll do. */
3204  }
3205 }
3206 
3208 {
3209  switch (op)
3210  {
3211  case WINED3D_SHADER_REL_OP_GT: return ">";
3212  case WINED3D_SHADER_REL_OP_EQ: return "==";
3213  case WINED3D_SHADER_REL_OP_GE: return ">=";
3214  case WINED3D_SHADER_REL_OP_LT: return "<";
3215  case WINED3D_SHADER_REL_OP_NE: return "!=";
3216  case WINED3D_SHADER_REL_OP_LE: return "<=";
3217  default:
3218  FIXME("Unrecognized operator %#x.\n", op);
3219  return "(\?\?)";
3220  }
3221 }
3222 
3223 static BOOL shader_glsl_has_core_grad(const struct wined3d_gl_info *gl_info)
3224 {
3225  return shader_glsl_get_version(gl_info) >= 130 || gl_info->supported[EXT_GPU_SHADER4];
3226 }
3227 
3229  unsigned int *coord_size, unsigned int *deriv_size)
3230 {
3231  const BOOL is_array = resource_type == WINED3D_SHADER_RESOURCE_TEXTURE_1DARRAY
3232  || resource_type == WINED3D_SHADER_RESOURCE_TEXTURE_2DARRAY;
3233 
3234  *coord_size = resource_type_info[resource_type].coord_size;
3235  *deriv_size = *coord_size;
3236  if (is_array)
3237  --(*deriv_size);
3238 }
3239 
3241  DWORD resource_idx, DWORD sampler_idx, DWORD flags, struct glsl_sample_function *sample_function)
3242 {
 /* Fills in *sample_function for the resource at resource_idx: the name of
  * the GLSL sampling built-in (kept in a buffer taken from
  * priv->string_buffers), the resource data type, the LOD emulation mode,
  * the offset size and the coordinate / derivative masks.
  *
  * NOTE(review): source lines 3240, 3248, 3252-3254, 3276, 3283 and 3314 are
  * absent from this listing. "projected", "lod", "grad" and "offset" are used
  * below without a visible declaration (presumably derived from "flags"),
  * the two non-default case labels of the switch are not visible, and the
  * condition guarding the "texelFetch" block is not visible -- confirm
  * against the original file. */
3243  enum wined3d_shader_resource_type resource_type = ctx->reg_maps->resource_info[resource_idx].type;
3244  struct shader_glsl_ctx_priv *priv = ctx->backend_data;
3245  const struct wined3d_gl_info *gl_info = ctx->gl_info;
3246  BOOL legacy_syntax = needs_legacy_glsl_syntax(gl_info);
3247  BOOL shadow = glsl_is_shadow_sampler(ctx->shader, priv->cur_ps_args, resource_idx, sampler_idx);
3249  BOOL texrect = ctx->reg_maps->shader_version.type == WINED3D_SHADER_TYPE_PIXEL
3250  && priv->cur_ps_args->np2_fixup & (1u << resource_idx)
3251  && gl_info->supported[ARB_TEXTURE_RECTANGLE];
3255  const char *base = "texture", *type_part = "", *suffix = "";
3256  unsigned int coord_size, deriv_size;
3257 
3258  sample_function->data_type = ctx->reg_maps->resource_info[resource_idx].data_type;
3259  sample_function->emulate_lod = WINED3D_SHADER_RESOURCE_NONE;
3260 
 /* Out-of-range types would index past resource_type_info; fall back to 2D. */
3261  if (resource_type >= ARRAY_SIZE(resource_type_info))
3262  {
3263  ERR("Unexpected resource type %#x.\n", resource_type);
3264  resource_type = WINED3D_SHADER_RESOURCE_TEXTURE_2D;
3265  }
3266 
3267  /* Note that there's no such thing as a projected cube texture. */
3268  if (resource_type == WINED3D_SHADER_RESOURCE_TEXTURE_CUBE)
3269  projected = FALSE;
3270 
 /* Shadow lookups with an explicit LOD are rewritten as "Grad" lookups for
  * the resource types handled below; emulate_lod records which type was
  * rewritten. */
3271  if (shadow && lod)
3272  {
3273  switch (resource_type)
3274  {
3275  /* emulate textureLod(sampler2DArrayShadow, ...) using textureGradOffset */
3277  sample_function->emulate_lod = resource_type;
3278  grad = offset = TRUE;
3279  lod = FALSE;
3280  break;
3281 
3282  /* emulate textureLod(samplerCubeShadow, ...) using shadowCubeGrad */
3284  sample_function->emulate_lod = resource_type;
3285  grad = legacy_syntax = TRUE;
3286  lod = FALSE;
3287  break;
3288 
3289  default:
3290  break;
3291  }
3292  }
3293 
 /* Legacy syntax puts the resource type into the function name and uses
  * "shadow" instead of "texture" as the base for shadow samplers. */
3294  if (legacy_syntax)
3295  {
3296  if (shadow)
3297  base = "shadow";
3298 
3299  type_part = resource_type_info[resource_type].type_part;
3300  if (resource_type == WINED3D_SHADER_RESOURCE_TEXTURE_2D && texrect)
3301  type_part = "2DRect";
3302  if (!type_part[0] && resource_type != WINED3D_SHADER_RESOURCE_TEXTURE_CUBEARRAY)
3303  FIXME("Unhandled resource type %#x.\n", resource_type);
3304 
 /* Without core "Grad" support the function name needs the "ARB" suffix. */
3305  if (!lod && grad && !shader_glsl_has_core_grad(gl_info))
3306  {
3307  if (gl_info->supported[ARB_SHADER_TEXTURE_LOD])
3308  suffix = "ARB";
3309  else
3310  FIXME("Unsupported grad function.\n");
3311  }
3312  }
3313 
3315  {
 /* Only the LOAD and OFFSET flags are expected together with texelFetch. */
3316  static const DWORD texel_fetch_flags = WINED3D_GLSL_SAMPLE_LOAD | WINED3D_GLSL_SAMPLE_OFFSET;
3317  if (flags & ~texel_fetch_flags)
3318  ERR("Unexpected flags %#x for texelFetch.\n", flags & ~texel_fetch_flags);
3319 
3320  base = "texelFetch";
3321  type_part = "";
3322  }
3323 
 /* Name layout: base, type part, "Proj", "Lod" or "Grad", "Offset", suffix. */
3324  sample_function->name = string_buffer_get(priv->string_buffers);
3325  string_buffer_sprintf(sample_function->name, "%s%s%s%s%s%s", base, type_part, projected ? "Proj" : "",
3326  lod ? "Lod" : grad ? "Grad" : "", offset ? "Offset" : "", suffix);
3327 
3328  shader_glsl_get_coord_size(resource_type, &coord_size, &deriv_size);
 /* Shadow samplers count one extra coordinate component (presumably the
  * depth comparison reference -- confirm). */
3329  if (shadow)
3330  ++coord_size;
3331  sample_function->offset_size = offset ? deriv_size : 0;
3332  sample_function->coord_mask = (1u << coord_size) - 1;
3333  sample_function->deriv_mask = (1u << deriv_size) - 1;
3334  sample_function->output_single_component = shadow && !legacy_syntax;
3335 }
3336 
3338  struct glsl_sample_function *sample_function)
3339 {
 /* Hands sample_function->name back to priv->string_buffers through
  * string_buffer_release().
  *
  * NOTE(review): the first line of this definition (source line 3337) is
  * absent from this listing; "ctx" is presumably a parameter declared
  * there -- confirm against the original file. */
3340  const struct shader_glsl_ctx_priv *priv = ctx->backend_data;
3341 
3342  string_buffer_release(priv->string_buffers, sample_function->name);
3343 }
3344 
3345 static void shader_glsl_append_fixup_arg(char *arguments, const char *reg_name,
3346  BOOL sign_fixup, enum fixup_channel_source channel_source)
3347 {
3348  switch(channel_source)
3349  {
3350  case CHANNEL_SOURCE_ZERO:
3351  strcat(arguments, "0.0");
3352  break;
3353 
3354  case CHANNEL_SOURCE_ONE:
3355  strcat(arguments, "1.0");
3356  break;
3357 
3358  case CHANNEL_SOURCE_X:
3359  strcat(arguments, reg_name);
3360  strcat(arguments, ".x");
3361  break;
3362 
3363  case CHANNEL_SOURCE_Y:
3364  strcat(arguments, reg_name);
3365  strcat(arguments, ".y");
3366  break;
3367 
3368  case CHANNEL_SOURCE_Z:
3369  strcat(arguments, reg_name);
3370  strcat(arguments, ".z");
3371  break;
3372 
3373  case CHANNEL_SOURCE_W:
3374  strcat(arguments, reg_name);
3375  strcat(arguments, ".w");
3376  break;
3377 
3378  default:
3379  FIXME("Unhandled channel source %#x\n", channel_source);
3380  strcat(arguments, "undefined");
3381  break;
3382  }
3383 
3384  if (sign_fixup) strcat(arguments, " * 2.0 - 1.0");
3385 }
3386 
3388  const char *reg_name, DWORD mask, struct color_fixup_desc fixup)
3389 {
 /* Emits one GLSL assignment that rewrites the components of reg_name
  * selected by "mask" according to "fixup". Components whose fixup is the
  * identity are left out; complex fixups are reported and skipped.
  *
  * NOTE(review): source lines 3387, 3404, 3409-3410, 3414, 3419, 3424 and
  * 3429 are absent from this listing. "buffer" and "complex_fixup" are used
  * without a visible declaration, "mask_str" and "mask_size" are used
  * without a visible assignment, and the four brace blocks below have no
  * visible guard (presumably one write mask test per component) -- confirm
  * against the original file. */
3390  unsigned int mask_size, remaining;
3391  DWORD fixup_mask = 0;
3392  char arguments[256];
3393  char mask_str[6];
3394 
 /* Collect the components that need a fixup: a sign fixup, or a value
  * sourced from something other than the component itself. */
3395  if (fixup.x_sign_fixup || fixup.x_source != CHANNEL_SOURCE_X) fixup_mask |= WINED3DSP_WRITEMASK_0;
3396  if (fixup.y_sign_fixup || fixup.y_source != CHANNEL_SOURCE_Y) fixup_mask |= WINED3DSP_WRITEMASK_1;
3397  if (fixup.z_sign_fixup || fixup.z_source != CHANNEL_SOURCE_Z) fixup_mask |= WINED3DSP_WRITEMASK_2;
3398  if (fixup.w_sign_fixup || fixup.w_source != CHANNEL_SOURCE_W) fixup_mask |= WINED3DSP_WRITEMASK_3;
 /* Restrict the write mask to those components; nothing to emit if none
  * of them is written. */
3399  if (!(mask &= fixup_mask))
3400  return;
3401 
3402  if (is_complex_fixup(fixup))
3403  {
3405  FIXME("Complex fixup (%#x) not supported\n",complex_fixup);
3406  return;
3407  }
3408 
3411 
 /* Build the comma separated constructor arguments; "remaining" counts the
  * components still to be written so that no trailing ", " is emitted. */
3412  arguments[0] = '\0';
3413  remaining = mask_size;
3415  {
3416  shader_glsl_append_fixup_arg(arguments, reg_name, fixup.x_sign_fixup, fixup.x_source);
3417  if (--remaining) strcat(arguments, ", ");
3418  }
3420  {
3421  shader_glsl_append_fixup_arg(arguments, reg_name, fixup.y_sign_fixup, fixup.y_source);
3422  if (--remaining) strcat(arguments, ", ");
3423  }
3425  {
3426  shader_glsl_append_fixup_arg(arguments, reg_name, fixup.z_sign_fixup, fixup.z_source);
3427  if (--remaining) strcat(arguments, ", ");
3428  }
3430  {
3431  shader_glsl_append_fixup_arg(arguments, reg_name, fixup.w_sign_fixup, fixup.w_source);
3432  if (--remaining) strcat(arguments, ", ");
3433  }
3434 
 /* More than one component needs a vecN() constructor; a single component
  * is assigned directly. */
3435  if (mask_size > 1)
3436  shader_addline(buffer, "%s%s = vec%u(%s);\n", reg_name, mask_str, mask_size, arguments);
3437  else
3438  shader_addline(buffer, "%s%s = %s;\n", reg_name, mask_str, arguments);
3439 }
3440 
3442 {
 /* Resolves the GLSL name of the instruction's first destination register
  * and applies "fixup" to it, restricted to that destination's write mask,
  * through shader_glsl_color_correction_ext().
  *
  * NOTE(review): the signature (source line 3441) is absent from this
  * listing; "ins" and "fixup" are presumably its parameters -- confirm
  * against the original file. */
3443  char reg_name[256];
3444  BOOL is_color;
3445 
3446  shader_glsl_get_register_name(&ins->dst[0].reg, ins->dst[0].reg.data_type, reg_name, &is_color, ins);
3447  shader_glsl_color_correction_ext(ins->ctx->buffer, reg_name, ins->dst[0].write_mask, fixup);
3448 }
3449 
/* Emits the GLSL statement that samples a texture into ins->dst[0].
 *
 * The output is built in order: the destination, an optional "vec4("
 * wrapper for single component results, the sample function call on
 * "<prefix>_sampler<sampler_bind_idx>", the printf-style formatted
 * coordinate, an optional NP2 scale factor, optional derivatives or bias,
 * an optional constant offset, the destination swizzle, and finally a
 * color fixup statement when the fixup is not the identity.
 *
 * NOTE(review): source lines 3452, 3529 and 3534 are absent from this
 * listing. "dx", "dy", "bias" and "offset" are used below without a visible
 * declaration (presumably parameters declared on line 3452), and the two
 * non-default case labels of the emulate_lod switch are not visible --
 * confirm against the original file. */
3450 static void PRINTF_ATTR(9, 10) shader_glsl_gen_sample_code(const struct wined3d_shader_instruction *ins,
3451  unsigned int sampler_bind_idx, const struct glsl_sample_function *sample_function, DWORD swizzle,
3453  const char *coord_reg_fmt, ...)
3454 {
3455  static const struct wined3d_shader_texel_offset dummy_offset = {0, 0, 0};
3456  const struct wined3d_shader_version *version = &ins->ctx->reg_maps->shader_version;
3457  char dst_swizzle[6];
3458  struct color_fixup_desc fixup;
3459  BOOL np2_fixup = FALSE;
3460  va_list args;
3461  int ret;
3462 
3463  shader_glsl_swizzle_to_str(swizzle, FALSE, ins->dst[0].write_mask, dst_swizzle);
3464 
3465  /* If ARB_texture_swizzle is supported we don't need to do anything here.
3466  * We actually rely on it for vertex shaders and SM4+. */
3467  if (version->type == WINED3D_SHADER_TYPE_PIXEL && version->major < 4)
3468  {
3469  const struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data;
3470  fixup = priv->cur_ps_args->color_fixup[sampler_bind_idx];
3471 
3472  if (priv->cur_ps_args->np2_fixup & (1u << sampler_bind_idx))
3473  np2_fixup = TRUE;
3474  }
3475  else
3476  {
3477  fixup = COLOR_FIXUP_IDENTITY;
3478  }
3479 
3480  shader_glsl_append_dst_ext(ins->ctx->buffer, ins, &ins->dst[0], sample_function->data_type);
3481 
3482  if (sample_function->output_single_component)
3483  shader_addline(ins->ctx->buffer, "vec4(");
3484 
3485  shader_addline(ins->ctx->buffer, "%s(%s_sampler%u, ",
3486  sample_function->name->buffer, shader_glsl_get_prefix(version->type), sampler_bind_idx);
3487 
 /* Append the formatted coordinate. A non-zero result from
  * shader_vaddline() is passed to string_buffer_resize() and the append is
  * retried; the loop ends on success or when the resize fails. */
3488  for (;;)
3489  {
3490  va_start(args, coord_reg_fmt);
3491  ret = shader_vaddline(ins->ctx->buffer, coord_reg_fmt, args);
3492  va_end(args);
3493  if (!ret)
3494  break;
3495  if (!string_buffer_resize(ins->ctx->buffer, ret))
3496  break;
3497  }
3498 
3499  if (np2_fixup)
3500  {
3501  const struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data;
3502  const unsigned char idx = priv->cur_np2fixup_info->idx[sampler_bind_idx];
3503 
 /* Each ps_samplerNP2Fixup element serves two fixup indices: an even idx
  * reads .xy (or .x), an odd idx reads .zw (or .z). Components beyond the
  * second are scaled by 1.0. */
3504  switch (shader_glsl_get_write_mask_size(sample_function->coord_mask))
3505  {
3506  case 1:
3507  shader_addline(ins->ctx->buffer, " * ps_samplerNP2Fixup[%u].%s",
3508  idx >> 1, (idx % 2) ? "z" : "x");
3509  break;
3510  case 2:
3511  shader_addline(ins->ctx->buffer, " * ps_samplerNP2Fixup[%u].%s",
3512  idx >> 1, (idx % 2) ? "zw" : "xy");
3513  break;
3514  case 3:
3515  shader_addline(ins->ctx->buffer, " * vec3(ps_samplerNP2Fixup[%u].%s, 1.0)",
3516  idx >> 1, (idx % 2) ? "zw" : "xy");
3517  break;
3518  case 4:
3519  shader_addline(ins->ctx->buffer, " * vec4(ps_samplerNP2Fixup[%u].%s, 1.0, 1.0)",
3520  idx >> 1, (idx % 2) ? "zw" : "xy");
3521  break;
3522  }
3523  }
 /* When the LOD is emulated, missing derivatives default to zero vectors
  * and a missing offset defaults to a zero offset. Only an LOD of "0" can
  * be emulated this way; anything else is reported. */
3524  if (sample_function->emulate_lod)
3525  {
3526  if (strcmp(bias, "0")) FIXME("Don't know how to emulate lod level %s\n", bias);
3527  switch (sample_function->emulate_lod)
3528  {
3530  if (!dx) dx = "vec2(0.0, 0.0)";
3531  if (!dy) dy = "vec2(0.0, 0.0)";
3532  break;
3533 
3535  if (!dx) dx = "vec3(0.0, 0.0, 0.0)";
3536  if (!dy) dy = "vec3(0.0, 0.0, 0.0)";
3537  break;
3538 
3539  default:
3540  break;
3541  }
3542  if (!offset) offset = &dummy_offset;
3543  }
 /* Derivatives take precedence over the bias argument. */
3544  if (dx && dy)
3545  shader_addline(ins->ctx->buffer, ", %s, %s", dx, dy);
3546  else if (bias)
3547  shader_addline(ins->ctx->buffer, ", %s", bias);
3548  if (sample_function->offset_size)
3549  {
 /* The fourth element is implicitly zero-initialized. */
3550  int offset_immdata[4] = {offset->u, offset->v, offset->w};
3551  shader_addline(ins->ctx->buffer, ", ");
3552  shader_glsl_append_imm_ivec(ins->ctx->buffer, offset_immdata, sample_function->offset_size);
3553  }
3554  shader_addline(ins->ctx->buffer, ")");
3555 
 /* Close the "vec4(" wrapper opened above. */
3556  if (sample_function->output_single_component)
3557  shader_addline(ins->ctx->buffer, ")");
3558 
3559  shader_addline(ins->ctx->buffer, "%s);\n", dst_swizzle);
3560 
3561  if (!is_identity_fixup(fixup))
3562  shader_glsl_color_correction(ins, fixup);
3563 }
3564 
3566 {
 /* Appends three fixed GLSL statements to "buffer" that adjust gl_Position,
  * as explained by the comments below.
  *
  * NOTE(review): the signature (source line 3565) is absent from this
  * listing; "buffer" is presumably its parameter -- confirm against the
  * original file. */
3567  /* Write the final position.
3568  *
3569  * OpenGL coordinates specify the center of the pixel while D3D coords
3570  * specify the corner. The offsets are stored in z and w in
3571  * pos_fixup. pos_fixup.y contains 1.0 or -1.0 to turn the rendering
3572  * upside down for offscreen rendering. pos_fixup.x contains 1.0 to allow
3573  * a MAD. */
3574  shader_addline(buffer, "gl_Position.y = gl_Position.y * pos_fixup.y;\n");
3575  shader_addline(buffer, "gl_Position.xy += pos_fixup.zw * gl_Position.ww;\n");
3576 
3577  /* Z coord [0;1]->[-1;1] mapping, see comment in get_projection_matrix()
3578  * in utils.c
3579  *
3580  * Basically we want (in homogeneous coordinates) z = z * 2 - 1. However,
3581  * shaders are run before the homogeneous divide, so we have to take the w
3582  * into account: z = ((z / w) * 2 - 1) * w, which is the same as
3583  * z = z * 2 - w. */
3584  shader_addline(buffer, "gl_Position.z = gl_Position.z * 2.0 - gl_Position.w;\n");
3585 }
3586 
3587 /*****************************************************************************
3588  * Begin processing individual instruction opcodes
3589  ****************************************************************************/
3590 
3591 static void shader_glsl_binop(const struct wined3d_shader_instruction *ins)
3592 {
3593  struct wined3d_string_buffer *buffer = ins->ctx->buffer;
3594  struct glsl_src_param src0_param;
3595  struct glsl_src_param src1_param;
3596  DWORD write_mask;
3597  const char *op;
3598 
3599  /* Determine the GLSL operator to use based on the opcode */
3600  switch (ins->handler_idx)
3601  {
3602  case WINED3DSIH_ADD: op = "+"; break;
3603  case WINED3DSIH_AND: op = "&"; break;
3604  case WINED3DSIH_DIV: op = "/"; break;
3605  case WINED3DSIH_IADD: op = "+"; break;
3606  case WINED3DSIH_ISHL: op = "<<"; break;
3607  case WINED3DSIH_ISHR: op = ">>"; break;
3608  case WINED3DSIH_MUL: op = "*"; break;
3609  case WINED3DSIH_OR: op = "|"; break;
3610  case WINED3DSIH_SUB: op = "-"; break;
3611  case WINED3DSIH_USHR: op = ">>"; break;
3612  case WINED3DSIH_XOR: op = "^"; break;
3613  default:
3614  op = "<unhandled operator>";
3615  FIXME("Opcode %s not yet handled in GLSL.\n", debug_d3dshaderinstructionhandler(ins->handler_idx));
3616  break;
3617  }
3618 
3619  write_mask = shader_glsl_append_dst(buffer, ins);
3620  shader_glsl_add_src_param(ins, &ins->src[0], write_mask, &src0_param);
3621  shader_glsl_add_src_param(ins, &ins->src[1], write_mask, &src1_param);
3622  shader_addline(buffer, "%s %s %s);\n", src0_param.param_str, op, src1_param.param_str);
3623 }
3624 
3625 static void shader_glsl_relop(const struct wined3d_shader_instruction *ins)
3626 {
3627  struct wined3d_string_buffer *buffer = ins->ctx->buffer;
3628  struct glsl_src_param src0_param;
3629  struct glsl_src_param src1_param;
3630  unsigned int mask_size;
3631  DWORD write_mask;
3632  const char *op;
3633 
3634  write_mask = shader_glsl_append_dst(buffer, ins);
3635  mask_size = shader_glsl_get_write_mask_size(write_mask);
3636  shader_glsl_add_src_param(ins, &ins->src[0], write_mask, &src0_param);
3637  shader_glsl_add_src_param(ins, &ins->src[1], write_mask, &src1_param);
3638 
3639  if (mask_size > 1)
3640  {
3641  switch (ins->handler_idx)
3642  {
3643  case WINED3DSIH_EQ: op = "equal"; break;
3644  case WINED3DSIH_IEQ: op = "equal"; break;
3645  case WINED3DSIH_GE: op = "greaterThanEqual"; break;
3646  case WINED3DSIH_IGE: op = "greaterThanEqual"; break;
3647  case WINED3DSIH_UGE: op = "greaterThanEqual"; break;
3648  case WINED3DSIH_LT: op = "lessThan"; break;
3649  case WINED3DSIH_ILT: op = "lessThan"; break;
3650  case WINED3DSIH_ULT: op = "lessThan"; break;
3651  case WINED3DSIH_NE: op = "notEqual"; break;
3652  case WINED3DSIH_INE: op = "notEqual"; break;
3653  default:
3654  op = "<unhandled operator>";
3655  ERR("Unhandled opcode %#x.\n", ins->handler_idx);
3656  break;
3657  }
3658 
3659  shader_addline(buffer, "uvec%u(%s(%s, %s)) * 0xffffffffu);\n",
3660  mask_size, op, src0_param.param_str, src1_param.param_str);
3661  }
3662  else
3663  {
3664  switch (ins->handler_idx)
3665  {
3666  case WINED3DSIH_EQ: op = "=="; break;
3667  case WINED3DSIH_IEQ: op = "=="; break;
3668  case WINED3DSIH_GE: op = ">="; break;
3669  case WINED3DSIH_IGE: op = ">="; break;
3670  case WINED3DSIH_UGE: op = ">="; break;
3671  case WINED3DSIH_LT: op = "<"; break;
3672  case WINED3DSIH_ILT: op = "<"; break;
3673  case WINED3DSIH_ULT: op = "<"; break;
3674  case WINED3DSIH_NE: op = "!="; break;
3675  case WINED3DSIH_INE: op = "!="; break;
3676  default:
3677  op = "<unhandled operator>";
3678  ERR("Unhandled opcode %#x.\n", ins->handler_idx);
3679  break;
3680  }
3681 
3682  shader_addline(buffer, "%s %s %s ? 0xffffffffu : 0u);\n",
3683  src0_param.param_str, op, src1_param.param_str);
3684  }
3685 }
3686 
/* Emits "<op>src0" for the prefix operators: "-" for INEG and "~" for NOT.
 * Any other opcode is reported and the placeholder text
 * "<unhandled operator>" is emitted in place of the operator.
 *
 * NOTE(review): source line 3700 is absent from this listing, so the ERR()
 * call below is missing its argument and closing parenthesis -- confirm
 * against the original file. */
3687 static void shader_glsl_unary_op(const struct wined3d_shader_instruction *ins)
3688 {
3689  struct glsl_src_param src_param;
3690  DWORD write_mask;
3691  const char *op;
3692 
3693  switch (ins->handler_idx)
3694  {
3695  case WINED3DSIH_INEG: op = "-"; break;
3696  case WINED3DSIH_NOT: op = "~"; break;
3697  default:
3698  op = "<unhandled operator>";
3699  ERR("Unhandled opcode %s.\n",
3701  break;
3702  }
3703 
3704  write_mask = shader_glsl_append_dst(ins->ctx->buffer, ins);
3705  shader_glsl_add_src_param(ins, &ins->src[0], write_mask, &src_param);
3706  shader_addline(ins->ctx->buffer, "%s%s);\n", op, src_param.param_str);
3707 }
3708 
3710 {
 /* Handles a multiply with two destinations. Only dst[1] is written, with
  * the plain product "src0 * src1". A non-NULL dst[0] is reported as
  * unimplemented and nothing is emitted for it.
  *
  * NOTE(review): the signature (source line 3709) is absent from this
  * listing; "ins" is presumably its parameter -- confirm against the
  * original file. */
3711  struct wined3d_string_buffer *buffer = ins->ctx->buffer;
3712  struct glsl_src_param src0_param;
3713  struct glsl_src_param src1_param;
3714  DWORD write_mask;
3715 
3716  /* If we have ARB_gpu_shader5, we can use imulExtended() / umulExtended().
3717  * If not, we can emulate it. */
3718  if (ins->dst[0].reg.type != WINED3DSPR_NULL)
3719  FIXME("64-bit integer multiplies not implemented.\n");
3720 
3721  if (ins->dst[1].reg.type != WINED3DSPR_NULL)
3722  {
3723  write_mask = shader_glsl_append_dst_ext(buffer, ins, &ins->dst[1], ins->dst[1].reg.data_type);
3724  shader_glsl_add_src_param(ins, &ins->src[0], write_mask, &src0_param);
3725  shader_glsl_add_src_param(ins, &ins->src[1], write_mask, &src1_param);
3726 
3727  shader_addline(ins->ctx->buffer, "%s * %s);\n",
3728  src0_param.param_str, src1_param.param_str);
3729  }
3730 }
3731 
/* Emits an unsigned divide with two optional destinations: dst[0] receives
 * "src0 / src1" and dst[1] receives "src0 % src1". A NULL destination is
 * skipped; nothing is emitted when both are NULL.
 *
 * NOTE(review): source line 3755 is absent from this listing. The final
 * "tmp0%s);" line in the first branch has no visible destination in front
 * of it; presumably the missing line starts the assignment to dst[0] --
 * confirm against the original file. */
3732 static void shader_glsl_udiv(const struct wined3d_shader_instruction *ins)
3733 {
3734  struct wined3d_string_buffer *buffer = ins->ctx->buffer;
3735  struct glsl_src_param src0_param, src1_param;
3736  DWORD write_mask;
3737 
3738  if (ins->dst[0].reg.type != WINED3DSPR_NULL)
3739  {
3740  if (ins->dst[1].reg.type != WINED3DSPR_NULL)
3741  {
3742  char dst_mask[6];
3743 
 /* Both results are wanted: the quotient is first stored in tmp0, then
  * the remainder is written to dst[1], and tmp0 is used last. Presumably
  * this keeps the sources intact when a destination aliases one of
  * them -- confirm. */
3744  write_mask = shader_glsl_get_write_mask(&ins->dst[0], dst_mask);
3745  shader_glsl_add_src_param(ins, &ins->src[0], write_mask, &src0_param);
3746  shader_glsl_add_src_param(ins, &ins->src[1], write_mask, &src1_param);
3747  shader_addline(buffer, "tmp0%s = uintBitsToFloat(%s / %s);\n",
3748  dst_mask, src0_param.param_str, src1_param.param_str);
3749 
3750  write_mask = shader_glsl_append_dst_ext(buffer, ins, &ins->dst[1], ins->dst[1].reg.data_type);
3751  shader_glsl_add_src_param(ins, &ins->src[0], write_mask, &src0_param);
3752  shader_glsl_add_src_param(ins, &ins->src[1], write_mask, &src1_param);
3753  shader_addline(buffer, "%s %% %s);\n", src0_param.param_str, src1_param.param_str);
3754 
3756  shader_addline(buffer, "tmp0%s);\n", dst_mask);
3757  }
3758  else
3759  {
 /* Quotient only. */
3760  write_mask = shader_glsl_append_dst_ext(buffer, ins, &ins->dst[0], ins->dst[0].reg.data_type);
3761  shader_glsl_add_src_param(ins, &ins->src[0], write_mask, &src0_param);
3762  shader_glsl_add_src_param(ins, &ins->src[1], write_mask, &src1_param);
3763  shader_addline(buffer, "%s / %s);\n", src0_param.param_str, src1_param.param_str);
3764  }
3765  }
3766  else if (ins->dst[1].reg.type != WINED3DSPR_NULL)
3767  {
 /* Remainder only. */
3768  write_mask = shader_glsl_append_dst_ext(buffer, ins, &ins->dst[1], ins->dst[1].reg.data_type);
3769  shader_glsl_add_src_param(ins, &ins->src[0], write_mask, &src0_param);
3770  shader_glsl_add_src_param(ins, &ins->src[1], write_mask, &src1_param);
3771  shader_addline(buffer, "%s %% %s);\n", src0_param.param_str, src1_param.param_str);
3772  }
3773 }
3774 
3775 /* Process the WINED3DSIO_MOV opcode using GLSL (dst = src) */
3776 static void shader_glsl_mov(const struct wined3d_shader_instruction *ins)
3777 {
3778  const struct wined3d_gl_info *gl_info = ins->ctx->gl_info;
3779  struct wined3d_string_buffer *buffer = ins->ctx->buffer;
3780  struct glsl_src_param src0_param;
3781  DWORD write_mask;
3782 
3783  write_mask = shader_glsl_append_dst(buffer, ins);
3784  shader_glsl_add_src_param(ins, &ins->src[0], write_mask, &src0_param);
3785 
3786  /* In vs_1_1 WINED3DSIO_MOV can write to the address register. In later
3787  * shader versions WINED3DSIO_MOVA is used for this. */
3788  if (ins->ctx->reg_maps->shader_version.major == 1
3789  && ins->ctx->reg_maps->shader_version.type == WINED3D_SHADER_TYPE_VERTEX
3790  && ins->dst[0].reg.type == WINED3DSPR_ADDR)
3791  {
3792  /* This is a simple floor() */
3793  unsigned int mask_size = shader_glsl_get_write_mask_size(write_mask);
3794  if (mask_size > 1) {
3795  shader_addline(buffer, "ivec%d(floor(%s)));\n", mask_size, src0_param.param_str);
3796  } else {
3797  shader_addline(buffer, "int(floor(%s)));\n", src0_param.param_str);
3798  }
3799  }
3800  else if (ins->handler_idx == WINED3DSIH_MOVA)
3801  {
3802  unsigned int mask_size = shader_glsl_get_write_mask_size(write_mask);
3803 
3804  if (shader_glsl_get_version(gl_info) >= 130 || gl_info->supported[EXT_GPU_SHADER4])
3805  {
3806  if (mask_size > 1)
3807  shader_addline(buffer, "ivec%d(round(%s)));\n", mask_size, src0_param.param_str);
3808  else
3809  shader_addline(buffer, "int(round(%s)));\n", src0_param.param_str);
3810  }
3811  else
3812  {
3813  if (mask_size > 1)
3814  shader_addline(buffer, "ivec%d(floor(abs(%s) + vec%d(0.5)) * sign(%s)));\n",
3815  mask_size, src0_param.param_str, mask_size, src0_param.param_str);
3816  else
3817  shader_addline(buffer, "int(floor(abs(%s) + 0.5) * sign(%s)));\n",
3818  src0_param.param_str, src0_param.param_str);
3819  }
3820  }
3821  else
3822  {
3823  shader_addline(buffer, "%s);\n", src0_param.param_str);
3824  }
3825 }
3826 
3827 /* Process the dot product operators DP3 and DP4 in GLSL (dst = dot(src0, src1)) */
/* The sources are read with a mask chosen by the opcode rather than by the
 * destination write mask. The scalar dot() result is wrapped in vecN() when
 * the destination has more than one component.
 *
 * NOTE(review): source line 3843 is absent from this listing, so the DP3
 * branch below has no visible statement (presumably it selects a three
 * component source mask) -- confirm against the original file. */
3828 static void shader_glsl_dot(const struct wined3d_shader_instruction *ins)
3829 {
3830  struct wined3d_string_buffer *buffer = ins->ctx->buffer;
3831  struct glsl_src_param src0_param;
3832  struct glsl_src_param src1_param;
3833  DWORD dst_write_mask, src_write_mask;
3834  unsigned int dst_size;
3835 
3836  dst_write_mask = shader_glsl_append_dst(buffer, ins);
3837  dst_size = shader_glsl_get_write_mask_size(dst_write_mask);
3838 
3839  /* dp4 works on vec4, dp3 on vec3, etc. */
3840  if (ins->handler_idx == WINED3DSIH_DP4)
3841  src_write_mask = WINED3DSP_WRITEMASK_ALL;
3842  else if (ins->handler_idx == WINED3DSIH_DP3)
3844  else
3845  src_write_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1;
3846 
3847  shader_glsl_add_src_param(ins, &ins->src[0], src_write_mask, &src0_param);
3848  shader_glsl_add_src_param(ins, &ins->src[1], src_write_mask, &src1_param);
3849 
3850  if (dst_size > 1) {
3851  shader_addline(buffer, "vec%d(dot(%s, %s)));\n", dst_size, src0_param.param_str, src1_param.param_str);
3852  } else {
3853  shader_addline(buffer, "dot(%s, %s));\n", src0_param.param_str, src1_param.param_str);
3854  }
3855 }
3856 
3857 /* Note that this instruction has some restrictions. The destination write mask
3858  * can't contain the w component, and the source swizzles have to be .xyzw */
/* Emits "cross(src0, src1)" followed by the destination write mask string,
 * so only the written components of the result are selected.
 *
 * NOTE(review): source line 3861 is absent from this listing; "src_mask" is
 * used below without a visible declaration -- confirm against the original
 * file. */
3859 static void shader_glsl_cross(const struct wined3d_shader_instruction *ins)
3860 {
3862  struct glsl_src_param src0_param;
3863  struct glsl_src_param src1_param;
3864  char dst_mask[6];
3865 
3866  shader_glsl_get_write_mask(&ins->dst[0], dst_mask);
3867  shader_glsl_append_dst(ins->ctx->buffer, ins);
3868  shader_glsl_add_src_param(ins, &ins->src[0], src_mask, &src0_param);
3869  shader_glsl_add_src_param(ins, &ins->src[1], src_mask, &src1_param);
3870  shader_addline(ins->ctx->buffer, "cross(%s, %s)%s);\n", src0_param.param_str, src1_param.param_str, dst_mask);
3871 }
3872 
3873 static void shader_glsl_cut(const struct wined3d_shader_instruction *ins)
3874 {
3875  unsigned int stream = ins->handler_idx == WINED3DSIH_CUT ? 0 : ins->src[0].reg.idx[0].offset;
3876 
3877  if (!stream)
3878  shader_addline(ins->ctx->buffer, "EndPrimitive();\n");
3879  else
3880  FIXME("Unhandled primitive stream %u.\n", stream);
3881 }
3882 
3883 /* Process the WINED3DSIO_POW instruction in GLSL (dst = |src0|^src1)
3884  * Src0 and src1 are scalars. Note that D3D uses the absolute of src0, while
3885  * GLSL uses the value as-is. */
3886 static void shader_glsl_pow(const struct wined3d_shader_instruction *ins)
3887 {
3888  struct wined3d_string_buffer *buffer = ins->ctx->buffer;
3889  struct glsl_src_param src0_param;
3890  struct glsl_src_param src1_param;
3891  DWORD dst_write_mask;
3892  unsigned int dst_size;
3893 
3894  dst_write_mask = shader_glsl_append_dst(buffer, ins);
3895  dst_size = shader_glsl_get_write_mask_size(dst_write_mask);
3896 
3897  shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0, &src0_param);
3898  shader_glsl_add_src_param(ins, &ins->src[1], WINED3DSP_WRITEMASK_0, &src1_param);
3899 
3900  if (dst_size > 1)
3901  {
3902  shader_addline(buffer, "vec%u(%s == 0.0 ? 1.0 : pow(abs(%s), %s)));\n",
3903  dst_size, src1_param.param_str, src0_param.param_str, src1_param.param_str);
3904  }
3905  else
3906  {
3907  shader_addline(buffer, "%s == 0.0 ? 1.0 : pow(abs(%s), %s));\n",
3908  src1_param.param_str, src0_param.param_str, src1_param.param_str);
3909  }
3910 }
3911 
3912 /* Map the opcode 1-to-1 to the GL code (arg->dst = instruction(src0, src1, ...) */
/* All sources are emitted as a comma separated list using the destination
 * write mask. An unknown opcode is reported and uses an empty function name.
 *
 * NOTE(review): source lines 3957 and 3959 are absent from this listing.
 * The "31 - " prefix below has no visible guard (presumably it applies only
 * to some of the bit-scan opcodes), and no visible line emits
 * "instruction" with its opening parenthesis, although "));" closes one
 * at the end -- confirm against the original file. */
3913 static void shader_glsl_map2gl(const struct wined3d_shader_instruction *ins)
3914 {
3915  struct wined3d_string_buffer *buffer = ins->ctx->buffer;
3916  struct glsl_src_param src_param;
3917  const char *instruction;
3918  DWORD write_mask;
3919  unsigned i;
3920 
3921  /* Determine the GLSL function to use based on the opcode */
3922  /* TODO: Possibly make this a table for faster lookups */
3923  switch (ins->handler_idx)
3924  {
3925  case WINED3DSIH_ABS: instruction = "abs"; break;
3926  case WINED3DSIH_BFREV: instruction = "bitfieldReverse"; break;
3927  case WINED3DSIH_COUNTBITS: instruction = "bitCount"; break;
3928  case WINED3DSIH_DSX: instruction = "dFdx"; break;
3929  case WINED3DSIH_DSX_COARSE: instruction = "dFdxCoarse"; break;
3930  case WINED3DSIH_DSX_FINE: instruction = "dFdxFine"; break;
 /* The y derivatives are multiplied by ycorrection.y. */
3931  case WINED3DSIH_DSY: instruction = "ycorrection.y * dFdy"; break;
3932  case WINED3DSIH_DSY_COARSE: instruction = "ycorrection.y * dFdyCoarse"; break;
3933  case WINED3DSIH_DSY_FINE: instruction = "ycorrection.y * dFdyFine"; break;
3934  case WINED3DSIH_FIRSTBIT_HI: instruction = "findMSB"; break;
3935  case WINED3DSIH_FIRSTBIT_LO: instruction = "findLSB"; break;
3936  case WINED3DSIH_FIRSTBIT_SHI: instruction = "findMSB"; break;
3937  case WINED3DSIH_FRC: instruction = "fract"; break;
3938  case WINED3DSIH_IMAX: instruction = "max"; break;
3939  case WINED3DSIH_IMIN: instruction = "min"; break;
3940  case WINED3DSIH_MAX: instruction = "max"; break;
3941  case WINED3DSIH_MIN: instruction = "min"; break;
3942  case WINED3DSIH_ROUND_NE: instruction = "roundEven"; break;
3943  case WINED3DSIH_ROUND_NI: instruction = "floor"; break;
3944  case WINED3DSIH_ROUND_PI: instruction = "ceil"; break;
3945  case WINED3DSIH_ROUND_Z: instruction = "trunc"; break;
3946  case WINED3DSIH_SQRT: instruction = "sqrt"; break;
3947  case WINED3DSIH_UMAX: instruction = "max"; break;
3948  case WINED3DSIH_UMIN: instruction = "min"; break;
3949  default: instruction = "";
3950  ERR("Opcode %s not yet handled in GLSL.\n", debug_d3dshaderinstructionhandler(ins->handler_idx));
3951  break;
3952  }
3953 
3954  write_mask = shader_glsl_append_dst(buffer, ins);
3955 
3956  /* In D3D bits are numbered from the most significant bit. */
3958  shader_addline(buffer, "31 - ");
3960 
3961  if (ins->src_count)
3962  {
3963  shader_glsl_add_src_param(ins, &ins->src[0], write_mask, &src_param);
3964  shader_addline(buffer, "%s", src_param.param_str);
3965  for (i = 1; i < ins->src_count; ++i)
3966  {
3967  shader_glsl_add_src_param(ins, &ins->src[i], write_mask, &src_param);
3968  shader_addline(buffer, ", %s", src_param.param_str);
3969  }
3970  }
3971 
3972  shader_addline(buffer, "));\n");
3973 }
3974 
/* Emits a half-float conversion one destination component at a time, using
 * either "unpackHalf2x16(src).x" or "packHalf2x16(vec2(src, 0.0))".
 *
 * NOTE(review): source lines 3977 and 3983 are absent from this listing.
 * "dst" is used without a visible declaration, and the condition that
 * selects between the two format strings (the start of the "fmt"
 * assignment) is not visible -- confirm against the original file. */
3975 static void shader_glsl_float16(const struct wined3d_shader_instruction *ins)
3976 {
3978  struct glsl_src_param src;
3979  DWORD write_mask;
3980  const char *fmt;
3981  unsigned int i;
3982 
3984  ? "unpackHalf2x16(%s).x);\n" : "packHalf2x16(vec2(%s, 0.0)));\n";
3985 
 /* Work on a copy of the destination whose write mask is narrowed to a
  * single component per iteration; components for which
  * shader_glsl_append_dst_ext() returns an empty mask are skipped. */
3986  dst = ins->dst[0];
3987  for (i = 0; i < 4; ++i)
3988  {
3989  dst.write_mask = ins->dst[0].write_mask & (WINED3DSP_WRITEMASK_0 << i);
3990  if (!(write_mask = shader_glsl_append_dst_ext(ins->ctx->buffer, ins,
3991  &dst, dst.reg.data_type)))
3992  continue;
3993 
3994  shader_glsl_add_src_param(ins, &ins->src[0], write_mask, &src);
3995  shader_addline(ins->ctx->buffer, fmt, src.param_str);
3996  }
3997 }
3998 
4000 {
 /* Emits bitfieldInsert() / bitfieldExtract() calls for BFI, IBFE and UBFE,
  * one destination component at a time. Other opcodes are reported and
  * nothing is emitted.
  *
  * NOTE(review): source lines 3999, 4002, 4039 and 4047 are absent from this
  * listing. The signature is not visible ("ins" is presumably its
  * parameter), "dst" is used without a visible declaration, no visible line
  * emits "instruction" with its opening parenthesis, and the final
  * "tmp0%s);" has no visible destination in front of it -- confirm against
  * the original file. */
4001  struct wined3d_string_buffer *buffer = ins->ctx->buffer;
4003  struct glsl_src_param src[4];
4004  const char *instruction;
4005  BOOL tmp_dst = FALSE;
4006  char mask_char[6];
4007  unsigned int i, j;
4008  DWORD write_mask;
4009 
4010  switch (ins->handler_idx)
4011  {
4012  case WINED3DSIH_BFI: instruction = "bitfieldInsert"; break;
4013  case WINED3DSIH_IBFE: instruction = "bitfieldExtract"; break;
4014  case WINED3DSIH_UBFE: instruction = "bitfieldExtract"; break;
4015  default:
4016  ERR("Unhandled opcode %#x.\n", ins->handler_idx);
4017  return;
4018  }
4019 
 /* If the destination register is also one of the sources (same type and
  * index), the per-component results go to tmp0 first. */
4020  for (i = 0; i < ins->src_count; ++i)
4021  {
4022  if (ins->dst[0].reg.idx[0].offset == ins->src[i].reg.idx[0].offset
4023  && ins->dst[0].reg.type == ins->src[i].reg.type)
4024  tmp_dst = TRUE;
4025  }
4026 
4027  dst = ins->dst[0];
4028  for (i = 0; i < 4; ++i)
4029  {
 /* Narrow the write mask to component i; unwritten components are
  * skipped. */
4030  dst.write_mask = ins->dst[0].write_mask & (WINED3DSP_WRITEMASK_0 << i);
4031  if (tmp_dst && (write_mask = shader_glsl_get_write_mask(&dst, mask_char)))
4032  shader_addline(buffer, "tmp0%s = %sBitsToFloat(", mask_char,
4033  dst.reg.data_type == WINED3D_DATA_INT ? "int" : "uint");
4034  else if (!(write_mask = shader_glsl_append_dst_ext(buffer, ins, &dst, dst.reg.data_type)))
4035  continue;
4036 
4037  for (j = 0; j < ins->src_count; ++j)
4038  shader_glsl_add_src_param(ins, &ins->src[j], write_mask, &src[j]);
 /* Sources are emitted in reverse order: the trailing ones first, then
  * src[1] and src[0], each masked to its low 5 bits. */
4040  for (j = 0; j < ins->src_count - 2; ++j)
4041  shader_addline(buffer, "%s, ", src[ins->src_count - j - 1].param_str);
4042  shader_addline(buffer, "%s & 0x1f, %s & 0x1f));\n", src[1].param_str, src[0].param_str);
4043  }
4044 
4045  if (tmp_dst)
4046  {
4048  shader_glsl_get_write_mask(&ins->dst[0], mask_char);
4049  shader_addline(buffer, "tmp0%s);\n", mask_char);
4050  }
4051 }
4052 
/* Handler for instructions that produce no GLSL output. */
static void shader_glsl_nop(const struct wined3d_shader_instruction *ins)
{
    /* Intentionally empty. */
}
4054 
4055 static void shader_glsl_nrm(const struct wined3d_shader_instruction *ins)
4056 {
4057  struct wined3d_string_buffer *buffer = ins->ctx->buffer;
4058  struct glsl_src_param src_param;
4059  unsigned int mask_size;
4060  DWORD write_mask;
4061  char dst_mask[6];
4062 
4063  write_mask = shader_glsl_get_write_mask(ins->dst, dst_mask);
4064  mask_size = shader_glsl_get_write_mask_size(write_mask);
4065  shader_glsl_add_src_param(ins, &ins->src[0], write_mask, &src_param);
4066 
4067  shader_addline(buffer, "tmp0.x = dot(%s, %s);\n",
4068  src_param.param_str, src_param.param_str);
4070 
4071  if (mask_size > 1)
4072  {
4073