VirtualBox

source: vbox/trunk/src/VBox/Devices/Graphics/shaderlib/glsl_shader.c@ 68994

Last change on this file since 68994 was 68994, checked in by vboxsync, 8 years ago

Fix switch statement fall-through warnings with gcc 7.2.
bugref:8192: gcc warnings

gcc 7.1 and later add a switch statement fall-through warning level, which
-Wall sets to level 3. At this level, fall-throughs have to have at least
a comment following particular requirements (see gcc manual). This change
fixes a few places in the code to meet these requirements. Currently this
warning prevents building with kObjCache enabled, as the compiler checks
the comments in the source, which are stripped out by the object cache.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 218.1 KB
Line 
1/*
2 * GLSL pixel and vertex shader implementation
3 *
4 * Copyright 2006 Jason Green
5 * Copyright 2006-2007 Henri Verbeet
6 * Copyright 2007-2008 Stefan Dösinger for CodeWeavers
7 * Copyright 2009 Henri Verbeet for CodeWeavers
8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
22 */
23
24/*
25 * Oracle LGPL Disclaimer: For the avoidance of doubt, except that if any license choice
26 * other than GPL or LGPL is available it will apply instead, Oracle elects to use only
27 * the Lesser General Public License version 2.1 (LGPLv2) at this time for any software where
28 * a choice of LGPL license versions is made available with the language indicating
29 * that LGPLv2 or any later version may be used, or where a choice of which version
30 * of the LGPL is applied is otherwise unspecified.
31 */
32
33/*
34 * D3D shader asm has swizzles on source parameters, and write masks for
35 * destination parameters. GLSL uses swizzles for both. The result of this is
36 * that for example "mov dst.xw, src.zyxw" becomes "dst.xw = src.zw" in GLSL.
37 * Ie, to generate a proper GLSL source swizzle, we need to take the D3D write
38 * mask for the destination parameter into account.
39 */
40
41#include "config.h"
42#include "wine/port.h"
43#include <limits.h>
44#include <stdio.h>
45#include "wined3d_private.h"
46
47WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader);
48WINE_DECLARE_DEBUG_CHANNEL(d3d_constants);
49WINE_DECLARE_DEBUG_CHANNEL(d3d_caps);
50WINE_DECLARE_DEBUG_CHANNEL(d3d);
51
52#ifdef VBOX_WITH_VMSVGA
53#define LOG_GROUP LOG_GROUP_DEV_VMSVGA
54#include <VBox/log.h>
55#undef WDLOG
56#define WDLOG(_m) Log(_m)
57#undef CONST
58#define CONST const
59#endif
60
61#define GLINFO_LOCATION (*gl_info)
62
/* Bit flags that can be OR-ed together to describe a GLSL sampling variant.
 * NOTE(review): the meaning of each flag (projective, rectangle texture,
 * explicit LOD, explicit gradients) is inferred from the names only; the
 * consumers are outside this view -- confirm there. */
#define WINED3D_GLSL_SAMPLE_PROJECTED   0x1
#define WINED3D_GLSL_SAMPLE_RECT        0x2
#define WINED3D_GLSL_SAMPLE_LOD         0x4
#define WINED3D_GLSL_SAMPLE_GRAD        0x8
67
/* Text buffers describing a destination parameter in generated GLSL:
 * a register name and a short mask string. */
typedef struct
{
    char reg_name[150];
    char mask_str[6];
} glsl_dst_param_t;
72
/* Text buffers describing a source parameter in generated GLSL:
 * a register name and the full parameter string. */
typedef struct
{
    char reg_name[150];
    char param_str[200];
} glsl_src_param_t;
77
78typedef struct {
79 const char *name;
80 DWORD coord_mask;
81} glsl_sample_function_t;
82
/* Per-level state stored on the explicit stack used by the constant heap
 * walkers: what still has to be done for the node at that level. */
enum heap_node_op
{
    HEAP_NODE_TRAVERSE_LEFT,    /* left child not visited yet */
    HEAP_NODE_TRAVERSE_RIGHT,   /* left done, right child still pending */
    HEAP_NODE_POP,              /* both children done, go back to the parent */
};
89
/* One node of a constant heap: a constant index together with the version
 * at which that constant was last updated. */
struct constant_entry
{
    unsigned int idx;       /* index of the constant */
    unsigned int version;   /* version stamp; the heap is ordered on this */
};
95
/* Binary heap of constants keyed on their version.
 * The root lives at entries[1]; the children of node n are 2n and 2n + 1.
 * A parent never has a smaller version than its children. */
struct constant_heap
{
    struct constant_entry *entries;     /* heap nodes, 1-based */
    unsigned int *positions;            /* constant index -> heap index */
    unsigned int size;                  /* heap indices below this are valid */
};
102
103/* GLSL shader private data */
104struct shader_glsl_priv {
105 struct wined3d_shader_buffer shader_buffer;
106 struct wine_rb_tree program_lookup;
107 struct glsl_shader_prog_link *glsl_program;
108 struct constant_heap vconst_heap;
109 struct constant_heap pconst_heap;
110 unsigned char *stack;
111 GLhandleARB depth_blt_program[tex_type_count];
112 UINT next_constant_version;
113};
114
115/* Struct to maintain data about a linked GLSL program */
116struct glsl_shader_prog_link {
117 struct wine_rb_entry program_lookup_entry;
118 struct list vshader_entry;
119 struct list pshader_entry;
120 GLhandleARB programId;
121 GLint *vuniformF_locations;
122 GLint *puniformF_locations;
123 GLint vuniformI_locations[MAX_CONST_I];
124 GLint puniformI_locations[MAX_CONST_I];
125 GLint posFixup_location;
126 GLint np2Fixup_location;
127 GLint bumpenvmat_location[MAX_TEXTURES];
128 GLint luminancescale_location[MAX_TEXTURES];
129 GLint luminanceoffset_location[MAX_TEXTURES];
130 GLint ycorrection_location;
131 GLenum vertex_color_clamp;
132 IWineD3DVertexShader *vshader;
133 IWineD3DPixelShader *pshader;
134 struct vs_compile_args vs_args;
135 struct ps_compile_args ps_args;
136 UINT constant_version;
137 const struct wined3d_context *context;
138 UINT inp2Fixup_info;
139};
140
/* Helpers around glsl_shader_prog_link::inp2Fixup_info, which holds an index
 * into the pixel shader's compiled-shader array (see get_fixup_info()).
 *
 * The sentinel has to be an unsigned int: inp2Fixup_info is a 32-bit UINT,
 * so comparing it against ~0UL can never match where long is 64 bits wide.
 * WINEFIXUPINFO_INIT has to assign the sentinel; the former non-VMSVGA
 * variant compared with '==' and therefore initialized nothing. Both build
 * flavours now share the one correct definition. */
#define WINEFIXUPINFO_NOINDEX (~0U)
#define WINEFIXUPINFO_GET(_p) get_fixup_info((const IWineD3DPixelShaderImpl*)(_p)->pshader, (_p)->inp2Fixup_info)
#define WINEFIXUPINFO_ISVALID(_p) ((_p)->inp2Fixup_info != WINEFIXUPINFO_NOINDEX)
#define WINEFIXUPINFO_INIT(_p) do { (_p)->inp2Fixup_info = WINEFIXUPINFO_NOINDEX; } while (0)
153
154typedef struct {
155 IWineD3DVertexShader *vshader;
156 IWineD3DPixelShader *pshader;
157 struct ps_compile_args ps_args;
158 struct vs_compile_args vs_args;
159 const struct wined3d_context *context;
160} glsl_program_key_t;
161
/* Pointers to the compile arguments and NP2 fixup info currently in use. */
struct shader_glsl_ctx_priv
{
    const struct vs_compile_args *cur_vs_args;
    const struct ps_compile_args *cur_ps_args;
    struct ps_np2fixup_info *cur_np2fixup_info;
};
167
168struct glsl_ps_compiled_shader
169{
170 struct ps_compile_args args;
171 struct ps_np2fixup_info np2fixup;
172 GLhandleARB prgId;
173 const struct wined3d_context *context;
174};
175
176struct glsl_pshader_private
177{
178 struct glsl_ps_compiled_shader *gl_shaders;
179 UINT num_gl_shaders, shader_array_size;
180};
181
182struct glsl_vs_compiled_shader
183{
184 struct vs_compile_args args;
185 GLhandleARB prgId;
186 const struct wined3d_context *context;
187};
188
189struct glsl_vshader_private
190{
191 struct glsl_vs_compiled_shader *gl_shaders;
192 UINT num_gl_shaders, shader_array_size;
193};
194
195static const char *debug_gl_shader_type(GLenum type)
196{
197 switch (type)
198 {
199#define WINED3D_TO_STR(u) case u: return #u
200 WINED3D_TO_STR(GL_VERTEX_SHADER_ARB);
201 WINED3D_TO_STR(GL_GEOMETRY_SHADER_ARB);
202 WINED3D_TO_STR(GL_FRAGMENT_SHADER_ARB);
203#undef WINED3D_TO_STR
204 default:
205 return wine_dbg_sprintf("UNKNOWN(%#x)", type);
206 }
207}
208
/* Extract the next line from an info log buffer.
 * Note that this modifies the source string: the newline that ends the
 * returned line is overwritten with a terminator, and a terminator is forced
 * into the last byte of the buffer if there is none.
 *
 * ptr    - in/out: current read position, advanced past the returned line.
 * pcbStr - in/out: number of bytes left at *ptr, terminator included.
 *
 * Returns the start of the line, or NULL when nothing is left. */
static char *get_info_log_line(char **ptr, int *pcbStr)
{
    char *p, *q;
    const int cbStr = *pcbStr;

    if (cbStr <= 0)
    {
        /* Zero-length string. A negative length is rejected as well, it would
         * make the terminator check below index in front of the buffer. */
        return NULL;
    }

    if ((*ptr)[cbStr - 1] != '\0')
    {
        ERR("string should be null-terminated, forcing it!\n");
        (*ptr)[cbStr - 1] = '\0';
    }

    p = *ptr;
    if (!*p)
    {
        *pcbStr = 0;
        return NULL;
    }

    if (!(q = strchr(p, '\n')))
    {
        /* the string contains a single line! */
        *ptr += strlen(p);
        *pcbStr = 0;
        return p;
    }

    *q = '\0';
    /* Bytes consumed: the line itself plus the newline just replaced. */
    *pcbStr = cbStr - (int)(q - p) - 1;
    Assert((*pcbStr) >= 0);
    Assert((*pcbStr) < cbStr);
    *ptr = q + 1;

    return p;
}
250
251/** Prints the GLSL info log which will contain error messages if they exist */
252/* GL locking is done by the caller */
253static void print_glsl_info_log(const struct wined3d_gl_info *gl_info, GLhandleARB obj)
254{
255 int infologLength = 0;
256 char *infoLog;
257 unsigned int i;
258 BOOL is_spam;
259
260 static const char * const spam[] =
261 {
262 "Vertex shader was successfully compiled to run on hardware.\n", /* fglrx */
263 "Fragment shader was successfully compiled to run on hardware.\n", /* fglrx, with \n */
264 "Fragment shader was successfully compiled to run on hardware.", /* fglrx, no \n */
265 "Fragment shader(s) linked, vertex shader(s) linked. \n ", /* fglrx, with \n */
266 "Fragment shader(s) linked, vertex shader(s) linked.", /* fglrx, no \n */
267 "Vertex shader(s) linked, no fragment shader(s) defined. \n ", /* fglrx, with \n */
268 "Vertex shader(s) linked, no fragment shader(s) defined.", /* fglrx, no \n */
269 "Fragment shader(s) linked, no vertex shader(s) defined. \n ", /* fglrx, with \n */
270 "Fragment shader(s) linked, no vertex shader(s) defined.", /* fglrx, no \n */
271 };
272
273#ifndef VBOXWINEDBG_SHADERS
274 if (!TRACE_ON(d3d_shader) && !FIXME_ON(d3d_shader)) return;
275#endif
276
277 GL_EXTCALL(glGetObjectParameterivARB(obj,
278 GL_OBJECT_INFO_LOG_LENGTH_ARB,
279 &infologLength));
280
281 /* A size of 1 is just a null-terminated string, so the log should be bigger than
282 * that if there are errors. */
283 if (infologLength > 1)
284 {
285 char *ptr, *line;
286 int cbPtr;
287
288 /* Fglrx doesn't terminate the string properly, but it tells us the proper length.
289 * So use HEAP_ZERO_MEMORY to avoid uninitialized bytes
290 */
291 infoLog = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, infologLength);
292 GL_EXTCALL(glGetInfoLogARB(obj, infologLength, NULL, infoLog));
293 is_spam = FALSE;
294
295 for(i = 0; i < sizeof(spam) / sizeof(spam[0]); i++) {
296 if(strcmp(infoLog, spam[i]) == 0) {
297 is_spam = TRUE;
298 break;
299 }
300 }
301
302 ptr = infoLog;
303 cbPtr = infologLength;
304 if (is_spam)
305 {
306 WDLOG(("Spam received from GLSL shader #%u:\n", obj));
307 while ((line = get_info_log_line(&ptr, &cbPtr))) WDLOG((" %s\n", line));
308 }
309 else
310 {
311 WDLOG(("Error received from GLSL shader #%u:\n", obj));
312 while ((line = get_info_log_line(&ptr, &cbPtr))) WDLOG((" %s\n", line));
313 }
314 HeapFree(GetProcessHeap(), 0, infoLog);
315 }
316}
317
318static void shader_glsl_dump_shader_source(const struct wined3d_gl_info *gl_info, GLhandleARB shader)
319{
320 char *ptr;
321 GLint tmp, source_size;
322 char *source = NULL;
323 int cbPtr;
324
325 GL_EXTCALL(glGetObjectParameterivARB(shader, GL_OBJECT_SHADER_SOURCE_LENGTH_ARB, &tmp));
326
327 source = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, tmp);
328 if (!source)
329 {
330 ERR("Failed to allocate %d bytes for shader source.\n", tmp);
331 return;
332 }
333
334 source_size = tmp;
335
336 WDLOG(("Object %u:\n", shader));
337 GL_EXTCALL(glGetObjectParameterivARB(shader, GL_OBJECT_SUBTYPE_ARB, &tmp));
338 WDLOG((" GL_OBJECT_SUBTYPE_ARB: %s.\n", debug_gl_shader_type(tmp)));
339 GL_EXTCALL(glGetObjectParameterivARB(shader, GL_OBJECT_COMPILE_STATUS_ARB, &tmp));
340 WDLOG((" GL_OBJECT_COMPILE_STATUS_ARB: %d.\n", tmp));
341 WDLOG(("\n"));
342
343 ptr = source;
344 cbPtr = source_size;
345 GL_EXTCALL(glGetShaderSourceARB(shader, source_size, NULL, source));
346#if 0
347 while ((line = get_info_log_line(&ptr, &cbPtr))) WDLOG((" %s\n", line));
348#else
349 WDLOG(("*****shader source***\n"));
350 WDLOG((" %s\n", source));
351 WDLOG(("\n*****END shader source***\n\n"));
352#endif
353 WDLOG(("\n"));
354}
355
356/* GL locking is done by the caller. */
357static void shader_glsl_dump_program_source(const struct wined3d_gl_info *gl_info, GLhandleARB program)
358{
359 GLint i, object_count;
360 GLhandleARB *objects;
361 char *source = NULL;
362
363 WDLOG(("\n***************************dumping program %d******************************\n", program));
364
365 GL_EXTCALL(glGetObjectParameterivARB(program, GL_OBJECT_ATTACHED_OBJECTS_ARB, &object_count));
366 objects = HeapAlloc(GetProcessHeap(), 0, object_count * sizeof(*objects));
367 if (!objects)
368 {
369 ERR("Failed to allocate object array memory.\n");
370 return;
371 }
372
373 GL_EXTCALL(glGetAttachedObjectsARB(program, object_count, NULL, objects));
374 for (i = 0; i < object_count; ++i)
375 {
376 shader_glsl_dump_shader_source(gl_info, objects[i]);
377 }
378
379 HeapFree(GetProcessHeap(), 0, source);
380 HeapFree(GetProcessHeap(), 0, objects);
381
382 WDLOG(("\n***************************END dumping program %d******************************\n\n", program));
383}
384
385/* GL locking is done by the caller. */
386static void shader_glsl_validate_compile_link(const struct wined3d_gl_info *gl_info, GLhandleARB program, GLboolean fIsProgram)
387{
388 GLint tmp = -1;
389
390#ifndef VBOXWINEDBG_SHADERS
391 if (!TRACE_ON(d3d_shader) && !FIXME_ON(d3d_shader)) return;
392#endif
393
394 GL_EXTCALL(glGetObjectParameterivARB(program, GL_OBJECT_TYPE_ARB, &tmp));
395 if (tmp == GL_PROGRAM_OBJECT_ARB)
396 {
397 if (!fIsProgram)
398 {
399 ERR("this is a program, but shader expected");
400 }
401 GL_EXTCALL(glGetObjectParameterivARB(program, GL_OBJECT_LINK_STATUS_ARB, &tmp));
402 if (!tmp)
403 {
404 ERR("Program %p link status invalid.\n", (void *)(uintptr_t)program);
405#ifndef VBOXWINEDBG_SHADERS
406 shader_glsl_dump_program_source(gl_info, program);
407#endif
408 }
409#if defined(VBOX_WITH_VMSVGA) && defined(DEBUG)
410 shader_glsl_dump_program_source(gl_info, program);
411#endif
412 }
413 else if (tmp == GL_SHADER_OBJECT_ARB)
414 {
415 if (fIsProgram)
416 {
417 ERR("this is a shader, but program expected");
418 }
419
420 GL_EXTCALL(glGetObjectParameterivARB(program, GL_OBJECT_COMPILE_STATUS_ARB, &tmp));
421 if (!tmp)
422 {
423 ERR("Shader %p compile status invalid.\n", (void *)(uintptr_t)program);
424 shader_glsl_dump_shader_source(gl_info, program);
425 }
426 }
427 else
428 {
429 ERR("unexpected oject type(%d)!", tmp);
430 }
431
432 print_glsl_info_log(gl_info, program);
433}
434
435/**
436 * Loads (pixel shader) samplers
437 */
438/* GL locking is done by the caller */
439static void shader_glsl_load_psamplers(const struct wined3d_gl_info *gl_info,
440 DWORD *tex_unit_map, GLhandleARB programId)
441{
442 GLint name_loc;
443 int i;
444 char sampler_name[20];
445
446 for (i = 0; i < MAX_FRAGMENT_SAMPLERS; ++i) {
447 snprintf(sampler_name, sizeof(sampler_name), "Psampler%d", i);
448 name_loc = GL_EXTCALL(glGetUniformLocationARB(programId, sampler_name));
449 if (name_loc != -1) {
450 DWORD mapped_unit = tex_unit_map[i];
451 if (mapped_unit != WINED3D_UNMAPPED_STAGE && mapped_unit < gl_info->limits.fragment_samplers)
452 {
453 TRACE("Loading %s for texture %d\n", sampler_name, mapped_unit);
454 GL_EXTCALL(glUniform1iARB(name_loc, mapped_unit));
455 checkGLcall("glUniform1iARB");
456 } else {
457 ERR("Trying to load sampler %s on unsupported unit %d\n", sampler_name, mapped_unit);
458 }
459 }
460 }
461}
462
463/* GL locking is done by the caller */
464static void shader_glsl_load_vsamplers(const struct wined3d_gl_info *gl_info,
465 DWORD *tex_unit_map, GLhandleARB programId)
466{
467 GLint name_loc;
468 char sampler_name[20];
469 int i;
470
471 for (i = 0; i < MAX_VERTEX_SAMPLERS; ++i) {
472 snprintf(sampler_name, sizeof(sampler_name), "Vsampler%d", i);
473 name_loc = GL_EXTCALL(glGetUniformLocationARB(programId, sampler_name));
474 if (name_loc != -1) {
475 DWORD mapped_unit = tex_unit_map[MAX_FRAGMENT_SAMPLERS + i];
476 if (mapped_unit != WINED3D_UNMAPPED_STAGE && mapped_unit < gl_info->limits.combined_samplers)
477 {
478 TRACE("Loading %s for texture %d\n", sampler_name, mapped_unit);
479 GL_EXTCALL(glUniform1iARB(name_loc, mapped_unit));
480 checkGLcall("glUniform1iARB");
481 } else {
482 ERR("Trying to load sampler %s on unsupported unit %d\n", sampler_name, mapped_unit);
483 }
484 }
485 }
486}
487
488/* GL locking is done by the caller */
489static inline void walk_constant_heap(const struct wined3d_gl_info *gl_info, const float *constants,
490 const GLint *constant_locations, const struct constant_heap *heap, unsigned char *stack, DWORD version)
491{
492 int stack_idx = 0;
493 unsigned int heap_idx = 1;
494 unsigned int idx;
495
496 if (heap->entries[heap_idx].version <= version) return;
497
498 idx = heap->entries[heap_idx].idx;
499 if (constant_locations[idx] != -1) GL_EXTCALL(glUniform4fvARB(constant_locations[idx], 1, &constants[idx * 4]));
500 stack[stack_idx] = HEAP_NODE_TRAVERSE_LEFT;
501
502 while (stack_idx >= 0)
503 {
504 /* Note that we fall through to the next case statement. */
505 switch(stack[stack_idx])
506 {
507 case HEAP_NODE_TRAVERSE_LEFT:
508 {
509 unsigned int left_idx = heap_idx << 1;
510 if (left_idx < heap->size && heap->entries[left_idx].version > version)
511 {
512 heap_idx = left_idx;
513 idx = heap->entries[heap_idx].idx;
514 if (constant_locations[idx] != -1)
515 GL_EXTCALL(glUniform4fvARB(constant_locations[idx], 1, &constants[idx * 4]));
516
517 stack[stack_idx++] = HEAP_NODE_TRAVERSE_RIGHT;
518 stack[stack_idx] = HEAP_NODE_TRAVERSE_LEFT;
519 break;
520 }
521 } /* Fall through */
522
523 case HEAP_NODE_TRAVERSE_RIGHT:
524 {
525 unsigned int right_idx = (heap_idx << 1) + 1;
526 if (right_idx < heap->size && heap->entries[right_idx].version > version)
527 {
528 heap_idx = right_idx;
529 idx = heap->entries[heap_idx].idx;
530 if (constant_locations[idx] != -1)
531 GL_EXTCALL(glUniform4fvARB(constant_locations[idx], 1, &constants[idx * 4]));
532
533 stack[stack_idx++] = HEAP_NODE_POP;
534 stack[stack_idx] = HEAP_NODE_TRAVERSE_LEFT;
535 break;
536 }
537 } /* Fall through */
538
539 case HEAP_NODE_POP:
540 {
541 heap_idx >>= 1;
542 --stack_idx;
543 break;
544 }
545 }
546 }
547 checkGLcall("walk_constant_heap()");
548}
549
550/* GL locking is done by the caller */
551static inline void apply_clamped_constant(const struct wined3d_gl_info *gl_info, GLint location, const GLfloat *data)
552{
553 GLfloat clamped_constant[4];
554
555 if (location == -1) return;
556
557 clamped_constant[0] = data[0] < -1.0f ? -1.0f : data[0] > 1.0f ? 1.0f : data[0];
558 clamped_constant[1] = data[1] < -1.0f ? -1.0f : data[1] > 1.0f ? 1.0f : data[1];
559 clamped_constant[2] = data[2] < -1.0f ? -1.0f : data[2] > 1.0f ? 1.0f : data[2];
560 clamped_constant[3] = data[3] < -1.0f ? -1.0f : data[3] > 1.0f ? 1.0f : data[3];
561
562 GL_EXTCALL(glUniform4fvARB(location, 1, clamped_constant));
563}
564
565/* GL locking is done by the caller */
566static inline void walk_constant_heap_clamped(const struct wined3d_gl_info *gl_info, const float *constants,
567 const GLint *constant_locations, const struct constant_heap *heap, unsigned char *stack, DWORD version)
568{
569 int stack_idx = 0;
570 unsigned int heap_idx = 1;
571 unsigned int idx;
572
573 if (heap->entries[heap_idx].version <= version) return;
574
575 idx = heap->entries[heap_idx].idx;
576 apply_clamped_constant(gl_info, constant_locations[idx], &constants[idx * 4]);
577 stack[stack_idx] = HEAP_NODE_TRAVERSE_LEFT;
578
579 while (stack_idx >= 0)
580 {
581 /* Note that we fall through to the next case statement. */
582 switch(stack[stack_idx])
583 {
584 case HEAP_NODE_TRAVERSE_LEFT:
585 {
586 unsigned int left_idx = heap_idx << 1;
587 if (left_idx < heap->size && heap->entries[left_idx].version > version)
588 {
589 heap_idx = left_idx;
590 idx = heap->entries[heap_idx].idx;
591 apply_clamped_constant(gl_info, constant_locations[idx], &constants[idx * 4]);
592
593 stack[stack_idx++] = HEAP_NODE_TRAVERSE_RIGHT;
594 stack[stack_idx] = HEAP_NODE_TRAVERSE_LEFT;
595 break;
596 }
597 } /* Fall through */
598
599 case HEAP_NODE_TRAVERSE_RIGHT:
600 {
601 unsigned int right_idx = (heap_idx << 1) + 1;
602 if (right_idx < heap->size && heap->entries[right_idx].version > version)
603 {
604 heap_idx = right_idx;
605 idx = heap->entries[heap_idx].idx;
606 apply_clamped_constant(gl_info, constant_locations[idx], &constants[idx * 4]);
607
608 stack[stack_idx++] = HEAP_NODE_POP;
609 stack[stack_idx] = HEAP_NODE_TRAVERSE_LEFT;
610 break;
611 }
612 } /* Fall through */
613
614 case HEAP_NODE_POP:
615 {
616 heap_idx >>= 1;
617 --stack_idx;
618 break;
619 }
620 }
621 }
622 checkGLcall("walk_constant_heap_clamped()");
623}
624
625/* Loads floating point constants (aka uniforms) into the currently set GLSL program. */
626/* GL locking is done by the caller */
627static void shader_glsl_load_constantsF(IWineD3DBaseShaderImpl *This, const struct wined3d_gl_info *gl_info,
628 const float *constants, const GLint *constant_locations, const struct constant_heap *heap,
629 unsigned char *stack, UINT version)
630{
631 const local_constant *lconst;
632
633 /* 1.X pshaders have the constants clamped to [-1;1] implicitly. */
634 if (This->baseShader.reg_maps.shader_version.major == 1
635 && shader_is_pshader_version(This->baseShader.reg_maps.shader_version.type))
636 walk_constant_heap_clamped(gl_info, constants, constant_locations, heap, stack, version);
637 else
638 walk_constant_heap(gl_info, constants, constant_locations, heap, stack, version);
639
640 if (!This->baseShader.load_local_constsF)
641 {
642 TRACE("No need to load local float constants for this shader\n");
643 return;
644 }
645
646 /* Immediate constants are clamped to [-1;1] at shader creation time if needed */
647 LIST_FOR_EACH_ENTRY(lconst, &This->baseShader.constantsF, local_constant, entry)
648 {
649 GLint location = constant_locations[lconst->idx];
650 /* We found this uniform name in the program - go ahead and send the data */
651 if (location != -1) GL_EXTCALL(glUniform4fvARB(location, 1, (const GLfloat *)lconst->value));
652 }
653 checkGLcall("glUniform4fvARB()");
654}
655
656/* Loads integer constants (aka uniforms) into the currently set GLSL program. */
657/* GL locking is done by the caller */
658static void shader_glsl_load_constantsI(IWineD3DBaseShaderImpl *This, const struct wined3d_gl_info *gl_info,
659 const GLint locations[MAX_CONST_I], const int *constants, WORD constants_set)
660{
661 unsigned int i;
662 struct list* ptr;
663
664 for (i = 0; constants_set; constants_set >>= 1, ++i)
665 {
666 if (!(constants_set & 1)) continue;
667
668 TRACE_(d3d_constants)("Loading constants %u: %i, %i, %i, %i\n",
669 i, constants[i*4], constants[i*4+1], constants[i*4+2], constants[i*4+3]);
670
671 /* We found this uniform name in the program - go ahead and send the data */
672 GL_EXTCALL(glUniform4ivARB(locations[i], 1, &constants[i*4]));
673 checkGLcall("glUniform4ivARB");
674 }
675
676 /* Load immediate constants */
677 ptr = list_head(&This->baseShader.constantsI);
678 while (ptr) {
679 const struct local_constant *lconst = LIST_ENTRY(ptr, const struct local_constant, entry);
680 unsigned int idx = lconst->idx;
681 const GLint *values = (const GLint *)lconst->value;
682
683 TRACE_(d3d_constants)("Loading local constants %i: %i, %i, %i, %i\n", idx,
684 values[0], values[1], values[2], values[3]);
685
686 /* We found this uniform name in the program - go ahead and send the data */
687 GL_EXTCALL(glUniform4ivARB(locations[idx], 1, values));
688 checkGLcall("glUniform4ivARB");
689 ptr = list_next(&This->baseShader.constantsI, ptr);
690 }
691}
692
693/* Loads boolean constants (aka uniforms) into the currently set GLSL program. */
694/* GL locking is done by the caller */
695static void shader_glsl_load_constantsB(IWineD3DBaseShaderImpl *This, const struct wined3d_gl_info *gl_info,
696 GLhandleARB programId, const BOOL *constants, WORD constants_set)
697{
698 GLint tmp_loc;
699 unsigned int i;
700 char tmp_name[8];
701 const char *prefix;
702 struct list* ptr;
703
704 switch (This->baseShader.reg_maps.shader_version.type)
705 {
706 case WINED3D_SHADER_TYPE_VERTEX:
707 prefix = "VB";
708 break;
709
710 case WINED3D_SHADER_TYPE_GEOMETRY:
711 prefix = "GB";
712 break;
713
714 case WINED3D_SHADER_TYPE_PIXEL:
715 prefix = "PB";
716 break;
717
718 default:
719 FIXME("Unknown shader type %#x.\n",
720 This->baseShader.reg_maps.shader_version.type);
721 prefix = "UB";
722 break;
723 }
724
725 /* TODO: Benchmark and see if it would be beneficial to store the
726 * locations of the constants to avoid looking up each time */
727 for (i = 0; constants_set; constants_set >>= 1, ++i)
728 {
729 if (!(constants_set & 1)) continue;
730
731 TRACE_(d3d_constants)("Loading constants %i: %i;\n", i, constants[i]);
732
733 /* TODO: Benchmark and see if it would be beneficial to store the
734 * locations of the constants to avoid looking up each time */
735 snprintf(tmp_name, sizeof(tmp_name), "%s[%i]", prefix, i);
736 tmp_loc = GL_EXTCALL(glGetUniformLocationARB(programId, tmp_name));
737 if (tmp_loc != -1)
738 {
739 /* We found this uniform name in the program - go ahead and send the data */
740 GL_EXTCALL(glUniform1ivARB(tmp_loc, 1, &constants[i]));
741 checkGLcall("glUniform1ivARB");
742 }
743 }
744
745 /* Load immediate constants */
746 ptr = list_head(&This->baseShader.constantsB);
747 while (ptr) {
748 const struct local_constant *lconst = LIST_ENTRY(ptr, const struct local_constant, entry);
749 unsigned int idx = lconst->idx;
750 const GLint *values = (const GLint *)lconst->value;
751
752 TRACE_(d3d_constants)("Loading local constants %i: %i\n", idx, values[0]);
753
754 snprintf(tmp_name, sizeof(tmp_name), "%s[%i]", prefix, idx);
755 tmp_loc = GL_EXTCALL(glGetUniformLocationARB(programId, tmp_name));
756 if (tmp_loc != -1) {
757 /* We found this uniform name in the program - go ahead and send the data */
758 GL_EXTCALL(glUniform1ivARB(tmp_loc, 1, values));
759 checkGLcall("glUniform1ivARB");
760 }
761 ptr = list_next(&This->baseShader.constantsB, ptr);
762 }
763}
764
765static void reset_program_constant_version(struct wine_rb_entry *entry, void *context)
766{
767 WINE_RB_ENTRY_VALUE(entry, struct glsl_shader_prog_link, program_lookup_entry)->constant_version = 0;
768}
769
770static const struct ps_np2fixup_info * get_fixup_info(const IWineD3DPixelShaderImpl *shader, UINT inp2fixup_info)
771{
772 struct glsl_pshader_private *shader_data = shader->baseShader.backend_data;
773
774 if (inp2fixup_info == WINEFIXUPINFO_NOINDEX)
775 return NULL;
776
777 if (!shader->baseShader.backend_data)
778 {
779 ERR("no backend data\n");
780 return NULL;
781 }
782 shader_data = shader->baseShader.backend_data;
783
784 if (inp2fixup_info >= shader_data->num_gl_shaders)
785 {
786 ERR("invalid index\n");
787 return NULL;
788 }
789
790 return &shader_data->gl_shaders[inp2fixup_info].np2fixup;
791}
792
793/**
794 * Loads the texture dimensions for NP2 fixup into the currently set GLSL program.
795 */
796/* GL locking is done by the caller (state handler) */
797static void shader_glsl_load_np2fixup_constants(
798 IWineD3DDevice* device,
799 char usePixelShader,
800 char useVertexShader) {
801
802 const IWineD3DDeviceImpl* deviceImpl = (const IWineD3DDeviceImpl*) device;
803 const struct glsl_shader_prog_link* prog = ((struct shader_glsl_priv *)(deviceImpl->shader_priv))->glsl_program;
804
805 if (!prog) {
806 /* No GLSL program set - nothing to do. */
807 return;
808 }
809
810 if (!usePixelShader) {
811 /* NP2 texcoord fixup is (currently) only done for pixelshaders. */
812 return;
813 }
814
815 if (prog->ps_args.np2_fixup && -1 != prog->np2Fixup_location) {
816 const struct wined3d_gl_info *gl_info = &deviceImpl->adapter->gl_info;
817 const IWineD3DStateBlockImpl* stateBlock = (const IWineD3DStateBlockImpl*) deviceImpl->stateBlock;
818 UINT i;
819 UINT fixup = prog->ps_args.np2_fixup;
820 GLfloat np2fixup_constants[4 * MAX_FRAGMENT_SAMPLERS];
821
822 const struct ps_np2fixup_info *np2Fixup_info = WINEFIXUPINFO_GET(prog);
823
824 for (i = 0; fixup; fixup >>= 1, ++i) {
825 const unsigned char idx = np2Fixup_info->idx[i];
826 const IWineD3DBaseTextureImpl* const tex = (const IWineD3DBaseTextureImpl*) stateBlock->textures[i];
827 GLfloat* tex_dim = &np2fixup_constants[(idx >> 1) * 4];
828
829 if (!tex) {
830 FIXME("Nonexistent texture is flagged for NP2 texcoord fixup\n");
831 continue;
832 }
833
834 if (idx % 2) {
835 tex_dim[2] = tex->baseTexture.pow2Matrix[0]; tex_dim[3] = tex->baseTexture.pow2Matrix[5];
836 } else {
837 tex_dim[0] = tex->baseTexture.pow2Matrix[0]; tex_dim[1] = tex->baseTexture.pow2Matrix[5];
838 }
839 }
840
841 GL_EXTCALL(glUniform4fvARB(prog->np2Fixup_location, np2Fixup_info->num_consts, np2fixup_constants));
842 }
843}
844
845/**
846 * Loads the app-supplied constants into the currently set GLSL program.
847 */
848/* GL locking is done by the caller (state handler) */
849static void shader_glsl_load_constants(const struct wined3d_context *context,
850 char usePixelShader, char useVertexShader)
851{
852 const struct wined3d_gl_info *gl_info = context->gl_info;
853 IWineD3DDeviceImpl *device = context_get_device(context);
854 IWineD3DStateBlockImpl* stateBlock = device->stateBlock;
855 struct shader_glsl_priv *priv = device->shader_priv;
856
857 GLhandleARB programId;
858 struct glsl_shader_prog_link *prog = priv->glsl_program;
859 UINT constant_version;
860 int i;
861
862 if (!prog) {
863 /* No GLSL program set - nothing to do. */
864 return;
865 }
866 programId = prog->programId;
867 constant_version = prog->constant_version;
868
869 if (useVertexShader) {
870 IWineD3DBaseShaderImpl* vshader = (IWineD3DBaseShaderImpl*) stateBlock->vertexShader;
871
872 /* Load DirectX 9 float constants/uniforms for vertex shader */
873 shader_glsl_load_constantsF(vshader, gl_info, stateBlock->vertexShaderConstantF,
874 prog->vuniformF_locations, &priv->vconst_heap, priv->stack, constant_version);
875
876 /* Load DirectX 9 integer constants/uniforms for vertex shader */
877 shader_glsl_load_constantsI(vshader, gl_info, prog->vuniformI_locations, stateBlock->vertexShaderConstantI,
878 stateBlock->changed.vertexShaderConstantsI & vshader->baseShader.reg_maps.integer_constants);
879
880 /* Load DirectX 9 boolean constants/uniforms for vertex shader */
881 shader_glsl_load_constantsB(vshader, gl_info, programId, stateBlock->vertexShaderConstantB,
882 stateBlock->changed.vertexShaderConstantsB & vshader->baseShader.reg_maps.boolean_constants);
883
884 /* Upload the position fixup params */
885 GL_EXTCALL(glUniform4fvARB(prog->posFixup_location, 1, &device->posFixup[0]));
886 checkGLcall("glUniform4fvARB");
887 }
888
889 if (usePixelShader) {
890
891 IWineD3DBaseShaderImpl* pshader = (IWineD3DBaseShaderImpl*) stateBlock->pixelShader;
892
893 /* Load DirectX 9 float constants/uniforms for pixel shader */
894 shader_glsl_load_constantsF(pshader, gl_info, stateBlock->pixelShaderConstantF,
895 prog->puniformF_locations, &priv->pconst_heap, priv->stack, constant_version);
896
897 /* Load DirectX 9 integer constants/uniforms for pixel shader */
898 shader_glsl_load_constantsI(pshader, gl_info, prog->puniformI_locations, stateBlock->pixelShaderConstantI,
899 stateBlock->changed.pixelShaderConstantsI & pshader->baseShader.reg_maps.integer_constants);
900
901 /* Load DirectX 9 boolean constants/uniforms for pixel shader */
902 shader_glsl_load_constantsB(pshader, gl_info, programId, stateBlock->pixelShaderConstantB,
903 stateBlock->changed.pixelShaderConstantsB & pshader->baseShader.reg_maps.boolean_constants);
904
905 /* Upload the environment bump map matrix if needed. The needsbumpmat member specifies the texture stage to load the matrix from.
906 * It can't be 0 for a valid texbem instruction.
907 */
908 for(i = 0; i < MAX_TEXTURES; i++) {
909 const float *data;
910
911 if(prog->bumpenvmat_location[i] == -1) continue;
912
913 data = (const float *)&stateBlock->textureState[i][WINED3DTSS_BUMPENVMAT00];
914 GL_EXTCALL(glUniformMatrix2fvARB(prog->bumpenvmat_location[i], 1, 0, data));
915 checkGLcall("glUniformMatrix2fvARB");
916
917 /* texbeml needs the luminance scale and offset too. If texbeml is used, needsbumpmat
918 * is set too, so we can check that in the needsbumpmat check
919 */
920 if(prog->luminancescale_location[i] != -1) {
921 const GLfloat *scale = (const GLfloat *)&stateBlock->textureState[i][WINED3DTSS_BUMPENVLSCALE];
922 const GLfloat *offset = (const GLfloat *)&stateBlock->textureState[i][WINED3DTSS_BUMPENVLOFFSET];
923
924 GL_EXTCALL(glUniform1fvARB(prog->luminancescale_location[i], 1, scale));
925 checkGLcall("glUniform1fvARB");
926 GL_EXTCALL(glUniform1fvARB(prog->luminanceoffset_location[i], 1, offset));
927 checkGLcall("glUniform1fvARB");
928 }
929 }
930
931 if(((IWineD3DPixelShaderImpl *) pshader)->vpos_uniform) {
932 float correction_params[4];
933
934 if (context->render_offscreen)
935 {
936 correction_params[0] = 0.0f;
937 correction_params[1] = 1.0f;
938 } else {
939 /* position is window relative, not viewport relative */
940#ifdef VBOX_WITH_VMSVGA
941 correction_params[0] = device->rtHeight;
942#else
943 correction_params[0] = ((IWineD3DSurfaceImpl *)context->current_rt)->currentDesc.Height;
944#endif
945 correction_params[1] = -1.0f;
946 }
947 GL_EXTCALL(glUniform4fvARB(prog->ycorrection_location, 1, correction_params));
948 }
949 }
950
951 if (priv->next_constant_version == UINT_MAX)
952 {
953 TRACE("Max constant version reached, resetting to 0.\n");
954 wine_rb_for_each_entry(&priv->program_lookup, reset_program_constant_version, NULL);
955 priv->next_constant_version = 1;
956 }
957 else
958 {
959 prog->constant_version = priv->next_constant_version++;
960 }
961}
962
963static inline void update_heap_entry(struct constant_heap *heap, unsigned int idx,
964 unsigned int heap_idx, DWORD new_version)
965{
966 struct constant_entry *entries = heap->entries;
967 unsigned int *positions = heap->positions;
968 unsigned int parent_idx;
969
970 while (heap_idx > 1)
971 {
972 parent_idx = heap_idx >> 1;
973
974 if (new_version <= entries[parent_idx].version) break;
975
976 entries[heap_idx] = entries[parent_idx];
977 positions[entries[parent_idx].idx] = heap_idx;
978 heap_idx = parent_idx;
979 }
980
981 entries[heap_idx].version = new_version;
982 entries[heap_idx].idx = idx;
983 positions[idx] = heap_idx;
984}
985
986static void shader_glsl_update_float_vertex_constants(IWineD3DDevice *iface, UINT start, UINT count)
987{
988 IWineD3DDeviceImpl *This = (IWineD3DDeviceImpl *)iface;
989 struct shader_glsl_priv *priv = This->shader_priv;
990 struct constant_heap *heap = &priv->vconst_heap;
991 UINT i;
992
993 for (i = start; i < count + start; ++i)
994 {
995 if (!This->stateBlock->changed.vertexShaderConstantsF[i])
996 update_heap_entry(heap, i, heap->size++, priv->next_constant_version);
997 else
998 update_heap_entry(heap, i, heap->positions[i], priv->next_constant_version);
999 }
1000}
1001
1002static void shader_glsl_update_float_pixel_constants(IWineD3DDevice *iface, UINT start, UINT count)
1003{
1004 IWineD3DDeviceImpl *This = (IWineD3DDeviceImpl *)iface;
1005 struct shader_glsl_priv *priv = This->shader_priv;
1006 struct constant_heap *heap = &priv->pconst_heap;
1007 UINT i;
1008
1009 for (i = start; i < count + start; ++i)
1010 {
1011 if (!This->stateBlock->changed.pixelShaderConstantsF[i])
1012 update_heap_entry(heap, i, heap->size++, priv->next_constant_version);
1013 else
1014 update_heap_entry(heap, i, heap->positions[i], priv->next_constant_version);
1015 }
1016}
1017
1018static unsigned int vec4_varyings(DWORD shader_major, const struct wined3d_gl_info *gl_info)
1019{
1020 unsigned int ret = gl_info->limits.glsl_varyings / 4;
1021 /* 4.0 shaders do not write clip coords because d3d10 does not support user clipplanes */
1022 if(shader_major > 3) return ret;
1023
1024 /* 3.0 shaders may need an extra varying for the clip coord on some cards(mostly dx10 ones) */
1025 if (gl_info->quirks & WINED3D_QUIRK_GLSL_CLIP_VARYING) ret -= 1;
1026 return ret;
1027}
1028
1029/** Generate the variable & register declarations for the GLSL output target */
1030static void shader_generate_glsl_declarations(const struct wined3d_context *context,
1031 struct wined3d_shader_buffer *buffer, IWineD3DBaseShader *iface,
1032 const shader_reg_maps *reg_maps, struct shader_glsl_ctx_priv *ctx_priv)
1033{
1034 IWineD3DBaseShaderImpl* This = (IWineD3DBaseShaderImpl*) iface;
1035 IWineD3DDeviceImpl *device = (IWineD3DDeviceImpl *) This->baseShader.device;
1036 const struct ps_compile_args *ps_args = ctx_priv->cur_ps_args;
1037 const struct wined3d_gl_info *gl_info = context->gl_info;
1038 unsigned int i, extra_constants_needed = 0;
1039 const local_constant *lconst;
1040 DWORD map;
1041
1042 /* There are some minor differences between pixel and vertex shaders */
1043 char pshader = shader_is_pshader_version(reg_maps->shader_version.type);
1044 char prefix = pshader ? 'P' : 'V';
1045
1046 /* Prototype the subroutines */
1047 for (i = 0, map = reg_maps->labels; map; map >>= 1, ++i)
1048 {
1049 if (map & 1) shader_addline(buffer, "void subroutine%u();\n", i);
1050 }
1051
1052 /* Declare the constants (aka uniforms) */
1053 if (This->baseShader.limits.constant_float > 0) {
1054 unsigned max_constantsF;
1055 /* Unless the shader uses indirect addressing, always declare the maximum array size and ignore that we need some
1056 * uniforms privately. E.g. if GL supports 256 uniforms, and we need 2 for the pos fixup and immediate values, still
1057 * declare VC[256]. If the shader needs more uniforms than we have it won't work in any case. If it uses less, the
1058 * compiler will figure out which uniforms are really used and strip them out. This allows a shader to use c255 on
1059 * a dx9 card, as long as it doesn't also use all the other constants.
1060 *
1061 * If the shader uses indirect addressing the compiler must assume that all declared uniforms are used. In this case,
1062 * declare only the amount that we're assured to have.
1063 *
1064 * Thus we run into problems in these two cases:
1065 * 1) The shader really uses more uniforms than supported
1066 * 2) The shader uses indirect addressing, less constants than supported, but uses a constant index > #supported consts
1067 */
1068 if (pshader)
1069 {
1070 /* No indirect addressing here. */
1071 max_constantsF = gl_info->limits.glsl_ps_float_constants;
1072 }
1073 else
1074 {
1075 if(This->baseShader.reg_maps.usesrelconstF) {
1076 /* Subtract the other potential uniforms from the max available (bools, ints, and 1 row of projection matrix).
1077 * Subtract another uniform for immediate values, which have to be loaded via uniform by the driver as well.
1078 * The shader code only uses 0.5, 2.0, 1.0, 128 and -128 in vertex shader code, so one vec4 should be enough
1079 * (Unfortunately the Nvidia driver doesn't store 128 and -128 in one float).
1080 *
1081 * Writing gl_ClipVertex requires one uniform for each clipplane as well.
1082 */
1083#ifdef VBOX_WITH_WDDM
1084 if (gl_info->limits.glsl_vs_float_constants == 256)
1085 {
1086 DWORD dwVersion = GetVersion();
1087 DWORD dwMajor = (DWORD)(LOBYTE(LOWORD(dwVersion)));
1088 DWORD dwMinor = (DWORD)(HIBYTE(LOWORD(dwVersion)));
1089 /* tmp workaround Win8 Aero requirement for 256 */
1090 if (dwMajor > 6 || dwMinor > 1)
1091 {
1092 /* tmp work-around to make Internet Explorer in win8 work with GPU supporting only with 256 shader uniform vars
1093 * @todo: make it more robust */
1094 max_constantsF = gl_info->limits.glsl_vs_float_constants - 1;
1095 }
1096 else
1097 max_constantsF = gl_info->limits.glsl_vs_float_constants - 3;
1098 }
1099 else
1100#endif
1101 {
1102 max_constantsF = gl_info->limits.glsl_vs_float_constants - 3;
1103 }
1104
1105 if(ctx_priv->cur_vs_args->clip_enabled)
1106 {
1107 max_constantsF -= gl_info->limits.clipplanes;
1108 }
1109 max_constantsF -= count_bits(This->baseShader.reg_maps.integer_constants);
1110 /* Strictly speaking a bool only uses one scalar, but the nvidia(Linux) compiler doesn't pack them properly,
1111 * so each scalar requires a full vec4. We could work around this by packing the booleans ourselves, but
1112 * for now take this into account when calculating the number of available constants
1113 */
1114 max_constantsF -= count_bits(This->baseShader.reg_maps.boolean_constants);
1115 /* Set by driver quirks in directx.c */
1116 max_constantsF -= gl_info->reserved_glsl_constants;
1117 }
1118 else
1119 {
1120 max_constantsF = gl_info->limits.glsl_vs_float_constants;
1121 }
1122 }
1123 max_constantsF = min(This->baseShader.limits.constant_float, max_constantsF);
1124 shader_addline(buffer, "uniform vec4 %cC[%u];\n", prefix, max_constantsF);
1125 }
1126
1127 /* Always declare the full set of constants, the compiler can remove the unused ones because d3d doesn't(yet)
1128 * support indirect int and bool constant addressing. This avoids problems if the app uses e.g. i0 and i9.
1129 */
1130 if (This->baseShader.limits.constant_int > 0 && This->baseShader.reg_maps.integer_constants)
1131 shader_addline(buffer, "uniform ivec4 %cI[%u];\n", prefix, This->baseShader.limits.constant_int);
1132
1133 if (This->baseShader.limits.constant_bool > 0 && This->baseShader.reg_maps.boolean_constants)
1134 shader_addline(buffer, "uniform bool %cB[%u];\n", prefix, This->baseShader.limits.constant_bool);
1135
1136 if(!pshader) {
1137 shader_addline(buffer, "uniform vec4 posFixup;\n");
1138 /* Predeclaration; This function is added at link time based on the pixel shader.
1139 * VS 3.0 shaders have an array OUT[] the shader writes to, earlier versions don't have
1140 * that. We know the input to the reorder function at vertex shader compile time, so
1141 * we can deal with that. The reorder function for a 1.x and 2.x vertex shader can just
1142 * read gl_FrontColor. The output depends on the pixel shader. The reorder function for a
1143 * 1.x and 2.x pshader or for fixed function will write gl_FrontColor, and for a 3.0 shader
1144 * it will write to the varying array. Here we depend on the shader optimizer on sorting that
1145 * out. The nvidia driver only does that if the parameter is inout instead of out, hence the
1146 * inout.
1147 */
1148 if (reg_maps->shader_version.major >= 3)
1149 {
1150 shader_addline(buffer, "void order_ps_input(in vec4[%u]);\n", MAX_REG_OUTPUT);
1151 } else {
1152 shader_addline(buffer, "void order_ps_input();\n");
1153 }
1154 } else {
1155 for (i = 0, map = reg_maps->bumpmat; map; map >>= 1, ++i)
1156 {
1157 if (!(map & 1)) continue;
1158
1159 shader_addline(buffer, "uniform mat2 bumpenvmat%d;\n", i);
1160
1161 if (reg_maps->luminanceparams & (1 << i))
1162 {
1163 shader_addline(buffer, "uniform float luminancescale%d;\n", i);
1164 shader_addline(buffer, "uniform float luminanceoffset%d;\n", i);
1165 extra_constants_needed++;
1166 }
1167
1168 extra_constants_needed++;
1169 }
1170
1171 if (ps_args->srgb_correction)
1172 {
1173 shader_addline(buffer, "const vec4 srgb_const0 = vec4(%.8e, %.8e, %.8e, %.8e);\n",
1174 srgb_pow, srgb_mul_high, srgb_sub_high, srgb_mul_low);
1175 shader_addline(buffer, "const vec4 srgb_const1 = vec4(%.8e, 0.0, 0.0, 0.0);\n",
1176 srgb_cmp);
1177 }
1178 if (reg_maps->vpos || reg_maps->usesdsy)
1179 {
1180 if (This->baseShader.limits.constant_float + extra_constants_needed
1181 + 1 < gl_info->limits.glsl_ps_float_constants)
1182 {
1183 shader_addline(buffer, "uniform vec4 ycorrection;\n");
1184 ((IWineD3DPixelShaderImpl *) This)->vpos_uniform = 1;
1185 extra_constants_needed++;
1186 } else {
1187 /* This happens because we do not have proper tracking of the constant registers that are
1188 * actually used, only the max limit of the shader version
1189 */
1190 FIXME("Cannot find a free uniform for vpos correction params\n");
1191 AssertFailed();
1192 shader_addline(buffer, "const vec4 ycorrection = vec4(%f, %f, 0.0, 0.0);\n",
1193 context->render_offscreen ? 0.0f : ((IWineD3DSurfaceImpl *)device->render_targets[0])->currentDesc.Height,
1194 context->render_offscreen ? 1.0f : -1.0f);
1195 }
1196 shader_addline(buffer, "vec4 vpos;\n");
1197 }
1198 }
1199
1200 /* Declare texture samplers */
1201 for (i = 0; i < This->baseShader.limits.sampler; i++) {
1202 if (reg_maps->sampler_type[i])
1203 {
1204 switch (reg_maps->sampler_type[i])
1205 {
1206 case WINED3DSTT_1D:
1207 shader_addline(buffer, "uniform sampler1D %csampler%u;\n", prefix, i);
1208 break;
1209 case WINED3DSTT_2D:
1210 if(device->stateBlock->textures[i] &&
1211 IWineD3DBaseTexture_GetTextureDimensions(device->stateBlock->textures[i]) == GL_TEXTURE_RECTANGLE_ARB) {
1212 shader_addline(buffer, "uniform sampler2DRect %csampler%u;\n", prefix, i);
1213 } else {
1214 shader_addline(buffer, "uniform sampler2D %csampler%u;\n", prefix, i);
1215 }
1216 break;
1217 case WINED3DSTT_CUBE:
1218 shader_addline(buffer, "uniform samplerCube %csampler%u;\n", prefix, i);
1219 break;
1220 case WINED3DSTT_VOLUME:
1221 shader_addline(buffer, "uniform sampler3D %csampler%u;\n", prefix, i);
1222 break;
1223 default:
1224 shader_addline(buffer, "uniform unsupported_sampler %csampler%u;\n", prefix, i);
1225 FIXME("Unrecognized sampler type: %#x\n", reg_maps->sampler_type[i]);
1226 break;
1227 }
1228 }
1229 }
1230
1231 /* Declare uniforms for NP2 texcoord fixup:
1232 * This is NOT done inside the loop that declares the texture samplers since the NP2 fixup code
1233 * is currently only used for the GeforceFX series and when forcing the ARB_npot extension off.
1234 * Modern cards just skip the code anyway, so put it inside a separate loop. */
1235 if (pshader && ps_args->np2_fixup) {
1236
1237 struct ps_np2fixup_info* const fixup = ctx_priv->cur_np2fixup_info;
1238 UINT cur = 0;
1239
1240 /* NP2/RECT textures in OpenGL use texcoords in the range [0,width]x[0,height]
1241 * while D3D has them in the (normalized) [0,1]x[0,1] range.
1242 * samplerNP2Fixup stores texture dimensions and is updated through
1243 * shader_glsl_load_np2fixup_constants when the sampler changes. */
1244
1245 for (i = 0; i < This->baseShader.limits.sampler; ++i) {
1246 if (reg_maps->sampler_type[i]) {
1247 if (!(ps_args->np2_fixup & (1 << i))) continue;
1248
1249 if (WINED3DSTT_2D != reg_maps->sampler_type[i]) {
1250 FIXME("Non-2D texture is flagged for NP2 texcoord fixup.\n");
1251 continue;
1252 }
1253
1254 fixup->idx[i] = cur++;
1255 }
1256 }
1257
1258 fixup->num_consts = (cur + 1) >> 1;
1259 shader_addline(buffer, "uniform vec4 %csamplerNP2Fixup[%u];\n", prefix, fixup->num_consts);
1260 }
1261
1262 /* Declare address variables */
1263 for (i = 0, map = reg_maps->address; map; map >>= 1, ++i)
1264 {
1265 if (map & 1) shader_addline(buffer, "ivec4 A%u;\n", i);
1266 }
1267
1268 /* Declare texture coordinate temporaries and initialize them */
1269 for (i = 0, map = reg_maps->texcoord; map; map >>= 1, ++i)
1270 {
1271 if (map & 1) shader_addline(buffer, "vec4 T%u = gl_TexCoord[%u];\n", i, i);
1272 }
1273
1274 /* Declare input register varyings. Only pixel shader, vertex shaders have that declared in the
1275 * helper function shader that is linked in at link time
1276 */
1277 if (pshader && reg_maps->shader_version.major >= 3)
1278 {
1279 if (use_vs(device->stateBlock))
1280 {
1281 shader_addline(buffer, "varying vec4 IN[%u];\n", vec4_varyings(reg_maps->shader_version.major, gl_info));
1282 } else {
1283 /* TODO: Write a replacement shader for the fixed function vertex pipeline, so this isn't needed.
1284 * For fixed function vertex processing + 3.0 pixel shader we need a separate function in the
1285 * pixel shader that reads the fixed function color into the packed input registers.
1286 */
1287 shader_addline(buffer, "vec4 IN[%u];\n", vec4_varyings(reg_maps->shader_version.major, gl_info));
1288 }
1289 }
1290
1291 /* Declare output register temporaries */
1292 if(This->baseShader.limits.packed_output) {
1293 shader_addline(buffer, "vec4 OUT[%u];\n", This->baseShader.limits.packed_output);
1294 }
1295
1296 /* Declare temporary variables */
1297 for (i = 0, map = reg_maps->temporary; map; map >>= 1, ++i)
1298 {
1299 if (map & 1) shader_addline(buffer, "vec4 R%u;\n", i);
1300 }
1301
1302 /* Declare attributes */
1303 if (reg_maps->shader_version.type == WINED3D_SHADER_TYPE_VERTEX)
1304 {
1305 for (i = 0, map = reg_maps->input_registers; map; map >>= 1, ++i)
1306 {
1307 if (map & 1) shader_addline(buffer, "attribute vec4 attrib%i;\n", i);
1308 }
1309 }
1310
1311 /* Declare loop registers aLx */
1312 for (i = 0; i < reg_maps->loop_depth; i++) {
1313 shader_addline(buffer, "int aL%u;\n", i);
1314 shader_addline(buffer, "int tmpInt%u;\n", i);
1315 }
1316
1317 /* Temporary variables for matrix operations */
1318 shader_addline(buffer, "vec4 tmp0;\n");
1319 shader_addline(buffer, "vec4 tmp1;\n");
1320#ifdef VBOX_WITH_VMSVGA
1321 shader_addline(buffer, "bool p0[4];\n");
1322#endif
1323
1324 /* Local constants use a different name so they can be loaded once at shader link time
1325 * They can't be hardcoded into the shader text via LC = {x, y, z, w}; because the
1326 * float -> string conversion can cause precision loss.
1327 */
1328 if(!This->baseShader.load_local_constsF) {
1329 LIST_FOR_EACH_ENTRY(lconst, &This->baseShader.constantsF, local_constant, entry) {
1330 shader_addline(buffer, "uniform vec4 %cLC%u;\n", prefix, lconst->idx);
1331 }
1332 }
1333
1334 shader_addline(buffer, "const float FLT_MAX = 1e38;\n");
1335
1336 /* Start the main program */
1337 shader_addline(buffer, "void main() {\n");
1338 if(pshader && reg_maps->vpos) {
1339 /* DirectX apps expect integer values, while OpenGL drivers add approximately 0.5. This causes
1340 * off-by-one problems as spotted by the vPos d3d9 visual test. Unfortunately the ATI cards do
1341 * not add exactly 0.5, but rather something like 0.49999999 or 0.50000001, which still causes
1342 * precision troubles when we just substract 0.5.
1343 *
1344 * To deal with that just floor() the position. This will eliminate the fraction on all cards.
1345 *
1346 * TODO: Test how that behaves with multisampling once we can enable multisampling in winex11.
1347 *
1348 * An advantage of floor is that it works even if the driver doesn't add 1/2. It is somewhat
1349 * questionable if 1.5, 2.5, ... are the proper values to return in gl_FragCoord, even though
1350 * coordinates specify the pixel centers instead of the pixel corners. This code will behave
1351 * correctly on drivers that returns integer values.
1352 */
1353 shader_addline(buffer, "vpos = floor(vec4(0, ycorrection[0], 0, 0) + gl_FragCoord * vec4(1, ycorrection[1], 1, 1));\n");
1354 }
1355}
1356
1357/*****************************************************************************
1358 * Functions to generate GLSL strings from DirectX Shader bytecode begin here.
1359 *
1360 * For more information, see http://wiki.winehq.org/DirectX-Shaders
1361 ****************************************************************************/
1362
1363/* Prototypes */
1364static void shader_glsl_add_src_param(const struct wined3d_shader_instruction *ins,
1365 const struct wined3d_shader_src_param *wined3d_src, DWORD mask, glsl_src_param_t *glsl_src);
1366
/** Result-shift opcode modifiers, indexed by the 4-bit shift value of the
 * destination parameter. Each entry is the GLSL prefix that multiplies the
 * instruction result by the corresponding factor; shifts without a usable
 * factor map to the empty string (no scaling). */
static const char * const shift_glsl_tab[] =
{
    /*  0: none */ "",
    /*  1: x2   */ "2.0 * ",
    /*  2: x4   */ "4.0 * ",
    /*  3: x8   */ "8.0 * ",
    /*  4: x16  */ "16.0 * ",
    /*  5: x32  */ "32.0 * ",
    /*  6: x64  */ "",
    /*  7: x128 */ "",
    /*  8: d256 */ "",
    /*  9: d128 */ "",
    /* 10: d64  */ "",
    /* 11: d32  */ "",
    /* 12: d16  */ "0.0625 * ",
    /* 13: d8   */ "0.125 * ",
    /* 14: d4   */ "0.25 * ",
    /* 15: d2   */ "0.5 * "
};
1386
1387/* Generate a GLSL parameter that does the input modifier computation and return the input register/mask to use */
1388static void shader_glsl_gen_modifier(DWORD src_modifier, const char *in_reg, const char *in_regswizzle, char *out_str)
1389{
1390 out_str[0] = 0;
1391
1392 switch (src_modifier)
1393 {
1394 case WINED3DSPSM_DZ: /* Need to handle this in the instructions itself (texld & texcrd). */
1395 case WINED3DSPSM_DW:
1396 case WINED3DSPSM_NONE:
1397 sprintf(out_str, "%s%s", in_reg, in_regswizzle);
1398 break;
1399 case WINED3DSPSM_NEG:
1400 sprintf(out_str, "-%s%s", in_reg, in_regswizzle);
1401 break;
1402 case WINED3DSPSM_NOT:
1403 sprintf(out_str, "!%s%s", in_reg, in_regswizzle);
1404 break;
1405 case WINED3DSPSM_BIAS:
1406 sprintf(out_str, "(%s%s - vec4(0.5)%s)", in_reg, in_regswizzle, in_regswizzle);
1407 break;
1408 case WINED3DSPSM_BIASNEG:
1409 sprintf(out_str, "-(%s%s - vec4(0.5)%s)", in_reg, in_regswizzle, in_regswizzle);
1410 break;
1411 case WINED3DSPSM_SIGN:
1412 sprintf(out_str, "(2.0 * (%s%s - 0.5))", in_reg, in_regswizzle);
1413 break;
1414 case WINED3DSPSM_SIGNNEG:
1415 sprintf(out_str, "-(2.0 * (%s%s - 0.5))", in_reg, in_regswizzle);
1416 break;
1417 case WINED3DSPSM_COMP:
1418 sprintf(out_str, "(1.0 - %s%s)", in_reg, in_regswizzle);
1419 break;
1420 case WINED3DSPSM_X2:
1421 sprintf(out_str, "(2.0 * %s%s)", in_reg, in_regswizzle);
1422 break;
1423 case WINED3DSPSM_X2NEG:
1424 sprintf(out_str, "-(2.0 * %s%s)", in_reg, in_regswizzle);
1425 break;
1426 case WINED3DSPSM_ABS:
1427 sprintf(out_str, "abs(%s%s)", in_reg, in_regswizzle);
1428 break;
1429 case WINED3DSPSM_ABSNEG:
1430 sprintf(out_str, "-abs(%s%s)", in_reg, in_regswizzle);
1431 break;
1432 default:
1433 FIXME("Unhandled modifier %u\n", src_modifier);
1434 sprintf(out_str, "%s%s", in_reg, in_regswizzle);
1435 }
1436}
1437
1438/** Writes the GLSL variable name that corresponds to the register that the
1439 * DX opcode parameter is trying to access */
1440static void shader_glsl_get_register_name(const struct wined3d_shader_register *reg,
1441 char *register_name, BOOL *is_color, const struct wined3d_shader_instruction *ins)
1442{
1443 /* oPos, oFog and oPts in D3D */
1444 static const char * const hwrastout_reg_names[] = { "gl_Position", "gl_FogFragCoord", "gl_PointSize" };
1445
1446 IWineD3DBaseShaderImpl *This = (IWineD3DBaseShaderImpl *)ins->ctx->shader;
1447 const struct wined3d_gl_info *gl_info = ins->ctx->gl_info;
1448 char pshader = shader_is_pshader_version(This->baseShader.reg_maps.shader_version.type);
1449
1450 *is_color = FALSE;
1451
1452 switch (reg->type)
1453 {
1454 case WINED3DSPR_TEMP:
1455 sprintf(register_name, "R%u", reg->idx);
1456 break;
1457
1458 case WINED3DSPR_INPUT:
1459 /* vertex shaders */
1460 if (!pshader)
1461 {
1462 struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data;
1463 if (priv->cur_vs_args->swizzle_map & (1 << reg->idx)) *is_color = TRUE;
1464 sprintf(register_name, "attrib%u", reg->idx);
1465 break;
1466 }
1467
1468 /* pixel shaders >= 3.0 */
1469 if (This->baseShader.reg_maps.shader_version.major >= 3)
1470 {
1471 DWORD idx = ((IWineD3DPixelShaderImpl *)This)->input_reg_map[reg->idx];
1472 unsigned int in_count = vec4_varyings(This->baseShader.reg_maps.shader_version.major, gl_info);
1473
1474 if (reg->rel_addr)
1475 {
1476 glsl_src_param_t rel_param;
1477
1478 shader_glsl_add_src_param(ins, reg->rel_addr, WINED3DSP_WRITEMASK_0, &rel_param);
1479
1480 /* Removing a + 0 would be an obvious optimization, but macos doesn't see the NOP
1481 * operation there */
1482 if (idx)
1483 {
1484 if (((IWineD3DPixelShaderImpl *)This)->declared_in_count > in_count)
1485 {
1486 sprintf(register_name,
1487 "((%s + %u) > %d ? (%s + %u) > %d ? gl_SecondaryColor : gl_Color : IN[%s + %u])",
1488 rel_param.param_str, idx, in_count - 1, rel_param.param_str, idx, in_count,
1489 rel_param.param_str, idx);
1490 }
1491 else
1492 {
1493 sprintf(register_name, "IN[%s + %u]", rel_param.param_str, idx);
1494 }
1495 }
1496 else
1497 {
1498 if (((IWineD3DPixelShaderImpl *)This)->declared_in_count > in_count)
1499 {
1500 sprintf(register_name, "((%s) > %d ? (%s) > %d ? gl_SecondaryColor : gl_Color : IN[%s])",
1501 rel_param.param_str, in_count - 1, rel_param.param_str, in_count,
1502 rel_param.param_str);
1503 }
1504 else
1505 {
1506 sprintf(register_name, "IN[%s]", rel_param.param_str);
1507 }
1508 }
1509 }
1510 else
1511 {
1512 if (idx == in_count) sprintf(register_name, "gl_Color");
1513 else if (idx == in_count + 1) sprintf(register_name, "gl_SecondaryColor");
1514 else sprintf(register_name, "IN[%u]", idx);
1515 }
1516 }
1517 else
1518 {
1519 if (reg->idx == 0) strcpy(register_name, "gl_Color");
1520 else strcpy(register_name, "gl_SecondaryColor");
1521 break;
1522 }
1523 break;
1524
1525 case WINED3DSPR_CONST:
1526 {
1527 const char prefix = pshader ? 'P' : 'V';
1528
1529 /* Relative addressing */
1530 if (reg->rel_addr)
1531 {
1532 glsl_src_param_t rel_param;
1533 shader_glsl_add_src_param(ins, reg->rel_addr, WINED3DSP_WRITEMASK_0, &rel_param);
1534 if (reg->idx) sprintf(register_name, "%cC[%s + %u]", prefix, rel_param.param_str, reg->idx);
1535 else sprintf(register_name, "%cC[%s]", prefix, rel_param.param_str);
1536 }
1537 else
1538 {
1539 if (shader_constant_is_local(This, reg->idx))
1540 sprintf(register_name, "%cLC%u", prefix, reg->idx);
1541 else
1542 sprintf(register_name, "%cC[%u]", prefix, reg->idx);
1543 }
1544 }
1545 break;
1546
1547 case WINED3DSPR_CONSTINT:
1548 if (pshader) sprintf(register_name, "PI[%u]", reg->idx);
1549 else sprintf(register_name, "VI[%u]", reg->idx);
1550 break;
1551
1552 case WINED3DSPR_CONSTBOOL:
1553 if (pshader) sprintf(register_name, "PB[%u]", reg->idx);
1554 else sprintf(register_name, "VB[%u]", reg->idx);
1555 break;
1556
1557 case WINED3DSPR_TEXTURE: /* case WINED3DSPR_ADDR: */
1558 if (pshader) sprintf(register_name, "T%u", reg->idx);
1559 else sprintf(register_name, "A%u", reg->idx);
1560 break;
1561
1562 case WINED3DSPR_LOOP:
1563 sprintf(register_name, "aL%u", This->baseShader.cur_loop_regno - 1);
1564 break;
1565
1566 case WINED3DSPR_SAMPLER:
1567 if (pshader) sprintf(register_name, "Psampler%u", reg->idx);
1568 else sprintf(register_name, "Vsampler%u", reg->idx);
1569 break;
1570
1571 case WINED3DSPR_COLOROUT:
1572 if (reg->idx >= gl_info->limits.buffers)
1573 WARN("Write to render target %u, only %d supported.\n", reg->idx, gl_info->limits.buffers);
1574
1575 sprintf(register_name, "gl_FragData[%u]", reg->idx);
1576 break;
1577
1578 case WINED3DSPR_RASTOUT:
1579 sprintf(register_name, "%s", hwrastout_reg_names[reg->idx]);
1580 break;
1581
1582 case WINED3DSPR_DEPTHOUT:
1583 sprintf(register_name, "gl_FragDepth");
1584 break;
1585
1586 case WINED3DSPR_ATTROUT:
1587 if (reg->idx == 0) sprintf(register_name, "gl_FrontColor");
1588 else sprintf(register_name, "gl_FrontSecondaryColor");
1589 break;
1590
1591 case WINED3DSPR_TEXCRDOUT:
1592 /* Vertex shaders >= 3.0: WINED3DSPR_OUTPUT */
1593 if (This->baseShader.reg_maps.shader_version.major >= 3) sprintf(register_name, "OUT[%u]", reg->idx);
1594 else sprintf(register_name, "gl_TexCoord[%u]", reg->idx);
1595 break;
1596
1597 case WINED3DSPR_MISCTYPE:
1598 if (reg->idx == 0)
1599 {
1600 /* vPos */
1601 sprintf(register_name, "vpos");
1602 }
1603 else if (reg->idx == 1)
1604 {
1605 /* Note that gl_FrontFacing is a bool, while vFace is
1606 * a float for which the sign determines front/back */
1607 sprintf(register_name, "(gl_FrontFacing ? 1.0 : -1.0)");
1608 }
1609 else
1610 {
1611 FIXME("Unhandled misctype register %d\n", reg->idx);
1612 sprintf(register_name, "unrecognized_register");
1613 }
1614 break;
1615
1616 case WINED3DSPR_IMMCONST:
1617 switch (reg->immconst_type)
1618 {
1619 case WINED3D_IMMCONST_FLOAT:
1620 sprintf(register_name, "%.8e", *(const float *)reg->immconst_data);
1621 break;
1622
1623 case WINED3D_IMMCONST_FLOAT4:
1624 sprintf(register_name, "vec4(%.8e, %.8e, %.8e, %.8e)",
1625 *(const float *)&reg->immconst_data[0], *(const float *)&reg->immconst_data[1],
1626 *(const float *)&reg->immconst_data[2], *(const float *)&reg->immconst_data[3]);
1627 break;
1628
1629 default:
1630 FIXME("Unhandled immconst type %#x\n", reg->immconst_type);
1631 sprintf(register_name, "<unhandled_immconst_type %#x>", reg->immconst_type);
1632 }
1633 break;
1634
1635#ifdef VBOX_WITH_VMSVGA
1636 case WINED3DSPR_PREDICATE:
1637 sprintf(register_name, "p0");
1638 break;
1639#endif
1640
1641 default:
1642 FIXME("Unhandled register name Type(%d)\n", reg->type);
1643 sprintf(register_name, "unrecognized_register");
1644 break;
1645 }
1646}
1647
1648static void shader_glsl_write_mask_to_str(DWORD write_mask, char *str)
1649{
1650 *str++ = '.';
1651 if (write_mask & WINED3DSP_WRITEMASK_0) *str++ = 'x';
1652 if (write_mask & WINED3DSP_WRITEMASK_1) *str++ = 'y';
1653 if (write_mask & WINED3DSP_WRITEMASK_2) *str++ = 'z';
1654 if (write_mask & WINED3DSP_WRITEMASK_3) *str++ = 'w';
1655 *str = '\0';
1656}
1657
1658/* Get the GLSL write mask for the destination register */
1659static DWORD shader_glsl_get_write_mask(const struct wined3d_shader_dst_param *param, char *write_mask)
1660{
1661 DWORD mask = param->write_mask;
1662
1663 if (shader_is_scalar(&param->reg))
1664 {
1665 mask = WINED3DSP_WRITEMASK_0;
1666 *write_mask = '\0';
1667 }
1668 else
1669 {
1670#ifdef VBOX_WITH_VMSVGA
1671 if (param->reg.type == WINED3DSPR_PREDICATE)
1672 {
1673 *write_mask++ = '[';
1674 if (mask & WINED3DSP_WRITEMASK_0) *write_mask++ = '0';
1675 else
1676 if (mask & WINED3DSP_WRITEMASK_1) *write_mask++ = '1';
1677 else
1678 if (mask & WINED3DSP_WRITEMASK_2) *write_mask++ = '2';
1679 else
1680 if (mask & WINED3DSP_WRITEMASK_3) *write_mask++ = '3';
1681 *write_mask++ = ']';
1682 *write_mask = '\0';
1683 }
1684 else
1685#endif
1686 shader_glsl_write_mask_to_str(mask, write_mask);
1687 }
1688
1689 return mask;
1690}
1691
1692static unsigned int shader_glsl_get_write_mask_size(DWORD write_mask) {
1693 unsigned int size = 0;
1694
1695 if (write_mask & WINED3DSP_WRITEMASK_0) ++size;
1696 if (write_mask & WINED3DSP_WRITEMASK_1) ++size;
1697 if (write_mask & WINED3DSP_WRITEMASK_2) ++size;
1698 if (write_mask & WINED3DSP_WRITEMASK_3) ++size;
1699
1700 return size;
1701}
1702
1703static void shader_glsl_swizzle_to_str(const DWORD swizzle, BOOL fixup, DWORD mask, char *str)
1704{
1705 /* For registers of type WINED3DDECLTYPE_D3DCOLOR, data is stored as "bgra",
1706 * but addressed as "rgba". To fix this we need to swap the register's x
1707 * and z components. */
1708 const char *swizzle_chars = fixup ? "zyxw" : "xyzw";
1709
1710 *str++ = '.';
1711 /* swizzle bits fields: wwzzyyxx */
1712 if (mask & WINED3DSP_WRITEMASK_0) *str++ = swizzle_chars[swizzle & 0x03];
1713 if (mask & WINED3DSP_WRITEMASK_1) *str++ = swizzle_chars[(swizzle >> 2) & 0x03];
1714 if (mask & WINED3DSP_WRITEMASK_2) *str++ = swizzle_chars[(swizzle >> 4) & 0x03];
1715 if (mask & WINED3DSP_WRITEMASK_3) *str++ = swizzle_chars[(swizzle >> 6) & 0x03];
1716 *str = '\0';
1717}
1718
1719static void shader_glsl_get_swizzle(const struct wined3d_shader_src_param *param,
1720 BOOL fixup, DWORD mask, char *swizzle_str)
1721{
1722 if (shader_is_scalar(&param->reg))
1723 *swizzle_str = '\0';
1724 else
1725 shader_glsl_swizzle_to_str(param->swizzle, fixup, mask, swizzle_str);
1726}
1727
1728/* From a given parameter token, generate the corresponding GLSL string.
1729 * Also, return the actual register name and swizzle in case the
1730 * caller needs this information as well. */
1731static void shader_glsl_add_src_param(const struct wined3d_shader_instruction *ins,
1732 const struct wined3d_shader_src_param *wined3d_src, DWORD mask, glsl_src_param_t *glsl_src)
1733{
1734 BOOL is_color = FALSE;
1735 char swizzle_str[6];
1736
1737 glsl_src->reg_name[0] = '\0';
1738 glsl_src->param_str[0] = '\0';
1739 swizzle_str[0] = '\0';
1740
1741 shader_glsl_get_register_name(&wined3d_src->reg, glsl_src->reg_name, &is_color, ins);
1742 shader_glsl_get_swizzle(wined3d_src, is_color, mask, swizzle_str);
1743 shader_glsl_gen_modifier(wined3d_src->modifiers, glsl_src->reg_name, swizzle_str, glsl_src->param_str);
1744}
1745
1746/* From a given parameter token, generate the corresponding GLSL string.
1747 * Also, return the actual register name and swizzle in case the
1748 * caller needs this information as well. */
1749static DWORD shader_glsl_add_dst_param(const struct wined3d_shader_instruction *ins,
1750 const struct wined3d_shader_dst_param *wined3d_dst, glsl_dst_param_t *glsl_dst)
1751{
1752 BOOL is_color = FALSE;
1753
1754 glsl_dst->mask_str[0] = '\0';
1755 glsl_dst->reg_name[0] = '\0';
1756
1757 shader_glsl_get_register_name(&wined3d_dst->reg, glsl_dst->reg_name, &is_color, ins);
1758 return shader_glsl_get_write_mask(wined3d_dst, glsl_dst->mask_str);
1759}
1760
1761/* Append the destination part of the instruction to the buffer, return the effective write mask */
1762static DWORD shader_glsl_append_dst_ext(struct wined3d_shader_buffer *buffer,
1763 const struct wined3d_shader_instruction *ins, const struct wined3d_shader_dst_param *dst)
1764{
1765 glsl_dst_param_t glsl_dst;
1766 DWORD mask;
1767
1768 mask = shader_glsl_add_dst_param(ins, dst, &glsl_dst);
1769 if (mask) shader_addline(buffer, "%s%s = %s(", glsl_dst.reg_name, glsl_dst.mask_str, shift_glsl_tab[dst->shift]);
1770
1771 return mask;
1772}
1773
1774/* Append the destination part of the instruction to the buffer, return the effective write mask */
1775static DWORD shader_glsl_append_dst(struct wined3d_shader_buffer *buffer, const struct wined3d_shader_instruction *ins)
1776{
1777 return shader_glsl_append_dst_ext(buffer, ins, &ins->dst[0]);
1778}
1779
1780/** Process GLSL instruction modifiers */
1781static void shader_glsl_add_instruction_modifiers(const struct wined3d_shader_instruction *ins)
1782{
1783 glsl_dst_param_t dst_param;
1784 DWORD modifiers;
1785
1786 if (!ins->dst_count) return;
1787
1788 modifiers = ins->dst[0].modifiers;
1789 if (!modifiers) return;
1790
1791 shader_glsl_add_dst_param(ins, &ins->dst[0], &dst_param);
1792
1793 if (modifiers & WINED3DSPDM_SATURATE)
1794 {
1795 /* _SAT means to clamp the value of the register to between 0 and 1 */
1796 shader_addline(ins->ctx->buffer, "%s%s = clamp(%s%s, 0.0, 1.0);\n", dst_param.reg_name,
1797 dst_param.mask_str, dst_param.reg_name, dst_param.mask_str);
1798 }
1799
1800 if (modifiers & WINED3DSPDM_MSAMPCENTROID)
1801 {
1802 FIXME("_centroid modifier not handled\n");
1803 }
1804
1805 if (modifiers & WINED3DSPDM_PARTIALPRECISION)
1806 {
1807 /* MSDN says this modifier can be safely ignored, so that's what we'll do. */
1808 }
1809}
1810
1811static inline const char *shader_get_comp_op(DWORD op)
1812{
1813 switch (op) {
1814 case COMPARISON_GT: return ">";
1815 case COMPARISON_EQ: return "==";
1816 case COMPARISON_GE: return ">=";
1817 case COMPARISON_LT: return "<";
1818 case COMPARISON_NE: return "!=";
1819 case COMPARISON_LE: return "<=";
1820 default:
1821 FIXME("Unrecognized comparison value: %u\n", op);
1822 return "(\?\?)";
1823 }
1824}
1825
1826static void shader_glsl_get_sample_function(const struct wined3d_gl_info *gl_info,
1827 DWORD sampler_type, DWORD flags, glsl_sample_function_t *sample_function)
1828{
1829 BOOL projected = flags & WINED3D_GLSL_SAMPLE_PROJECTED;
1830 BOOL texrect = flags & WINED3D_GLSL_SAMPLE_RECT;
1831 BOOL lod = flags & WINED3D_GLSL_SAMPLE_LOD;
1832 BOOL grad = flags & WINED3D_GLSL_SAMPLE_GRAD;
1833
1834 /* Note that there's no such thing as a projected cube texture. */
1835 switch(sampler_type) {
1836 case WINED3DSTT_1D:
1837 if(lod) {
1838 sample_function->name = projected ? "texture1DProjLod" : "texture1DLod";
1839 }
1840 else if (grad)
1841 {
1842 if (gl_info->supported[EXT_GPU_SHADER4])
1843 sample_function->name = projected ? "texture1DProjGrad" : "texture1DGrad";
1844 else if (gl_info->supported[ARB_SHADER_TEXTURE_LOD])
1845 sample_function->name = projected ? "texture1DProjGradARB" : "texture1DGradARB";
1846 else
1847 {
1848 FIXME("Unsupported 1D grad function.\n");
1849 sample_function->name = "unsupported1DGrad";
1850 }
1851 }
1852 else
1853 {
1854 sample_function->name = projected ? "texture1DProj" : "texture1D";
1855 }
1856 sample_function->coord_mask = WINED3DSP_WRITEMASK_0;
1857 break;
1858 case WINED3DSTT_2D:
1859 if(texrect) {
1860 if(lod) {
1861 sample_function->name = projected ? "texture2DRectProjLod" : "texture2DRectLod";
1862 }
1863 else if (grad)
1864 {
1865 if (gl_info->supported[EXT_GPU_SHADER4])
1866 sample_function->name = projected ? "texture2DRectProjGrad" : "texture2DRectGrad";
1867 else if (gl_info->supported[ARB_SHADER_TEXTURE_LOD])
1868 sample_function->name = projected ? "texture2DRectProjGradARB" : "texture2DRectGradARB";
1869 else
1870 {
1871 FIXME("Unsupported RECT grad function.\n");
1872 sample_function->name = "unsupported2DRectGrad";
1873 }
1874 }
1875 else
1876 {
1877 sample_function->name = projected ? "texture2DRectProj" : "texture2DRect";
1878 }
1879 } else {
1880 if(lod) {
1881 sample_function->name = projected ? "texture2DProjLod" : "texture2DLod";
1882 }
1883 else if (grad)
1884 {
1885 if (gl_info->supported[EXT_GPU_SHADER4])
1886 sample_function->name = projected ? "texture2DProjGrad" : "texture2DGrad";
1887 else if (gl_info->supported[ARB_SHADER_TEXTURE_LOD])
1888 sample_function->name = projected ? "texture2DProjGradARB" : "texture2DGradARB";
1889 else
1890 {
1891 FIXME("Unsupported 2D grad function.\n");
1892 sample_function->name = "unsupported2DGrad";
1893 }
1894 }
1895 else
1896 {
1897 sample_function->name = projected ? "texture2DProj" : "texture2D";
1898 }
1899 }
1900 sample_function->coord_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1;
1901 break;
1902 case WINED3DSTT_CUBE:
1903 if(lod) {
1904 sample_function->name = "textureCubeLod";
1905 }
1906 else if (grad)
1907 {
1908 if (gl_info->supported[EXT_GPU_SHADER4])
1909 sample_function->name = "textureCubeGrad";
1910 else if (gl_info->supported[ARB_SHADER_TEXTURE_LOD])
1911 sample_function->name = "textureCubeGradARB";
1912 else
1913 {
1914 FIXME("Unsupported Cube grad function.\n");
1915 sample_function->name = "unsupportedCubeGrad";
1916 }
1917 }
1918 else
1919 {
1920 sample_function->name = "textureCube";
1921 }
1922 sample_function->coord_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
1923 break;
1924 case WINED3DSTT_VOLUME:
1925 if(lod) {
1926 sample_function->name = projected ? "texture3DProjLod" : "texture3DLod";
1927 }
1928 else if (grad)
1929 {
1930 if (gl_info->supported[EXT_GPU_SHADER4])
1931 sample_function->name = projected ? "texture3DProjGrad" : "texture3DGrad";
1932 else if (gl_info->supported[ARB_SHADER_TEXTURE_LOD])
1933 sample_function->name = projected ? "texture3DProjGradARB" : "texture3DGradARB";
1934 else
1935 {
1936 FIXME("Unsupported 3D grad function.\n");
1937 sample_function->name = "unsupported3DGrad";
1938 }
1939 }
1940 else
1941 {
1942 sample_function->name = projected ? "texture3DProj" : "texture3D";
1943 }
1944 sample_function->coord_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
1945 break;
1946 default:
1947 sample_function->name = "";
1948 sample_function->coord_mask = 0;
1949 FIXME("Unrecognized sampler type: %#x;\n", sampler_type);
1950 break;
1951 }
1952}
1953
1954static void shader_glsl_append_fixup_arg(char *arguments, const char *reg_name,
1955 BOOL sign_fixup, enum fixup_channel_source channel_source)
1956{
1957 switch(channel_source)
1958 {
1959 case CHANNEL_SOURCE_ZERO:
1960 strcat(arguments, "0.0");
1961 break;
1962
1963 case CHANNEL_SOURCE_ONE:
1964 strcat(arguments, "1.0");
1965 break;
1966
1967 case CHANNEL_SOURCE_X:
1968 strcat(arguments, reg_name);
1969 strcat(arguments, ".x");
1970 break;
1971
1972 case CHANNEL_SOURCE_Y:
1973 strcat(arguments, reg_name);
1974 strcat(arguments, ".y");
1975 break;
1976
1977 case CHANNEL_SOURCE_Z:
1978 strcat(arguments, reg_name);
1979 strcat(arguments, ".z");
1980 break;
1981
1982 case CHANNEL_SOURCE_W:
1983 strcat(arguments, reg_name);
1984 strcat(arguments, ".w");
1985 break;
1986
1987 default:
1988 FIXME("Unhandled channel source %#x\n", channel_source);
1989 strcat(arguments, "undefined");
1990 break;
1991 }
1992
1993 if (sign_fixup) strcat(arguments, " * 2.0 - 1.0");
1994}
1995
1996static void shader_glsl_color_correction(const struct wined3d_shader_instruction *ins, struct color_fixup_desc fixup)
1997{
1998 struct wined3d_shader_dst_param dst;
1999 unsigned int mask_size, remaining;
2000 glsl_dst_param_t dst_param;
2001 char arguments[256];
2002 DWORD mask;
2003
2004 mask = 0;
2005 if (fixup.x_sign_fixup || fixup.x_source != CHANNEL_SOURCE_X) mask |= WINED3DSP_WRITEMASK_0;
2006 if (fixup.y_sign_fixup || fixup.y_source != CHANNEL_SOURCE_Y) mask |= WINED3DSP_WRITEMASK_1;
2007 if (fixup.z_sign_fixup || fixup.z_source != CHANNEL_SOURCE_Z) mask |= WINED3DSP_WRITEMASK_2;
2008 if (fixup.w_sign_fixup || fixup.w_source != CHANNEL_SOURCE_W) mask |= WINED3DSP_WRITEMASK_3;
2009 mask &= ins->dst[0].write_mask;
2010
2011 if (!mask) return; /* Nothing to do */
2012
2013 if (is_complex_fixup(fixup))
2014 {
2015 enum complex_fixup complex_fixup = get_complex_fixup(fixup);
2016 FIXME("Complex fixup (%#x) not supported\n",complex_fixup); (void)complex_fixup;
2017 return;
2018 }
2019
2020 mask_size = shader_glsl_get_write_mask_size(mask);
2021
2022 dst = ins->dst[0];
2023 dst.write_mask = mask;
2024 shader_glsl_add_dst_param(ins, &dst, &dst_param);
2025
2026 arguments[0] = '\0';
2027 remaining = mask_size;
2028 if (mask & WINED3DSP_WRITEMASK_0)
2029 {
2030 shader_glsl_append_fixup_arg(arguments, dst_param.reg_name, fixup.x_sign_fixup, fixup.x_source);
2031 if (--remaining) strcat(arguments, ", ");
2032 }
2033 if (mask & WINED3DSP_WRITEMASK_1)
2034 {
2035 shader_glsl_append_fixup_arg(arguments, dst_param.reg_name, fixup.y_sign_fixup, fixup.y_source);
2036 if (--remaining) strcat(arguments, ", ");
2037 }
2038 if (mask & WINED3DSP_WRITEMASK_2)
2039 {
2040 shader_glsl_append_fixup_arg(arguments, dst_param.reg_name, fixup.z_sign_fixup, fixup.z_source);
2041 if (--remaining) strcat(arguments, ", ");
2042 }
2043 if (mask & WINED3DSP_WRITEMASK_3)
2044 {
2045 shader_glsl_append_fixup_arg(arguments, dst_param.reg_name, fixup.w_sign_fixup, fixup.w_source);
2046 if (--remaining) strcat(arguments, ", ");
2047 }
2048
2049 if (mask_size > 1)
2050 {
2051 shader_addline(ins->ctx->buffer, "%s%s = vec%u(%s);\n",
2052 dst_param.reg_name, dst_param.mask_str, mask_size, arguments);
2053 }
2054 else
2055 {
2056 shader_addline(ins->ctx->buffer, "%s%s = %s;\n", dst_param.reg_name, dst_param.mask_str, arguments);
2057 }
2058}
2059
/* Emit the GLSL statement that samples a texture into the destination of 'ins'.
 *
 * sampler          index of the sampler; appended to "Psampler" / "Vsampler".
 * sample_function  lookup function name and coordinate mask to use.
 * swizzle          swizzle applied to the sampled value (restricted to the
 *                  destination write mask).
 * dx, dy           optional gradient arguments; only used when both are non-NULL
 *                  and neither a bias nor the NP2 fixup applies.
 * bias             optional bias/LOD argument; takes precedence over dx/dy.
 * coord_reg_fmt    printf-style format (plus varargs) producing the texture
 *                  coordinate expression.
 *
 * After the sample statement a color fixup statement is emitted if the
 * sampler's fixup is not the identity. */
static void PRINTF_ATTR(8, 9) shader_glsl_gen_sample_code(const struct wined3d_shader_instruction *ins,
        DWORD sampler, const glsl_sample_function_t *sample_function, DWORD swizzle,
        const char *dx, const char *dy,
        const char *bias, const char *coord_reg_fmt, ...)
{
    const char *sampler_base;
    char dst_swizzle[6];
    struct color_fixup_desc fixup;
    BOOL np2_fixup = FALSE;
    BOOL tmirror_tmp_reg = FALSE;
    va_list args;

    shader_glsl_swizzle_to_str(swizzle, FALSE, ins->dst[0].write_mask, dst_swizzle);

    if (shader_is_pshader_version(ins->ctx->reg_maps->shader_version.type))
    {
        /* Pixel shader: fixups come from the current pixel shader compile arguments. */
        const struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data;
        fixup = priv->cur_ps_args->color_fixup[sampler];
        sampler_base = "Psampler";

        if (priv->cur_ps_args->np2_fixup & (1 << sampler)) {
            if(bias) {
                FIXME("Biased sampling from NP2 textures is unsupported\n");
            } else {
                np2_fixup = TRUE;
            }
        }

        /* t_mirror: flip the y texture coordinate (y = 1.0 - y) for 2D samplers
         * before sampling. */
        if (priv->cur_ps_args->t_mirror & (1 << sampler))
        {
            if (ins->ctx->reg_maps->sampler_type[sampler]==WINED3DSTT_2D)
            {
                if (sample_function->coord_mask & WINED3DSP_WRITEMASK_1)
                {
                    glsl_src_param_t coord_param;
                    shader_glsl_add_src_param(ins, &ins->src[0], sample_function->coord_mask, &coord_param);

                    if (ins->src[0].reg.type != WINED3DSPR_INPUT)
                    {
                        /* NOTE(review): this emits an in-place update of the
                         * coordinate register itself, so the flipped value stays
                         * in that register for later instructions of the
                         * generated shader. */
                        shader_addline(ins->ctx->buffer, "%s.y=1.0-%s.y;\n",
                                coord_param.reg_name, coord_param.reg_name);
                    }
                    else
                    {
                        /* Input registers are not written to; the flipped
                         * coordinate goes through tmp0.xy instead. */
                        tmirror_tmp_reg = TRUE;
                        shader_addline(ins->ctx->buffer, "tmp0.xy=vec2(%s.x, 1.0-%s.y).xy;\n",
                                coord_param.reg_name, coord_param.reg_name);
                    }
                }
                else
                {
                    DebugBreak();
                    FIXME("Unexpected coord_mask with t_mirror\n");
                }
            }
        }
    } else {
        sampler_base = "Vsampler";
        fixup = COLOR_FIXUP_IDENTITY; /* FIXME: Vshader color fixup */
    }

    /* "<dst> = <shift>(" */
    shader_glsl_append_dst(ins->ctx->buffer, ins);

    /* "<function>(<sampler>, " */
    shader_addline(ins->ctx->buffer, "%s(%s%u, ", sample_function->name, sampler_base, sampler);

    if (tmirror_tmp_reg)
    {
        /* The mirrored coordinate replaces the caller's coordinate expression;
         * coord_reg_fmt and its varargs are not used on this path. */
        shader_addline(ins->ctx->buffer, "%s", "tmp0.xy");
    }
    else
    {
        va_start(args, coord_reg_fmt);
        shader_vaddline(ins->ctx->buffer, coord_reg_fmt, args);
        va_end(args);
    }

    /* Close the call: optional extra argument(s), then the swizzle of the
     * sampled value and the closing parenthesis opened by the destination. */
    if(bias) {
        shader_addline(ins->ctx->buffer, ", %s)%s);\n", bias, dst_swizzle);
    } else {
        if (np2_fixup) {
            /* Scale the coordinate by the per-sampler NP2 fixup factor; two
             * samplers share one PsamplerNP2Fixup vector (xy / zw halves). */
            const struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data;
            const unsigned char idx = priv->cur_np2fixup_info->idx[sampler];

            shader_addline(ins->ctx->buffer, " * PsamplerNP2Fixup[%u].%s)%s);\n", idx >> 1,
                    (idx % 2) ? "zw" : "xy", dst_swizzle);
        } else if(dx && dy) {
            shader_addline(ins->ctx->buffer, ", %s, %s)%s);\n", dx, dy, dst_swizzle);
        } else {
            shader_addline(ins->ctx->buffer, ")%s);\n", dst_swizzle);
        }
    }

    if(!is_identity_fixup(fixup)) {
        shader_glsl_color_correction(ins, fixup);
    }
}
2156
2157/*****************************************************************************
2158 * Begin processing individual instruction opcodes
2159 ****************************************************************************/
2160
2161/* Generate GLSL arithmetic functions (dst = src1 + src2) */
2162static void shader_glsl_arith(const struct wined3d_shader_instruction *ins)
2163{
2164 struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
2165 glsl_src_param_t src0_param;
2166 glsl_src_param_t src1_param;
2167 DWORD write_mask;
2168 char op;
2169
2170 /* Determine the GLSL operator to use based on the opcode */
2171 switch (ins->handler_idx)
2172 {
2173 case WINED3DSIH_MUL: op = '*'; break;
2174 case WINED3DSIH_ADD: op = '+'; break;
2175 case WINED3DSIH_SUB: op = '-'; break;
2176 default:
2177 op = ' ';
2178 FIXME("Opcode %#x not yet handled in GLSL\n", ins->handler_idx);
2179 break;
2180 }
2181
2182 write_mask = shader_glsl_append_dst(buffer, ins);
2183 shader_glsl_add_src_param(ins, &ins->src[0], write_mask, &src0_param);
2184 shader_glsl_add_src_param(ins, &ins->src[1], write_mask, &src1_param);
2185 shader_addline(buffer, "%s %c %s);\n", src0_param.param_str, op, src1_param.param_str);
2186}
2187
#ifdef VBOX_WITH_VMSVGA
static void shader_glsl_mov_impl(const struct wined3d_shader_instruction *ins, int p0_idx);

/* Process the WINED3DSIO_MOV opcode using GLSL (dst = src).
 *
 * VBOX_WITH_VMSVGA build: a predicated mov is split into one guarded move per
 * destination component, each wrapped in "if (p0[<component>]) { ... }" by
 * shader_glsl_mov_impl(). Unpredicated moves are emitted as a single statement. */
static void shader_glsl_mov(const struct wined3d_shader_instruction *ins)
{
    if (ins->predicate)
    {
        int i;
        DWORD dst_mask = ins->dst[0].write_mask;
        /* NOTE(review): the const is cast away so the write mask of the
         * instruction's own destination can be narrowed temporarily; the
         * original mask is restored after the loop. */
        struct wined3d_shader_dst_param *dst = (struct wined3d_shader_dst_param *)&ins->dst[0];

        for (i = 0; i < 4; i++)
        {
            if (dst_mask & RT_BIT(i))
            {
                /* Restrict the destination to this one component; the same
                 * index is used as the predicate component. */
                dst->write_mask = RT_BIT(i);

                shader_glsl_mov_impl(ins, i);
            }
        }
        dst->write_mask = dst_mask;
    }
    else
        shader_glsl_mov_impl(ins, 0);
}

/* Emit a single mov statement for the current destination write mask.
 * p0_idx is the predicate component tested when the instruction is
 * predicated; it is ignored otherwise. The function body is shared with the
 * non-VMSVGA shader_glsl_mov() below. */
static void shader_glsl_mov_impl(const struct wined3d_shader_instruction *ins, int p0_idx)

#else
/* Process the WINED3DSIO_MOV opcode using GLSL (dst = src) */
static void shader_glsl_mov(const struct wined3d_shader_instruction *ins)
#endif
{
    const struct wined3d_gl_info *gl_info = ins->ctx->gl_info;
    struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
    glsl_src_param_t src0_param;
    DWORD write_mask;

#ifdef VBOX_WITH_VMSVGA
    /* Open the predicate guard; it is closed at the end of the function. */
    if (ins->predicate)
    {
        shader_addline(buffer, "if (p0[%d]) {\n", p0_idx);
    }
#endif

    write_mask = shader_glsl_append_dst(buffer, ins);
    shader_glsl_add_src_param(ins, &ins->src[0], write_mask, &src0_param);

    /* In vs_1_1 WINED3DSIO_MOV can write to the address register. In later
     * shader versions WINED3DSIO_MOVA is used for this. */
    if (ins->ctx->reg_maps->shader_version.major == 1
            && !shader_is_pshader_version(ins->ctx->reg_maps->shader_version.type)
            && ins->dst[0].reg.type == WINED3DSPR_ADDR)
    {
        /* This is a simple floor() */
        unsigned int mask_size = shader_glsl_get_write_mask_size(write_mask);
        if (mask_size > 1) {
            shader_addline(buffer, "ivec%d(floor(%s)));\n", mask_size, src0_param.param_str);
        } else {
            shader_addline(buffer, "int(floor(%s)));\n", src0_param.param_str);
        }
    }
    else if(ins->handler_idx == WINED3DSIH_MOVA)
    {
        /* We need to *round* to the nearest int here. */
        unsigned int mask_size = shader_glsl_get_write_mask_size(write_mask);

        if (gl_info->supported[EXT_GPU_SHADER4])
        {
            /* round() is used when EXT_gpu_shader4 is supported. */
            if (mask_size > 1)
                shader_addline(buffer, "ivec%d(round(%s)));\n", mask_size, src0_param.param_str);
            else
                shader_addline(buffer, "int(round(%s)));\n", src0_param.param_str);
        }
        else
        {
            /* Otherwise round half away from zero by hand:
             * floor(|x| + 0.5) * sign(x). */
            if (mask_size > 1)
                shader_addline(buffer, "ivec%d(floor(abs(%s) + vec%d(0.5)) * sign(%s)));\n",
                        mask_size, src0_param.param_str, mask_size, src0_param.param_str);
            else
                shader_addline(buffer, "int(floor(abs(%s) + 0.5) * sign(%s)));\n",
                        src0_param.param_str, src0_param.param_str);
        }
    }
    else
    {
        /* Plain move. */
        shader_addline(buffer, "%s);\n", src0_param.param_str);
    }
#ifdef VBOX_WITH_VMSVGA
    if (ins->predicate)
    {
        shader_addline(buffer, "}\n");
    }
#endif
}
2285
2286/* Process the dot product operators DP3 and DP4 in GLSL (dst = dot(src0, src1)) */
2287static void shader_glsl_dot(const struct wined3d_shader_instruction *ins)
2288{
2289 struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
2290 glsl_src_param_t src0_param;
2291 glsl_src_param_t src1_param;
2292 DWORD dst_write_mask, src_write_mask;
2293 unsigned int dst_size = 0;
2294
2295 dst_write_mask = shader_glsl_append_dst(buffer, ins);
2296 dst_size = shader_glsl_get_write_mask_size(dst_write_mask);
2297
2298 /* dp3 works on vec3, dp4 on vec4 */
2299 if (ins->handler_idx == WINED3DSIH_DP4)
2300 {
2301 src_write_mask = WINED3DSP_WRITEMASK_ALL;
2302 } else {
2303 src_write_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
2304 }
2305
2306 shader_glsl_add_src_param(ins, &ins->src[0], src_write_mask, &src0_param);
2307 shader_glsl_add_src_param(ins, &ins->src[1], src_write_mask, &src1_param);
2308
2309 if (dst_size > 1) {
2310 shader_addline(buffer, "vec%d(dot(%s, %s)));\n", dst_size, src0_param.param_str, src1_param.param_str);
2311 } else {
2312 shader_addline(buffer, "dot(%s, %s));\n", src0_param.param_str, src1_param.param_str);
2313 }
2314}
2315
2316/* Note that this instruction has some restrictions. The destination write mask
2317 * can't contain the w component, and the source swizzles have to be .xyzw */
2318static void shader_glsl_cross(const struct wined3d_shader_instruction *ins)
2319{
2320 DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
2321 glsl_src_param_t src0_param;
2322 glsl_src_param_t src1_param;
2323 char dst_mask[6];
2324
2325 shader_glsl_get_write_mask(&ins->dst[0], dst_mask);
2326 shader_glsl_append_dst(ins->ctx->buffer, ins);
2327 shader_glsl_add_src_param(ins, &ins->src[0], src_mask, &src0_param);
2328 shader_glsl_add_src_param(ins, &ins->src[1], src_mask, &src1_param);
2329 shader_addline(ins->ctx->buffer, "cross(%s, %s)%s);\n", src0_param.param_str, src1_param.param_str, dst_mask);
2330}
2331
2332/* Process the WINED3DSIO_POW instruction in GLSL (dst = |src0|^src1)
2333 * Src0 and src1 are scalars. Note that D3D uses the absolute of src0, while
2334 * GLSL uses the value as-is. */
2335static void shader_glsl_pow(const struct wined3d_shader_instruction *ins)
2336{
2337 struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
2338 glsl_src_param_t src0_param;
2339 glsl_src_param_t src1_param;
2340 DWORD dst_write_mask;
2341 unsigned int dst_size;
2342
2343 dst_write_mask = shader_glsl_append_dst(buffer, ins);
2344 dst_size = shader_glsl_get_write_mask_size(dst_write_mask);
2345
2346 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0, &src0_param);
2347 shader_glsl_add_src_param(ins, &ins->src[1], WINED3DSP_WRITEMASK_0, &src1_param);
2348
2349 if (dst_size > 1) {
2350 shader_addline(buffer, "vec%d(pow(abs(%s), %s)));\n", dst_size, src0_param.param_str, src1_param.param_str);
2351 } else {
2352 shader_addline(buffer, "pow(abs(%s), %s));\n", src0_param.param_str, src1_param.param_str);
2353 }
2354}
2355
2356/* Process the WINED3DSIO_LOG instruction in GLSL (dst = log2(|src0|))
2357 * Src0 is a scalar. Note that D3D uses the absolute of src0, while
2358 * GLSL uses the value as-is. */
2359static void shader_glsl_log(const struct wined3d_shader_instruction *ins)
2360{
2361 struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
2362 glsl_src_param_t src0_param;
2363 DWORD dst_write_mask;
2364 unsigned int dst_size;
2365
2366 dst_write_mask = shader_glsl_append_dst(buffer, ins);
2367 dst_size = shader_glsl_get_write_mask_size(dst_write_mask);
2368
2369 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0, &src0_param);
2370
2371 if (dst_size > 1)
2372 {
2373 shader_addline(buffer, "vec%d(%s == 0.0 ? -FLT_MAX : log2(abs(%s))));\n",
2374 dst_size, src0_param.param_str, src0_param.param_str);
2375 }
2376 else
2377 {
2378 shader_addline(buffer, "%s == 0.0 ? -FLT_MAX : log2(abs(%s)));\n",
2379 src0_param.param_str, src0_param.param_str);
2380 }
2381}
2382
2383/* Map the opcode 1-to-1 to the GL code (arg->dst = instruction(src0, src1, ...) */
2384static void shader_glsl_map2gl(const struct wined3d_shader_instruction *ins)
2385{
2386 struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
2387 glsl_src_param_t src_param;
2388 const char *instruction;
2389 DWORD write_mask;
2390 unsigned i;
2391
2392 /* Determine the GLSL function to use based on the opcode */
2393 /* TODO: Possibly make this a table for faster lookups */
2394 switch (ins->handler_idx)
2395 {
2396 case WINED3DSIH_MIN: instruction = "min"; break;
2397 case WINED3DSIH_MAX: instruction = "max"; break;
2398 case WINED3DSIH_ABS: instruction = "abs"; break;
2399 case WINED3DSIH_FRC: instruction = "fract"; break;
2400 case WINED3DSIH_EXP: instruction = "exp2"; break;
2401 case WINED3DSIH_DSX: instruction = "dFdx"; break;
2402 case WINED3DSIH_DSY: instruction = "ycorrection.y * dFdy"; break;
2403 default: instruction = "";
2404 FIXME("Opcode %#x not yet handled in GLSL\n", ins->handler_idx);
2405 break;
2406 }
2407
2408 write_mask = shader_glsl_append_dst(buffer, ins);
2409
2410 shader_addline(buffer, "%s(", instruction);
2411
2412 if (ins->src_count)
2413 {
2414 shader_glsl_add_src_param(ins, &ins->src[0], write_mask, &src_param);
2415 shader_addline(buffer, "%s", src_param.param_str);
2416 for (i = 1; i < ins->src_count; ++i)
2417 {
2418 shader_glsl_add_src_param(ins, &ins->src[i], write_mask, &src_param);
2419 shader_addline(buffer, ", %s", src_param.param_str);
2420 }
2421 }
2422
2423 shader_addline(buffer, "));\n");
2424}
2425
2426static void shader_glsl_nrm(const struct wined3d_shader_instruction *ins)
2427{
2428 struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
2429 glsl_src_param_t src_param;
2430 unsigned int mask_size;
2431 DWORD write_mask;
2432 char dst_mask[6];
2433
2434 write_mask = shader_glsl_get_write_mask(ins->dst, dst_mask);
2435 mask_size = shader_glsl_get_write_mask_size(write_mask);
2436 shader_glsl_add_src_param(ins, &ins->src[0], write_mask, &src_param);
2437
2438 shader_addline(buffer, "tmp0.x = length(%s);\n", src_param.param_str);
2439 shader_glsl_append_dst(buffer, ins);
2440 if (mask_size > 1)
2441 {
2442 shader_addline(buffer, "tmp0.x == 0.0 ? vec%u(0.0) : (%s / tmp0.x));\n",
2443 mask_size, src_param.param_str);
2444 }
2445 else
2446 {
2447 shader_addline(buffer, "tmp0.x == 0.0 ? 0.0 : (%s / tmp0.x));\n",
2448 src_param.param_str);
2449 }
2450}
2451
2452/** Process the WINED3DSIO_EXPP instruction in GLSL:
2453 * For shader model 1.x, do the following (and honor the writemask, so use a temporary variable):
2454 * dst.x = 2^(floor(src))
2455 * dst.y = src - floor(src)
2456 * dst.z = 2^src (partial precision is allowed, but optional)
2457 * dst.w = 1.0;
2458 * For 2.0 shaders, just do this (honoring writemask and swizzle):
2459 * dst = 2^src; (partial precision is allowed, but optional)
2460 */
2461static void shader_glsl_expp(const struct wined3d_shader_instruction *ins)
2462{
2463 glsl_src_param_t src_param;
2464
2465 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0, &src_param);
2466
2467 if (ins->ctx->reg_maps->shader_version.major < 2)
2468 {
2469 char dst_mask[6];
2470
2471 shader_addline(ins->ctx->buffer, "tmp0.x = exp2(floor(%s));\n", src_param.param_str);
2472 shader_addline(ins->ctx->buffer, "tmp0.y = %s - floor(%s);\n", src_param.param_str, src_param.param_str);
2473 shader_addline(ins->ctx->buffer, "tmp0.z = exp2(%s);\n", src_param.param_str);
2474 shader_addline(ins->ctx->buffer, "tmp0.w = 1.0;\n");
2475
2476 shader_glsl_append_dst(ins->ctx->buffer, ins);
2477 shader_glsl_get_write_mask(&ins->dst[0], dst_mask);
2478 shader_addline(ins->ctx->buffer, "tmp0%s);\n", dst_mask);
2479 } else {
2480 DWORD write_mask;
2481 unsigned int mask_size;
2482
2483 write_mask = shader_glsl_append_dst(ins->ctx->buffer, ins);
2484 mask_size = shader_glsl_get_write_mask_size(write_mask);
2485
2486 if (mask_size > 1) {
2487 shader_addline(ins->ctx->buffer, "vec%d(exp2(%s)));\n", mask_size, src_param.param_str);
2488 } else {
2489 shader_addline(ins->ctx->buffer, "exp2(%s));\n", src_param.param_str);
2490 }
2491 }
2492}
2493
2494/** Process the RCP (reciprocal or inverse) opcode in GLSL (dst = 1 / src) */
2495static void shader_glsl_rcp(const struct wined3d_shader_instruction *ins)
2496{
2497 glsl_src_param_t src_param;
2498 DWORD write_mask;
2499 unsigned int mask_size;
2500
2501 write_mask = shader_glsl_append_dst(ins->ctx->buffer, ins);
2502 mask_size = shader_glsl_get_write_mask_size(write_mask);
2503 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_3, &src_param);
2504
2505 if (mask_size > 1)
2506 {
2507 shader_addline(ins->ctx->buffer, "vec%d(%s == 0.0 ? FLT_MAX : 1.0 / %s));\n",
2508 mask_size, src_param.param_str, src_param.param_str);
2509 }
2510 else
2511 {
2512 shader_addline(ins->ctx->buffer, "%s == 0.0 ? FLT_MAX : 1.0 / %s);\n",
2513 src_param.param_str, src_param.param_str);
2514 }
2515}
2516
2517static void shader_glsl_rsq(const struct wined3d_shader_instruction *ins)
2518{
2519 struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
2520 glsl_src_param_t src_param;
2521 DWORD write_mask;
2522 unsigned int mask_size;
2523
2524 write_mask = shader_glsl_append_dst(buffer, ins);
2525 mask_size = shader_glsl_get_write_mask_size(write_mask);
2526
2527 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_3, &src_param);
2528
2529 if (mask_size > 1)
2530 {
2531 shader_addline(buffer, "vec%d(%s == 0.0 ? FLT_MAX : inversesqrt(abs(%s))));\n",
2532 mask_size, src_param.param_str, src_param.param_str);
2533 }
2534 else
2535 {
2536 shader_addline(buffer, "%s == 0.0 ? FLT_MAX : inversesqrt(abs(%s)));\n",
2537 src_param.param_str, src_param.param_str);
2538 }
2539}
2540
#ifdef VBOX_WITH_VMSVGA
/* Process the SETP opcode in GLSL: for every destination component in the
 * write mask emit "dst.c = shift(src0 <op> src1);", where <op> is the
 * comparison selected by ins->flags. Each component gets its own statement. */
static void shader_glsl_setp(const struct wined3d_shader_instruction *ins)
{
    struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
    const DWORD dst_mask = ins->dst[0].write_mask;
    /* Local copy of the destination so its write mask can be narrowed per
     * component without touching the instruction. */
    struct wined3d_shader_dst_param dst = ins->dst[0];
    glsl_src_param_t lhs, rhs;
    int channel;

    for (channel = 0; channel < 4; ++channel)
    {
        DWORD write_mask;

        if (!(dst_mask & RT_BIT(channel)))
            continue;

        dst.write_mask = dst_mask & (WINED3DSP_WRITEMASK_0 << channel);

        write_mask = shader_glsl_append_dst_ext(buffer, ins, &dst);
        Assert(write_mask);

        shader_glsl_add_src_param(ins, &ins->src[0], write_mask, &lhs);
        shader_glsl_add_src_param(ins, &ins->src[1], write_mask, &rhs);

        shader_addline(buffer, "%s %s %s);\n",
                lhs.param_str, shader_get_comp_op(ins->flags), rhs.param_str);
    }
}
#endif
2571
2572/** Process signed comparison opcodes in GLSL. */
2573static void shader_glsl_compare(const struct wined3d_shader_instruction *ins)
2574{
2575 glsl_src_param_t src0_param;
2576 glsl_src_param_t src1_param;
2577 DWORD write_mask;
2578 unsigned int mask_size;
2579
2580 write_mask = shader_glsl_append_dst(ins->ctx->buffer, ins);
2581 mask_size = shader_glsl_get_write_mask_size(write_mask);
2582 shader_glsl_add_src_param(ins, &ins->src[0], write_mask, &src0_param);
2583 shader_glsl_add_src_param(ins, &ins->src[1], write_mask, &src1_param);
2584
2585 if (mask_size > 1) {
2586 const char *compare;
2587
2588 switch(ins->handler_idx)
2589 {
2590 case WINED3DSIH_SLT: compare = "lessThan"; break;
2591 case WINED3DSIH_SGE: compare = "greaterThanEqual"; break;
2592 default: compare = "";
2593 FIXME("Can't handle opcode %#x\n", ins->handler_idx);
2594 }
2595
2596 shader_addline(ins->ctx->buffer, "vec%d(%s(%s, %s)));\n", mask_size, compare,
2597 src0_param.param_str, src1_param.param_str);
2598 } else {
2599 switch(ins->handler_idx)
2600 {
2601 case WINED3DSIH_SLT:
2602 /* Step(src0, src1) is not suitable here because if src0 == src1 SLT is supposed,
2603 * to return 0.0 but step returns 1.0 because step is not < x
2604 * An alternative is a bvec compare padded with an unused second component.
2605 * step(src1 * -1.0, src0 * -1.0) is not an option because it suffers from the same
2606 * issue. Playing with not() is not possible either because not() does not accept
2607 * a scalar.
2608 */
2609 shader_addline(ins->ctx->buffer, "(%s < %s) ? 1.0 : 0.0);\n",
2610 src0_param.param_str, src1_param.param_str);
2611 break;
2612 case WINED3DSIH_SGE:
2613 /* Here we can use the step() function and safe a conditional */
2614 shader_addline(ins->ctx->buffer, "step(%s, %s));\n", src1_param.param_str, src0_param.param_str);
2615 break;
2616 default:
2617 FIXME("Can't handle opcode %#x\n", ins->handler_idx);
2618 }
2619
2620 }
2621}
2622
/** Process CMP instruction in GLSL (dst = src0 >= 0.0 ? src1 : src2), per channel.
 *
 * A scalar src0 is handled with a single statement. Otherwise the instruction
 * is split into up to four statements, one per src0 component, each writing
 * the destination channels whose src0 swizzle selects that component. */
static void shader_glsl_cmp(const struct wined3d_shader_instruction *ins)
{
    glsl_src_param_t src0_param;
    glsl_src_param_t src1_param;
    glsl_src_param_t src2_param;
    DWORD write_mask, cmp_channel = 0;
    unsigned int i, j;
    char mask_char[6];
    BOOL temp_destination = FALSE;

    if (shader_is_scalar(&ins->src[0].reg))
    {
        /* One condition for all written channels. */
        write_mask = shader_glsl_append_dst(ins->ctx->buffer, ins);

        shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_ALL, &src0_param);
        shader_glsl_add_src_param(ins, &ins->src[1], write_mask, &src1_param);
        shader_glsl_add_src_param(ins, &ins->src[2], write_mask, &src2_param);

        shader_addline(ins->ctx->buffer, "%s >= 0.0 ? %s : %s);\n",
                src0_param.param_str, src1_param.param_str, src2_param.param_str);
    } else {
        DWORD dst_mask = ins->dst[0].write_mask;
        /* Local copy of the destination whose write mask is narrowed per pass. */
        struct wined3d_shader_dst_param dst = ins->dst[0];

        /* Cycle through all source0 channels */
        for (i=0; i<4; i++) {
            write_mask = 0;
            /* Find the destination channels which use the current source0 channel */
            for (j=0; j<4; j++) {
                if (((ins->src[0].swizzle >> (2 * j)) & 0x3) == i)
                {
                    write_mask |= WINED3DSP_WRITEMASK_0 << j;
                    /* Any one of the matching channels will do for reading the
                     * condition, since they all select src0 component i; the
                     * last match is kept. */
                    cmp_channel = WINED3DSP_WRITEMASK_0 << j;
                }
            }
            dst.write_mask = dst_mask & write_mask;

            /* Splitting the cmp instruction up in multiple lines imposes a problem:
             * The first lines may overwrite source parameters of the following lines.
             * Deal with that by using a temporary destination register if needed
             */
            /* This register comparison does not depend on i, so every pass
             * takes the same branch. */
            if ((ins->src[0].reg.idx == ins->dst[0].reg.idx
                    && ins->src[0].reg.type == ins->dst[0].reg.type)
                    || (ins->src[1].reg.idx == ins->dst[0].reg.idx
                    && ins->src[1].reg.type == ins->dst[0].reg.type)
                    || (ins->src[2].reg.idx == ins->dst[0].reg.idx
                    && ins->src[2].reg.type == ins->dst[0].reg.type))
            {
                write_mask = shader_glsl_get_write_mask(&dst, mask_char);
                /* No destination channel uses this src0 component. */
                if (!write_mask) continue;
                shader_addline(ins->ctx->buffer, "tmp0%s = (", mask_char);
                temp_destination = TRUE;
            } else {
                write_mask = shader_glsl_append_dst_ext(ins->ctx->buffer, ins, &dst);
                /* No destination channel uses this src0 component. */
                if (!write_mask) continue;
            }

            shader_glsl_add_src_param(ins, &ins->src[0], cmp_channel, &src0_param);
            shader_glsl_add_src_param(ins, &ins->src[1], write_mask, &src1_param);
            shader_glsl_add_src_param(ins, &ins->src[2], write_mask, &src2_param);

            shader_addline(ins->ctx->buffer, "%s >= 0.0 ? %s : %s);\n",
                    src0_param.param_str, src1_param.param_str, src2_param.param_str);
        }

        /* Copy the result from the temporary into the real destination, using
         * the instruction's full write mask. */
        if(temp_destination) {
            shader_glsl_get_write_mask(&ins->dst[0], mask_char);
            shader_glsl_append_dst(ins->ctx->buffer, ins);
            shader_addline(ins->ctx->buffer, "tmp0%s);\n", mask_char);
        }
    }

}
2697
2698/** Process the CND opcode in GLSL (dst = (src0 > 0.5) ? src1 : src2) */
2699/* For ps 1.1-1.3, only a single component of src0 is used. For ps 1.4
2700 * the compare is done per component of src0. */
2701static void shader_glsl_cnd(const struct wined3d_shader_instruction *ins)
2702{
2703 struct wined3d_shader_dst_param dst;
2704 glsl_src_param_t src0_param;
2705 glsl_src_param_t src1_param;
2706 glsl_src_param_t src2_param;
2707 DWORD write_mask, cmp_channel = 0;
2708 unsigned int i, j;
2709 DWORD dst_mask;
2710 DWORD shader_version = WINED3D_SHADER_VERSION(ins->ctx->reg_maps->shader_version.major,
2711 ins->ctx->reg_maps->shader_version.minor);
2712
2713 if (shader_version < WINED3D_SHADER_VERSION(1, 4))
2714 {
2715 write_mask = shader_glsl_append_dst(ins->ctx->buffer, ins);
2716 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0, &src0_param);
2717 shader_glsl_add_src_param(ins, &ins->src[1], write_mask, &src1_param);
2718 shader_glsl_add_src_param(ins, &ins->src[2], write_mask, &src2_param);
2719
2720 /* Fun: The D3DSI_COISSUE flag changes the semantic of the cnd instruction for < 1.4 shaders */
2721 if (ins->coissue)
2722 {
2723 shader_addline(ins->ctx->buffer, "%s /* COISSUE! */);\n", src1_param.param_str);
2724 } else {
2725 shader_addline(ins->ctx->buffer, "%s > 0.5 ? %s : %s);\n",
2726 src0_param.param_str, src1_param.param_str, src2_param.param_str);
2727 }
2728 return;
2729 }
2730 /* Cycle through all source0 channels */
2731 dst_mask = ins->dst[0].write_mask;
2732 dst = ins->dst[0];
2733 for (i=0; i<4; i++) {
2734 write_mask = 0;
2735 /* Find the destination channels which use the current source0 channel */
2736 for (j=0; j<4; j++) {
2737 if (((ins->src[0].swizzle >> (2 * j)) & 0x3) == i)
2738 {
2739 write_mask |= WINED3DSP_WRITEMASK_0 << j;
2740 cmp_channel = WINED3DSP_WRITEMASK_0 << j;
2741 }
2742 }
2743
2744 dst.write_mask = dst_mask & write_mask;
2745 write_mask = shader_glsl_append_dst_ext(ins->ctx->buffer, ins, &dst);
2746 if (!write_mask) continue;
2747
2748 shader_glsl_add_src_param(ins, &ins->src[0], cmp_channel, &src0_param);
2749 shader_glsl_add_src_param(ins, &ins->src[1], write_mask, &src1_param);
2750 shader_glsl_add_src_param(ins, &ins->src[2], write_mask, &src2_param);
2751
2752 shader_addline(ins->ctx->buffer, "%s > 0.5 ? %s : %s);\n",
2753 src0_param.param_str, src1_param.param_str, src2_param.param_str);
2754 }
2755}
2756
2757/** GLSL code generation for WINED3DSIO_MAD: Multiply the first 2 opcodes, then add the last */
2758static void shader_glsl_mad(const struct wined3d_shader_instruction *ins)
2759{
2760 glsl_src_param_t src0_param;
2761 glsl_src_param_t src1_param;
2762 glsl_src_param_t src2_param;
2763 DWORD write_mask;
2764
2765 write_mask = shader_glsl_append_dst(ins->ctx->buffer, ins);
2766 shader_glsl_add_src_param(ins, &ins->src[0], write_mask, &src0_param);
2767 shader_glsl_add_src_param(ins, &ins->src[1], write_mask, &src1_param);
2768 shader_glsl_add_src_param(ins, &ins->src[2], write_mask, &src2_param);
2769 shader_addline(ins->ctx->buffer, "(%s * %s) + %s);\n",
2770 src0_param.param_str, src1_param.param_str, src2_param.param_str);
2771}
2772
2773/* Handles transforming all WINED3DSIO_M?x? opcodes for
2774 Vertex shaders to GLSL codes */
2775static void shader_glsl_mnxn(const struct wined3d_shader_instruction *ins)
2776{
2777 int i;
2778 int nComponents = 0;
2779 struct wined3d_shader_dst_param tmp_dst = {{0}};
2780 struct wined3d_shader_src_param tmp_src[2] = {{{0}}};
2781 struct wined3d_shader_instruction tmp_ins;
2782
2783 memset(&tmp_ins, 0, sizeof(tmp_ins));
2784
2785 /* Set constants for the temporary argument */
2786 tmp_ins.ctx = ins->ctx;
2787 tmp_ins.dst_count = 1;
2788 tmp_ins.dst = &tmp_dst;
2789 tmp_ins.src_count = 2;
2790 tmp_ins.src = tmp_src;
2791
2792 switch(ins->handler_idx)
2793 {
2794 case WINED3DSIH_M4x4:
2795 nComponents = 4;
2796 tmp_ins.handler_idx = WINED3DSIH_DP4;
2797 break;
2798 case WINED3DSIH_M4x3:
2799 nComponents = 3;
2800 tmp_ins.handler_idx = WINED3DSIH_DP4;
2801 break;
2802 case WINED3DSIH_M3x4:
2803 nComponents = 4;
2804 tmp_ins.handler_idx = WINED3DSIH_DP3;
2805 break;
2806 case WINED3DSIH_M3x3:
2807 nComponents = 3;
2808 tmp_ins.handler_idx = WINED3DSIH_DP3;
2809 break;
2810 case WINED3DSIH_M3x2:
2811 nComponents = 2;
2812 tmp_ins.handler_idx = WINED3DSIH_DP3;
2813 break;
2814 default:
2815 break;
2816 }
2817
2818 tmp_dst = ins->dst[0];
2819 tmp_src[0] = ins->src[0];
2820 tmp_src[1] = ins->src[1];
2821 for (i = 0; i < nComponents; ++i)
2822 {
2823 tmp_dst.write_mask = WINED3DSP_WRITEMASK_0 << i;
2824 shader_glsl_dot(&tmp_ins);
2825 ++tmp_src[1].reg.idx;
2826 }
2827}
2828
2829/**
2830 The LRP instruction performs a component-wise linear interpolation
2831 between the second and third operands using the first operand as the
2832 blend factor. Equation: (dst = src2 + src0 * (src1 - src2))
2833 This is equivalent to mix(src2, src1, src0);
2834*/
2835static void shader_glsl_lrp(const struct wined3d_shader_instruction *ins)
2836{
2837 glsl_src_param_t src0_param;
2838 glsl_src_param_t src1_param;
2839 glsl_src_param_t src2_param;
2840 DWORD write_mask;
2841
2842 write_mask = shader_glsl_append_dst(ins->ctx->buffer, ins);
2843
2844 shader_glsl_add_src_param(ins, &ins->src[0], write_mask, &src0_param);
2845 shader_glsl_add_src_param(ins, &ins->src[1], write_mask, &src1_param);
2846 shader_glsl_add_src_param(ins, &ins->src[2], write_mask, &src2_param);
2847
2848 shader_addline(ins->ctx->buffer, "mix(%s, %s, %s));\n",
2849 src2_param.param_str, src1_param.param_str, src0_param.param_str);
2850}
2851
2852/** Process the WINED3DSIO_LIT instruction in GLSL:
2853 * dst.x = dst.w = 1.0
2854 * dst.y = (src0.x > 0) ? src0.x
2855 * dst.z = (src0.x > 0) ? ((src0.y > 0) ? pow(src0.y, src.w) : 0) : 0
2856 * where src.w is clamped at +- 128
2857 */
2858static void shader_glsl_lit(const struct wined3d_shader_instruction *ins)
2859{
2860 glsl_src_param_t src0_param;
2861 glsl_src_param_t src1_param;
2862 glsl_src_param_t src3_param;
2863 char dst_mask[6];
2864
2865 shader_glsl_append_dst(ins->ctx->buffer, ins);
2866 shader_glsl_get_write_mask(&ins->dst[0], dst_mask);
2867
2868 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0, &src0_param);
2869 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_1, &src1_param);
2870 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_3, &src3_param);
2871
2872 /* The sdk specifies the instruction like this
2873 * dst.x = 1.0;
2874 * if(src.x > 0.0) dst.y = src.x
2875 * else dst.y = 0.0.
2876 * if(src.x > 0.0 && src.y > 0.0) dst.z = pow(src.y, power);
2877 * else dst.z = 0.0;
2878 * dst.w = 1.0;
2879 *
2880 * Obviously that has quite a few conditionals in it which we don't like. So the first step is this:
2881 * dst.x = 1.0 ... No further explanation needed
2882 * dst.y = max(src.y, 0.0); ... If x < 0.0, use 0.0, otherwise x. Same as the conditional
2883 * dst.z = x > 0.0 ? pow(max(y, 0.0), p) : 0; ... 0 ^ power is 0, and otherwise we use y anyway
2884 * dst.w = 1.0. ... Nothing fancy.
2885 *
2886 * So we still have one conditional in there. So do this:
2887 * dst.z = pow(max(0.0, src.y) * step(0.0, src.x), power);
2888 *
2889 * step(0.0, x) will return 1 if src.x > 0.0, and 0 otherwise. So if y is 0 we get pow(0.0 * 1.0, power),
2890 * which sets dst.z to 0. If y > 0, but x = 0.0, we get pow(y * 0.0, power), which results in 0 too.
2891 * if both x and y are > 0, we get pow(y * 1.0, power), as it is supposed to
2892 */
2893 shader_addline(ins->ctx->buffer,
2894 "vec4(1.0, max(%s, 0.0), pow(max(0.0, %s) * step(0.0, %s), clamp(%s, -128.0, 128.0)), 1.0)%s);\n",
2895 src0_param.param_str, src1_param.param_str, src0_param.param_str, src3_param.param_str, dst_mask);
2896}
2897
2898/** Process the WINED3DSIO_DST instruction in GLSL:
2899 * dst.x = 1.0
2900 * dst.y = src0.x * src0.y
2901 * dst.z = src0.z
2902 * dst.w = src1.w
2903 */
2904static void shader_glsl_dst(const struct wined3d_shader_instruction *ins)
2905{
2906 glsl_src_param_t src0y_param;
2907 glsl_src_param_t src0z_param;
2908 glsl_src_param_t src1y_param;
2909 glsl_src_param_t src1w_param;
2910 char dst_mask[6];
2911
2912 shader_glsl_append_dst(ins->ctx->buffer, ins);
2913 shader_glsl_get_write_mask(&ins->dst[0], dst_mask);
2914
2915 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_1, &src0y_param);
2916 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_2, &src0z_param);
2917 shader_glsl_add_src_param(ins, &ins->src[1], WINED3DSP_WRITEMASK_1, &src1y_param);
2918 shader_glsl_add_src_param(ins, &ins->src[1], WINED3DSP_WRITEMASK_3, &src1w_param);
2919
2920 shader_addline(ins->ctx->buffer, "vec4(1.0, %s * %s, %s, %s))%s;\n",
2921 src0y_param.param_str, src1y_param.param_str, src0z_param.param_str, src1w_param.param_str, dst_mask);
2922}
2923
2924/** Process the WINED3DSIO_SINCOS instruction in GLSL:
2925 * VS 2.0 requires that specific cosine and sine constants be passed to this instruction so the hardware
2926 * can handle it. But, these functions are built-in for GLSL, so we can just ignore the last 2 params.
2927 *
2928 * dst.x = cos(src0.?)
2929 * dst.y = sin(src0.?)
2930 * dst.z = dst.z
2931 * dst.w = dst.w
2932 */
2933static void shader_glsl_sincos(const struct wined3d_shader_instruction *ins)
2934{
2935 glsl_src_param_t src0_param;
2936 DWORD write_mask;
2937
2938 write_mask = shader_glsl_append_dst(ins->ctx->buffer, ins);
2939 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0, &src0_param);
2940
2941 switch (write_mask) {
2942 case WINED3DSP_WRITEMASK_0:
2943 shader_addline(ins->ctx->buffer, "cos(%s));\n", src0_param.param_str);
2944 break;
2945
2946 case WINED3DSP_WRITEMASK_1:
2947 shader_addline(ins->ctx->buffer, "sin(%s));\n", src0_param.param_str);
2948 break;
2949
2950 case (WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1):
2951 shader_addline(ins->ctx->buffer, "vec2(cos(%s), sin(%s)));\n", src0_param.param_str, src0_param.param_str);
2952 break;
2953
2954 default:
2955 ERR("Write mask should be .x, .y or .xy\n");
2956 break;
2957 }
2958}
2959
2960/* sgn in vs_2_0 has 2 extra parameters(registers for temporary storage) which we don't use
2961 * here. But those extra parameters require a dedicated function for sgn, since map2gl would
2962 * generate invalid code
2963 */
2964static void shader_glsl_sgn(const struct wined3d_shader_instruction *ins)
2965{
2966 glsl_src_param_t src0_param;
2967 DWORD write_mask;
2968
2969 write_mask = shader_glsl_append_dst(ins->ctx->buffer, ins);
2970 shader_glsl_add_src_param(ins, &ins->src[0], write_mask, &src0_param);
2971
2972 shader_addline(ins->ctx->buffer, "sign(%s));\n", src0_param.param_str);
2973}
2974
2975/** Process the WINED3DSIO_LOOP instruction in GLSL:
2976 * Start a for() loop where src1.y is the initial value of aL,
2977 * increment aL by src1.z for a total of src1.x iterations.
2978 * Need to use a temporary variable for this operation.
2979 */
2980/* FIXME: I don't think nested loops will work correctly this way. */
2981static void shader_glsl_loop(const struct wined3d_shader_instruction *ins)
2982{
2983 glsl_src_param_t src1_param;
2984 IWineD3DBaseShaderImpl *shader = (IWineD3DBaseShaderImpl *)ins->ctx->shader;
2985 const DWORD *control_values = NULL;
2986 const local_constant *constant;
2987
2988 shader_glsl_add_src_param(ins, &ins->src[1], WINED3DSP_WRITEMASK_ALL, &src1_param);
2989
2990 /* Try to hardcode the loop control parameters if possible. Direct3D 9 class hardware doesn't support real
2991 * varying indexing, but Microsoft designed this feature for Shader model 2.x+. If the loop control is
2992 * known at compile time, the GLSL compiler can unroll the loop, and replace indirect addressing with direct
2993 * addressing.
2994 */
2995 if (ins->src[1].reg.type == WINED3DSPR_CONSTINT)
2996 {
2997 LIST_FOR_EACH_ENTRY(constant, &shader->baseShader.constantsI, local_constant, entry) {
2998 if (constant->idx == ins->src[1].reg.idx)
2999 {
3000 control_values = constant->value;
3001 break;
3002 }
3003 }
3004 }
3005
3006 if (control_values)
3007 {
3008 struct wined3d_shader_loop_control loop_control;
3009 loop_control.count = control_values[0];
3010 loop_control.start = control_values[1];
3011 loop_control.step = (int)control_values[2];
3012
3013 if (loop_control.step > 0)
3014 {
3015 shader_addline(ins->ctx->buffer, "for (aL%u = %u; aL%u < (%u * %d + %u); aL%u += %d) {\n",
3016 shader->baseShader.cur_loop_depth, loop_control.start,
3017 shader->baseShader.cur_loop_depth, loop_control.count, loop_control.step, loop_control.start,
3018 shader->baseShader.cur_loop_depth, loop_control.step);
3019 }
3020 else if (loop_control.step < 0)
3021 {
3022 shader_addline(ins->ctx->buffer, "for (aL%u = %u; aL%u > (%u * %d + %u); aL%u += %d) {\n",
3023 shader->baseShader.cur_loop_depth, loop_control.start,
3024 shader->baseShader.cur_loop_depth, loop_control.count, loop_control.step, loop_control.start,
3025 shader->baseShader.cur_loop_depth, loop_control.step);
3026 }
3027 else
3028 {
3029 shader_addline(ins->ctx->buffer, "for (aL%u = %u, tmpInt%u = 0; tmpInt%u < %u; tmpInt%u++) {\n",
3030 shader->baseShader.cur_loop_depth, loop_control.start, shader->baseShader.cur_loop_depth,
3031 shader->baseShader.cur_loop_depth, loop_control.count,
3032 shader->baseShader.cur_loop_depth);
3033 }
3034 } else {
3035 shader_addline(ins->ctx->buffer,
3036 "for (tmpInt%u = 0, aL%u = %s.y; tmpInt%u < %s.x; tmpInt%u++, aL%u += %s.z) {\n",
3037 shader->baseShader.cur_loop_depth, shader->baseShader.cur_loop_regno,
3038 src1_param.reg_name, shader->baseShader.cur_loop_depth, src1_param.reg_name,
3039 shader->baseShader.cur_loop_depth, shader->baseShader.cur_loop_regno, src1_param.reg_name);
3040 }
3041
3042 shader->baseShader.cur_loop_depth++;
3043 shader->baseShader.cur_loop_regno++;
3044}
3045
3046static void shader_glsl_end(const struct wined3d_shader_instruction *ins)
3047{
3048 IWineD3DBaseShaderImpl *shader = (IWineD3DBaseShaderImpl *)ins->ctx->shader;
3049
3050 shader_addline(ins->ctx->buffer, "}\n");
3051
3052 if (ins->handler_idx == WINED3DSIH_ENDLOOP)
3053 {
3054 shader->baseShader.cur_loop_depth--;
3055 shader->baseShader.cur_loop_regno--;
3056 }
3057
3058 if (ins->handler_idx == WINED3DSIH_ENDREP)
3059 {
3060 shader->baseShader.cur_loop_depth--;
3061 }
3062}
3063
3064static void shader_glsl_rep(const struct wined3d_shader_instruction *ins)
3065{
3066 IWineD3DBaseShaderImpl *shader = (IWineD3DBaseShaderImpl *)ins->ctx->shader;
3067 glsl_src_param_t src0_param;
3068 const DWORD *control_values = NULL;
3069 const local_constant *constant;
3070
3071 /* Try to hardcode local values to help the GLSL compiler to unroll and optimize the loop */
3072 if (ins->src[0].reg.type == WINED3DSPR_CONSTINT)
3073 {
3074 LIST_FOR_EACH_ENTRY(constant, &shader->baseShader.constantsI, local_constant, entry)
3075 {
3076 if (constant->idx == ins->src[0].reg.idx)
3077 {
3078 control_values = constant->value;
3079 break;
3080 }
3081 }
3082 }
3083
3084 if(control_values) {
3085 shader_addline(ins->ctx->buffer, "for (tmpInt%d = 0; tmpInt%d < %d; tmpInt%d++) {\n",
3086 shader->baseShader.cur_loop_depth, shader->baseShader.cur_loop_depth,
3087 control_values[0], shader->baseShader.cur_loop_depth);
3088 } else {
3089 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0, &src0_param);
3090 shader_addline(ins->ctx->buffer, "for (tmpInt%d = 0; tmpInt%d < %s; tmpInt%d++) {\n",
3091 shader->baseShader.cur_loop_depth, shader->baseShader.cur_loop_depth,
3092 src0_param.param_str, shader->baseShader.cur_loop_depth);
3093 }
3094 shader->baseShader.cur_loop_depth++;
3095}
3096
3097static void shader_glsl_if(const struct wined3d_shader_instruction *ins)
3098{
3099 glsl_src_param_t src0_param;
3100
3101 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0, &src0_param);
3102 shader_addline(ins->ctx->buffer, "if (%s) {\n", src0_param.param_str);
3103}
3104
3105static void shader_glsl_ifc(const struct wined3d_shader_instruction *ins)
3106{
3107 glsl_src_param_t src0_param;
3108 glsl_src_param_t src1_param;
3109
3110 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0, &src0_param);
3111 shader_glsl_add_src_param(ins, &ins->src[1], WINED3DSP_WRITEMASK_0, &src1_param);
3112
3113 shader_addline(ins->ctx->buffer, "if (%s %s %s) {\n",
3114 src0_param.param_str, shader_get_comp_op(ins->flags), src1_param.param_str);
3115}
3116
3117static void shader_glsl_else(const struct wined3d_shader_instruction *ins)
3118{
3119 shader_addline(ins->ctx->buffer, "} else {\n");
3120}
3121
3122static void shader_glsl_break(const struct wined3d_shader_instruction *ins)
3123{
3124 shader_addline(ins->ctx->buffer, "break;\n");
3125}
3126
3127/* FIXME: According to MSDN the compare is done per component. */
3128static void shader_glsl_breakc(const struct wined3d_shader_instruction *ins)
3129{
3130 glsl_src_param_t src0_param;
3131 glsl_src_param_t src1_param;
3132
3133 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0, &src0_param);
3134 shader_glsl_add_src_param(ins, &ins->src[1], WINED3DSP_WRITEMASK_0, &src1_param);
3135
3136 shader_addline(ins->ctx->buffer, "if (%s %s %s) break;\n",
3137 src0_param.param_str, shader_get_comp_op(ins->flags), src1_param.param_str);
3138}
3139
3140static void shader_glsl_label(const struct wined3d_shader_instruction *ins)
3141{
3142 shader_addline(ins->ctx->buffer, "}\n");
3143 shader_addline(ins->ctx->buffer, "void subroutine%u () {\n", ins->src[0].reg.idx);
3144}
3145
3146static void shader_glsl_call(const struct wined3d_shader_instruction *ins)
3147{
3148 shader_addline(ins->ctx->buffer, "subroutine%u();\n", ins->src[0].reg.idx);
3149}
3150
3151static void shader_glsl_callnz(const struct wined3d_shader_instruction *ins)
3152{
3153 glsl_src_param_t src1_param;
3154
3155 shader_glsl_add_src_param(ins, &ins->src[1], WINED3DSP_WRITEMASK_0, &src1_param);
3156 shader_addline(ins->ctx->buffer, "if (%s) subroutine%u();\n", src1_param.param_str, ins->src[0].reg.idx);
3157}
3158
/* Handler for the RET instruction. Intentionally emits nothing: the closing
 * "}" is written when a new function is started and at the end of the shader.
 * This handler only exists to suppress the unhandled instruction warning. */
static void shader_glsl_ret(const struct wined3d_shader_instruction *ins)
{
    (void)ins; /* unused */
}
3165
3166/*********************************************
3167 * Pixel Shader Specific Code begins here
3168 ********************************************/
3169static void shader_glsl_tex(const struct wined3d_shader_instruction *ins)
3170{
3171 IWineD3DBaseShaderImpl *shader = (IWineD3DBaseShaderImpl *)ins->ctx->shader;
3172 IWineD3DDeviceImpl *deviceImpl = (IWineD3DDeviceImpl *)shader->baseShader.device;
3173 DWORD shader_version = WINED3D_SHADER_VERSION(ins->ctx->reg_maps->shader_version.major,
3174 ins->ctx->reg_maps->shader_version.minor);
3175 const struct wined3d_gl_info *gl_info = ins->ctx->gl_info;
3176 glsl_sample_function_t sample_function;
3177 DWORD sample_flags = 0;
3178 WINED3DSAMPLER_TEXTURE_TYPE sampler_type;
3179 DWORD sampler_idx;
3180 DWORD mask = 0, swizzle;
3181
3182 /* 1.0-1.4: Use destination register as sampler source.
3183 * 2.0+: Use provided sampler source. */
3184 if (shader_version < WINED3D_SHADER_VERSION(2,0)) sampler_idx = ins->dst[0].reg.idx;
3185 else sampler_idx = ins->src[1].reg.idx;
3186 sampler_type = ins->ctx->reg_maps->sampler_type[sampler_idx];
3187
3188 if (shader_version < WINED3D_SHADER_VERSION(1,4))
3189 {
3190 const struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data;
3191 DWORD flags = (priv->cur_ps_args->tex_transform >> (sampler_idx * WINED3D_PSARGS_TEXTRANSFORM_SHIFT))
3192 & WINED3D_PSARGS_TEXTRANSFORM_MASK;
3193
3194 /* Projected cube textures don't make a lot of sense, the resulting coordinates stay the same. */
3195 if (flags & WINED3D_PSARGS_PROJECTED && sampler_type != WINED3DSTT_CUBE) {
3196 sample_flags |= WINED3D_GLSL_SAMPLE_PROJECTED;
3197 switch (flags & ~WINED3D_PSARGS_PROJECTED) {
3198 case WINED3DTTFF_COUNT1: FIXME("WINED3DTTFF_PROJECTED with WINED3DTTFF_COUNT1?\n"); break;
3199 case WINED3DTTFF_COUNT2: mask = WINED3DSP_WRITEMASK_1; break;
3200 case WINED3DTTFF_COUNT3: mask = WINED3DSP_WRITEMASK_2; break;
3201 case WINED3DTTFF_COUNT4:
3202 case WINED3DTTFF_DISABLE: mask = WINED3DSP_WRITEMASK_3; break;
3203 }
3204 }
3205 }
3206 else if (shader_version < WINED3D_SHADER_VERSION(2,0))
3207 {
3208 DWORD src_mod = ins->src[0].modifiers;
3209
3210 if (src_mod == WINED3DSPSM_DZ) {
3211 sample_flags |= WINED3D_GLSL_SAMPLE_PROJECTED;
3212 mask = WINED3DSP_WRITEMASK_2;
3213 } else if (src_mod == WINED3DSPSM_DW) {
3214 sample_flags |= WINED3D_GLSL_SAMPLE_PROJECTED;
3215 mask = WINED3DSP_WRITEMASK_3;
3216 }
3217 } else {
3218 if (ins->flags & WINED3DSI_TEXLD_PROJECT)
3219 {
3220 /* ps 2.0 texldp instruction always divides by the fourth component. */
3221 sample_flags |= WINED3D_GLSL_SAMPLE_PROJECTED;
3222 mask = WINED3DSP_WRITEMASK_3;
3223 }
3224 }
3225
3226 if(deviceImpl->stateBlock->textures[sampler_idx] &&
3227 IWineD3DBaseTexture_GetTextureDimensions(deviceImpl->stateBlock->textures[sampler_idx]) == GL_TEXTURE_RECTANGLE_ARB) {
3228 sample_flags |= WINED3D_GLSL_SAMPLE_RECT;
3229 }
3230
3231 shader_glsl_get_sample_function(gl_info, sampler_type, sample_flags, &sample_function);
3232 mask |= sample_function.coord_mask;
3233
3234 if (shader_version < WINED3D_SHADER_VERSION(2,0)) swizzle = WINED3DSP_NOSWIZZLE;
3235 else swizzle = ins->src[1].swizzle;
3236
3237 /* 1.0-1.3: Use destination register as coordinate source.
3238 1.4+: Use provided coordinate source register. */
3239 if (shader_version < WINED3D_SHADER_VERSION(1,4))
3240 {
3241 char coord_mask[6];
3242 shader_glsl_write_mask_to_str(mask, coord_mask);
3243 shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, swizzle, NULL, NULL, NULL,
3244 "T%u%s", sampler_idx, coord_mask);
3245 } else {
3246 glsl_src_param_t coord_param;
3247 shader_glsl_add_src_param(ins, &ins->src[0], mask, &coord_param);
3248 if (ins->flags & WINED3DSI_TEXLD_BIAS)
3249 {
3250 glsl_src_param_t bias;
3251 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_3, &bias);
3252 shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, swizzle, NULL, NULL, bias.param_str,
3253 "%s", coord_param.param_str);
3254 } else {
3255 shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, swizzle, NULL, NULL, NULL,
3256 "%s", coord_param.param_str);
3257 }
3258 }
3259}
3260
3261static void shader_glsl_texldd(const struct wined3d_shader_instruction *ins)
3262{
3263 IWineD3DBaseShaderImpl *This = (IWineD3DBaseShaderImpl *)ins->ctx->shader;
3264 IWineD3DDeviceImpl* deviceImpl = (IWineD3DDeviceImpl*) This->baseShader.device;
3265 const struct wined3d_gl_info *gl_info = ins->ctx->gl_info;
3266 glsl_sample_function_t sample_function;
3267 glsl_src_param_t coord_param, dx_param, dy_param;
3268 DWORD sample_flags = WINED3D_GLSL_SAMPLE_GRAD;
3269 DWORD sampler_type;
3270 DWORD sampler_idx;
3271 DWORD swizzle = ins->src[1].swizzle;
3272
3273 if (!gl_info->supported[ARB_SHADER_TEXTURE_LOD] && !gl_info->supported[EXT_GPU_SHADER4])
3274 {
3275 FIXME("texldd used, but not supported by hardware. Falling back to regular tex\n");
3276 shader_glsl_tex(ins);
3277 return;
3278 }
3279
3280 sampler_idx = ins->src[1].reg.idx;
3281 sampler_type = ins->ctx->reg_maps->sampler_type[sampler_idx];
3282 if(deviceImpl->stateBlock->textures[sampler_idx] &&
3283 IWineD3DBaseTexture_GetTextureDimensions(deviceImpl->stateBlock->textures[sampler_idx]) == GL_TEXTURE_RECTANGLE_ARB) {
3284 sample_flags |= WINED3D_GLSL_SAMPLE_RECT;
3285 }
3286
3287 shader_glsl_get_sample_function(gl_info, sampler_type, sample_flags, &sample_function);
3288 shader_glsl_add_src_param(ins, &ins->src[0], sample_function.coord_mask, &coord_param);
3289 shader_glsl_add_src_param(ins, &ins->src[2], sample_function.coord_mask, &dx_param);
3290 shader_glsl_add_src_param(ins, &ins->src[3], sample_function.coord_mask, &dy_param);
3291
3292 shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, swizzle, dx_param.param_str, dy_param.param_str, NULL,
3293 "%s", coord_param.param_str);
3294}
3295
3296static void shader_glsl_texldl(const struct wined3d_shader_instruction *ins)
3297{
3298 IWineD3DBaseShaderImpl *This = (IWineD3DBaseShaderImpl *)ins->ctx->shader;
3299 IWineD3DDeviceImpl* deviceImpl = (IWineD3DDeviceImpl*) This->baseShader.device;
3300 const struct wined3d_gl_info *gl_info = ins->ctx->gl_info;
3301 glsl_sample_function_t sample_function;
3302 glsl_src_param_t coord_param, lod_param;
3303 DWORD sample_flags = WINED3D_GLSL_SAMPLE_LOD;
3304 DWORD sampler_type;
3305 DWORD sampler_idx;
3306 DWORD swizzle = ins->src[1].swizzle;
3307
3308 sampler_idx = ins->src[1].reg.idx;
3309 sampler_type = ins->ctx->reg_maps->sampler_type[sampler_idx];
3310 if(deviceImpl->stateBlock->textures[sampler_idx] &&
3311 IWineD3DBaseTexture_GetTextureDimensions(deviceImpl->stateBlock->textures[sampler_idx]) == GL_TEXTURE_RECTANGLE_ARB) {
3312 sample_flags |= WINED3D_GLSL_SAMPLE_RECT;
3313 }
3314 shader_glsl_get_sample_function(gl_info, sampler_type, sample_flags, &sample_function);
3315 shader_glsl_add_src_param(ins, &ins->src[0], sample_function.coord_mask, &coord_param);
3316
3317 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_3, &lod_param);
3318
3319 if (!gl_info->supported[ARB_SHADER_TEXTURE_LOD] && !gl_info->supported[EXT_GPU_SHADER4]
3320 && shader_is_pshader_version(ins->ctx->reg_maps->shader_version.type))
3321 {
3322 /* The GLSL spec claims the Lod sampling functions are only supported in vertex shaders.
3323 * However, they seem to work just fine in fragment shaders as well. */
3324 WARN("Using %s in fragment shader.\n", sample_function.name);
3325 }
3326 shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, swizzle, NULL, NULL, lod_param.param_str,
3327 "%s", coord_param.param_str);
3328}
3329
3330static void shader_glsl_texcoord(const struct wined3d_shader_instruction *ins)
3331{
3332 /* FIXME: Make this work for more than just 2D textures */
3333 struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
3334 DWORD write_mask = shader_glsl_append_dst(ins->ctx->buffer, ins);
3335
3336 if (!(ins->ctx->reg_maps->shader_version.major == 1 && ins->ctx->reg_maps->shader_version.minor == 4))
3337 {
3338 char dst_mask[6];
3339
3340 shader_glsl_get_write_mask(&ins->dst[0], dst_mask);
3341 shader_addline(buffer, "clamp(gl_TexCoord[%u], 0.0, 1.0)%s);\n",
3342 ins->dst[0].reg.idx, dst_mask);
3343 } else {
3344 DWORD reg = ins->src[0].reg.idx;
3345 DWORD src_mod = ins->src[0].modifiers;
3346 char dst_swizzle[6];
3347
3348 shader_glsl_get_swizzle(&ins->src[0], FALSE, write_mask, dst_swizzle);
3349
3350 if (src_mod == WINED3DSPSM_DZ) {
3351 glsl_src_param_t div_param;
3352 unsigned int mask_size = shader_glsl_get_write_mask_size(write_mask);
3353 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_2, &div_param);
3354
3355 if (mask_size > 1) {
3356 shader_addline(buffer, "gl_TexCoord[%u]%s / vec%d(%s));\n", reg, dst_swizzle, mask_size, div_param.param_str);
3357 } else {
3358 shader_addline(buffer, "gl_TexCoord[%u]%s / %s);\n", reg, dst_swizzle, div_param.param_str);
3359 }
3360 } else if (src_mod == WINED3DSPSM_DW) {
3361 glsl_src_param_t div_param;
3362 unsigned int mask_size = shader_glsl_get_write_mask_size(write_mask);
3363 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_3, &div_param);
3364
3365 if (mask_size > 1) {
3366 shader_addline(buffer, "gl_TexCoord[%u]%s / vec%d(%s));\n", reg, dst_swizzle, mask_size, div_param.param_str);
3367 } else {
3368 shader_addline(buffer, "gl_TexCoord[%u]%s / %s);\n", reg, dst_swizzle, div_param.param_str);
3369 }
3370 } else {
3371 shader_addline(buffer, "gl_TexCoord[%u]%s);\n", reg, dst_swizzle);
3372 }
3373 }
3374}
3375
3376/** Process the WINED3DSIO_TEXDP3TEX instruction in GLSL:
3377 * Take a 3-component dot product of the TexCoord[dstreg] and src,
3378 * then perform a 1D texture lookup from stage dstregnum, place into dst. */
3379static void shader_glsl_texdp3tex(const struct wined3d_shader_instruction *ins)
3380{
3381 const struct wined3d_gl_info *gl_info = ins->ctx->gl_info;
3382 glsl_src_param_t src0_param;
3383 glsl_sample_function_t sample_function;
3384 DWORD sampler_idx = ins->dst[0].reg.idx;
3385 DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
3386 WINED3DSAMPLER_TEXTURE_TYPE sampler_type = ins->ctx->reg_maps->sampler_type[sampler_idx];
3387 UINT mask_size;
3388
3389 shader_glsl_add_src_param(ins, &ins->src[0], src_mask, &src0_param);
3390
3391 /* Do I have to take care about the projected bit? I don't think so, since the dp3 returns only one
3392 * scalar, and projected sampling would require 4.
3393 *
3394 * It is a dependent read - not valid with conditional NP2 textures
3395 */
3396 shader_glsl_get_sample_function(gl_info, sampler_type, 0, &sample_function);
3397 mask_size = shader_glsl_get_write_mask_size(sample_function.coord_mask);
3398
3399 switch(mask_size)
3400 {
3401 case 1:
3402 shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL,
3403 "dot(gl_TexCoord[%u].xyz, %s)", sampler_idx, src0_param.param_str);
3404 break;
3405
3406 case 2:
3407 shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL,
3408 "vec2(dot(gl_TexCoord[%u].xyz, %s), 0.0)", sampler_idx, src0_param.param_str);
3409 break;
3410
3411 case 3:
3412 shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL,
3413 "vec3(dot(gl_TexCoord[%u].xyz, %s), 0.0, 0.0)", sampler_idx, src0_param.param_str);
3414 break;
3415
3416 default:
3417 FIXME("Unexpected mask size %u\n", mask_size);
3418 break;
3419 }
3420}
3421
3422/** Process the WINED3DSIO_TEXDP3 instruction in GLSL:
3423 * Take a 3-component dot product of the TexCoord[dstreg] and src. */
3424static void shader_glsl_texdp3(const struct wined3d_shader_instruction *ins)
3425{
3426 glsl_src_param_t src0_param;
3427 DWORD dstreg = ins->dst[0].reg.idx;
3428 DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
3429 DWORD dst_mask;
3430 unsigned int mask_size;
3431
3432 dst_mask = shader_glsl_append_dst(ins->ctx->buffer, ins);
3433 mask_size = shader_glsl_get_write_mask_size(dst_mask);
3434 shader_glsl_add_src_param(ins, &ins->src[0], src_mask, &src0_param);
3435
3436 if (mask_size > 1) {
3437 shader_addline(ins->ctx->buffer, "vec%d(dot(T%u.xyz, %s)));\n", mask_size, dstreg, src0_param.param_str);
3438 } else {
3439 shader_addline(ins->ctx->buffer, "dot(T%u.xyz, %s));\n", dstreg, src0_param.param_str);
3440 }
3441}
3442
3443/** Process the WINED3DSIO_TEXDEPTH instruction in GLSL:
3444 * Calculate the depth as dst.x / dst.y */
3445static void shader_glsl_texdepth(const struct wined3d_shader_instruction *ins)
3446{
3447 glsl_dst_param_t dst_param;
3448
3449 shader_glsl_add_dst_param(ins, &ins->dst[0], &dst_param);
3450
3451 /* Tests show that texdepth never returns anything below 0.0, and that r5.y is clamped to 1.0.
3452 * Negative input is accepted, -0.25 / -0.5 returns 0.5. GL should clamp gl_FragDepth to [0;1], but
3453 * this doesn't always work, so clamp the results manually. Whether or not the x value is clamped at 1
3454 * too is irrelevant, since if x = 0, any y value < 1.0 (and > 1.0 is not allowed) results in a result
3455 * >= 1.0 or < 0.0
3456 */
3457 shader_addline(ins->ctx->buffer, "gl_FragDepth = clamp((%s.x / min(%s.y, 1.0)), 0.0, 1.0);\n",
3458 dst_param.reg_name, dst_param.reg_name);
3459}
3460
3461/** Process the WINED3DSIO_TEXM3X2DEPTH instruction in GLSL:
3462 * Last row of a 3x2 matrix multiply, use the result to calculate the depth:
3463 * Calculate tmp0.y = TexCoord[dstreg] . src.xyz; (tmp0.x has already been calculated)
3464 * depth = (tmp0.y == 0.0) ? 1.0 : tmp0.x / tmp0.y
3465 */
3466static void shader_glsl_texm3x2depth(const struct wined3d_shader_instruction *ins)
3467{
3468 DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
3469 DWORD dstreg = ins->dst[0].reg.idx;
3470 glsl_src_param_t src0_param;
3471
3472 shader_glsl_add_src_param(ins, &ins->src[0], src_mask, &src0_param);
3473
3474 shader_addline(ins->ctx->buffer, "tmp0.y = dot(T%u.xyz, %s);\n", dstreg, src0_param.param_str);
3475 shader_addline(ins->ctx->buffer, "gl_FragDepth = (tmp0.y == 0.0) ? 1.0 : clamp(tmp0.x / tmp0.y, 0.0, 1.0);\n");
3476}
3477
3478/** Process the WINED3DSIO_TEXM3X2PAD instruction in GLSL
3479 * Calculate the 1st of a 2-row matrix multiplication. */
3480static void shader_glsl_texm3x2pad(const struct wined3d_shader_instruction *ins)
3481{
3482 DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
3483 DWORD reg = ins->dst[0].reg.idx;
3484 struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
3485 glsl_src_param_t src0_param;
3486
3487 shader_glsl_add_src_param(ins, &ins->src[0], src_mask, &src0_param);
3488 shader_addline(buffer, "tmp0.x = dot(T%u.xyz, %s);\n", reg, src0_param.param_str);
3489}
3490
3491/** Process the WINED3DSIO_TEXM3X3PAD instruction in GLSL
3492 * Calculate the 1st or 2nd row of a 3-row matrix multiplication. */
3493static void shader_glsl_texm3x3pad(const struct wined3d_shader_instruction *ins)
3494{
3495 IWineD3DBaseShaderImpl *shader = (IWineD3DBaseShaderImpl *)ins->ctx->shader;
3496 DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
3497 DWORD reg = ins->dst[0].reg.idx;
3498 struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
3499 SHADER_PARSE_STATE* current_state = &shader->baseShader.parse_state;
3500 glsl_src_param_t src0_param;
3501
3502 shader_glsl_add_src_param(ins, &ins->src[0], src_mask, &src0_param);
3503 shader_addline(buffer, "tmp0.%c = dot(T%u.xyz, %s);\n", 'x' + current_state->current_row, reg, src0_param.param_str);
3504 current_state->texcoord_w[current_state->current_row++] = reg;
3505}
3506
3507static void shader_glsl_texm3x2tex(const struct wined3d_shader_instruction *ins)
3508{
3509 const struct wined3d_gl_info *gl_info = ins->ctx->gl_info;
3510 DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
3511 DWORD reg = ins->dst[0].reg.idx;
3512 struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
3513 glsl_src_param_t src0_param;
3514 WINED3DSAMPLER_TEXTURE_TYPE sampler_type = ins->ctx->reg_maps->sampler_type[reg];
3515 glsl_sample_function_t sample_function;
3516
3517 shader_glsl_add_src_param(ins, &ins->src[0], src_mask, &src0_param);
3518 shader_addline(buffer, "tmp0.y = dot(T%u.xyz, %s);\n", reg, src0_param.param_str);
3519
3520 shader_glsl_get_sample_function(gl_info, sampler_type, 0, &sample_function);
3521
3522 /* Sample the texture using the calculated coordinates */
3523 shader_glsl_gen_sample_code(ins, reg, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL, "tmp0.xy");
3524}
3525
3526/** Process the WINED3DSIO_TEXM3X3TEX instruction in GLSL
3527 * Perform the 3rd row of a 3x3 matrix multiply, then sample the texture using the calculated coordinates */
3528static void shader_glsl_texm3x3tex(const struct wined3d_shader_instruction *ins)
3529{
3530 DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
3531 IWineD3DBaseShaderImpl *shader = (IWineD3DBaseShaderImpl *)ins->ctx->shader;
3532 SHADER_PARSE_STATE *current_state = &shader->baseShader.parse_state;
3533 const struct wined3d_gl_info *gl_info = ins->ctx->gl_info;
3534 glsl_src_param_t src0_param;
3535 DWORD reg = ins->dst[0].reg.idx;
3536 WINED3DSAMPLER_TEXTURE_TYPE sampler_type = ins->ctx->reg_maps->sampler_type[reg];
3537 glsl_sample_function_t sample_function;
3538
3539 shader_glsl_add_src_param(ins, &ins->src[0], src_mask, &src0_param);
3540 shader_addline(ins->ctx->buffer, "tmp0.z = dot(T%u.xyz, %s);\n", reg, src0_param.param_str);
3541
3542 /* Dependent read, not valid with conditional NP2 */
3543 shader_glsl_get_sample_function(gl_info, sampler_type, 0, &sample_function);
3544
3545 /* Sample the texture using the calculated coordinates */
3546 shader_glsl_gen_sample_code(ins, reg, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL, "tmp0.xyz");
3547
3548 current_state->current_row = 0;
3549}
3550
3551/** Process the WINED3DSIO_TEXM3X3 instruction in GLSL
3552 * Perform the 3rd row of a 3x3 matrix multiply */
3553static void shader_glsl_texm3x3(const struct wined3d_shader_instruction *ins)
3554{
3555 DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
3556 IWineD3DBaseShaderImpl *shader = (IWineD3DBaseShaderImpl *)ins->ctx->shader;
3557 SHADER_PARSE_STATE *current_state = &shader->baseShader.parse_state;
3558 glsl_src_param_t src0_param;
3559 char dst_mask[6];
3560 DWORD reg = ins->dst[0].reg.idx;
3561
3562 shader_glsl_add_src_param(ins, &ins->src[0], src_mask, &src0_param);
3563
3564 shader_glsl_append_dst(ins->ctx->buffer, ins);
3565 shader_glsl_get_write_mask(&ins->dst[0], dst_mask);
3566 shader_addline(ins->ctx->buffer, "vec4(tmp0.xy, dot(T%u.xyz, %s), 1.0)%s);\n", reg, src0_param.param_str, dst_mask);
3567
3568 current_state->current_row = 0;
3569}
3570
3571/* Process the WINED3DSIO_TEXM3X3SPEC instruction in GLSL
3572 * Perform the final texture lookup based on the previous 2 3x3 matrix multiplies */
3573static void shader_glsl_texm3x3spec(const struct wined3d_shader_instruction *ins)
3574{
3575 IWineD3DBaseShaderImpl *shader = (IWineD3DBaseShaderImpl *)ins->ctx->shader;
3576 const struct wined3d_gl_info *gl_info = ins->ctx->gl_info;
3577 DWORD reg = ins->dst[0].reg.idx;
3578 glsl_src_param_t src0_param;
3579 glsl_src_param_t src1_param;
3580 struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
3581 SHADER_PARSE_STATE* current_state = &shader->baseShader.parse_state;
3582 WINED3DSAMPLER_TEXTURE_TYPE stype = ins->ctx->reg_maps->sampler_type[reg];
3583 DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
3584 glsl_sample_function_t sample_function;
3585
3586 shader_glsl_add_src_param(ins, &ins->src[0], src_mask, &src0_param);
3587 shader_glsl_add_src_param(ins, &ins->src[1], src_mask, &src1_param);
3588
3589 /* Perform the last matrix multiply operation */
3590 shader_addline(buffer, "tmp0.z = dot(T%u.xyz, %s);\n", reg, src0_param.param_str);
3591 /* Reflection calculation */
3592 shader_addline(buffer, "tmp0.xyz = -reflect((%s), normalize(tmp0.xyz));\n", src1_param.param_str);
3593
3594 /* Dependent read, not valid with conditional NP2 */
3595 shader_glsl_get_sample_function(gl_info, stype, 0, &sample_function);
3596
3597 /* Sample the texture */
3598 shader_glsl_gen_sample_code(ins, reg, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL, "tmp0.xyz");
3599
3600 current_state->current_row = 0;
3601}
3602
3603/* Process the WINED3DSIO_TEXM3X3VSPEC instruction in GLSL
3604 * Perform the final texture lookup based on the previous 2 3x3 matrix multiplies */
3605static void shader_glsl_texm3x3vspec(const struct wined3d_shader_instruction *ins)
3606{
3607 IWineD3DBaseShaderImpl *shader = (IWineD3DBaseShaderImpl *)ins->ctx->shader;
3608 const struct wined3d_gl_info *gl_info = ins->ctx->gl_info;
3609 DWORD reg = ins->dst[0].reg.idx;
3610 struct wined3d_shader_buffer *buffer = ins->ctx->buffer;
3611 SHADER_PARSE_STATE* current_state = &shader->baseShader.parse_state;
3612 glsl_src_param_t src0_param;
3613 DWORD src_mask = WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1 | WINED3DSP_WRITEMASK_2;
3614 WINED3DSAMPLER_TEXTURE_TYPE sampler_type = ins->ctx->reg_maps->sampler_type[reg];
3615 glsl_sample_function_t sample_function;
3616
3617 shader_glsl_add_src_param(ins, &ins->src[0], src_mask, &src0_param);
3618
3619 /* Perform the last matrix multiply operation */
3620 shader_addline(buffer, "tmp0.z = dot(vec3(T%u), vec3(%s));\n", reg, src0_param.param_str);
3621
3622 /* Construct the eye-ray vector from w coordinates */
3623 shader_addline(buffer, "tmp1.xyz = normalize(vec3(gl_TexCoord[%u].w, gl_TexCoord[%u].w, gl_TexCoord[%u].w));\n",
3624 current_state->texcoord_w[0], current_state->texcoord_w[1], reg);
3625 shader_addline(buffer, "tmp0.xyz = -reflect(tmp1.xyz, normalize(tmp0.xyz));\n");
3626
3627 /* Dependent read, not valid with conditional NP2 */
3628 shader_glsl_get_sample_function(gl_info, sampler_type, 0, &sample_function);
3629
3630 /* Sample the texture using the calculated coordinates */
3631 shader_glsl_gen_sample_code(ins, reg, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL, "tmp0.xyz");
3632
3633 current_state->current_row = 0;
3634}
3635
3636/** Process the WINED3DSIO_TEXBEM instruction in GLSL.
3637 * Apply a fake bump map transform.
3638 * texbem is pshader <= 1.3 only, this saves a few version checks
3639 */
3640static void shader_glsl_texbem(const struct wined3d_shader_instruction *ins)
3641{
3642 /*IWineD3DBaseShaderImpl *shader = (IWineD3DBaseShaderImpl *)ins->ctx->shader;
3643 IWineD3DDeviceImpl *deviceImpl = (IWineD3DDeviceImpl *)shader->baseShader.device; - unused */
3644 const struct wined3d_gl_info *gl_info = ins->ctx->gl_info;
3645 const struct shader_glsl_ctx_priv *priv = ins->ctx->backend_data;
3646 glsl_sample_function_t sample_function;
3647 glsl_src_param_t coord_param;
3648 WINED3DSAMPLER_TEXTURE_TYPE sampler_type;
3649 DWORD sampler_idx;
3650 DWORD mask;
3651 DWORD flags;
3652 char coord_mask[6];
3653
3654 sampler_idx = ins->dst[0].reg.idx;
3655 flags = (priv->cur_ps_args->tex_transform >> (sampler_idx * WINED3D_PSARGS_TEXTRANSFORM_SHIFT))
3656 & WINED3D_PSARGS_TEXTRANSFORM_MASK;
3657
3658 sampler_type = ins->ctx->reg_maps->sampler_type[sampler_idx];
3659 /* Dependent read, not valid with conditional NP2 */
3660 shader_glsl_get_sample_function(gl_info, sampler_type, 0, &sample_function);
3661 mask = sample_function.coord_mask;
3662
3663 shader_glsl_write_mask_to_str(mask, coord_mask);
3664
3665 /* with projective textures, texbem only divides the static texture coord, not the displacement,
3666 * so we can't let the GL handle this.
3667 */
3668 if (flags & WINED3D_PSARGS_PROJECTED) {
3669 DWORD div_mask=0;
3670 char coord_div_mask[3];
3671 switch (flags & ~WINED3D_PSARGS_PROJECTED) {
3672 case WINED3DTTFF_COUNT1: FIXME("WINED3DTTFF_PROJECTED with WINED3DTTFF_COUNT1?\n"); break;
3673 case WINED3DTTFF_COUNT2: div_mask = WINED3DSP_WRITEMASK_1; break;
3674 case WINED3DTTFF_COUNT3: div_mask = WINED3DSP_WRITEMASK_2; break;
3675 case WINED3DTTFF_COUNT4:
3676 case WINED3DTTFF_DISABLE: div_mask = WINED3DSP_WRITEMASK_3; break;
3677 }
3678 shader_glsl_write_mask_to_str(div_mask, coord_div_mask);
3679 shader_addline(ins->ctx->buffer, "T%u%s /= T%u%s;\n", sampler_idx, coord_mask, sampler_idx, coord_div_mask);
3680 }
3681
3682 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1, &coord_param);
3683
3684 shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL,
3685 "T%u%s + vec4(bumpenvmat%d * %s, 0.0, 0.0)%s", sampler_idx, coord_mask, sampler_idx,
3686 coord_param.param_str, coord_mask);
3687
3688 if (ins->handler_idx == WINED3DSIH_TEXBEML)
3689 {
3690 glsl_src_param_t luminance_param;
3691 glsl_dst_param_t dst_param;
3692
3693 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_2, &luminance_param);
3694 shader_glsl_add_dst_param(ins, &ins->dst[0], &dst_param);
3695
3696 shader_addline(ins->ctx->buffer, "%s%s *= (%s * luminancescale%d + luminanceoffset%d);\n",
3697 dst_param.reg_name, dst_param.mask_str,
3698 luminance_param.param_str, sampler_idx, sampler_idx);
3699 }
3700}
3701
3702static void shader_glsl_bem(const struct wined3d_shader_instruction *ins)
3703{
3704 glsl_src_param_t src0_param, src1_param;
3705 DWORD sampler_idx = ins->dst[0].reg.idx;
3706
3707 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1, &src0_param);
3708 shader_glsl_add_src_param(ins, &ins->src[1], WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1, &src1_param);
3709
3710 shader_glsl_append_dst(ins->ctx->buffer, ins);
3711 shader_addline(ins->ctx->buffer, "%s + bumpenvmat%d * %s);\n",
3712 src0_param.param_str, sampler_idx, src1_param.param_str);
3713}
3714
3715/** Process the WINED3DSIO_TEXREG2AR instruction in GLSL
3716 * Sample 2D texture at dst using the alpha & red (wx) components of src as texture coordinates */
3717static void shader_glsl_texreg2ar(const struct wined3d_shader_instruction *ins)
3718{
3719 const struct wined3d_gl_info *gl_info = ins->ctx->gl_info;
3720 glsl_src_param_t src0_param;
3721 DWORD sampler_idx = ins->dst[0].reg.idx;
3722 WINED3DSAMPLER_TEXTURE_TYPE sampler_type = ins->ctx->reg_maps->sampler_type[sampler_idx];
3723 glsl_sample_function_t sample_function;
3724
3725 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_ALL, &src0_param);
3726
3727 shader_glsl_get_sample_function(gl_info, sampler_type, 0, &sample_function);
3728 shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL,
3729 "%s.wx", src0_param.reg_name);
3730}
3731
3732/** Process the WINED3DSIO_TEXREG2GB instruction in GLSL
3733 * Sample 2D texture at dst using the green & blue (yz) components of src as texture coordinates */
3734static void shader_glsl_texreg2gb(const struct wined3d_shader_instruction *ins)
3735{
3736 const struct wined3d_gl_info *gl_info = ins->ctx->gl_info;
3737 glsl_src_param_t src0_param;
3738 DWORD sampler_idx = ins->dst[0].reg.idx;
3739 WINED3DSAMPLER_TEXTURE_TYPE sampler_type = ins->ctx->reg_maps->sampler_type[sampler_idx];
3740 glsl_sample_function_t sample_function;
3741
3742 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_ALL, &src0_param);
3743
3744 shader_glsl_get_sample_function(gl_info, sampler_type, 0, &sample_function);
3745 shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL,
3746 "%s.yz", src0_param.reg_name);
3747}
3748
3749/** Process the WINED3DSIO_TEXREG2RGB instruction in GLSL
3750 * Sample texture at dst using the rgb (xyz) components of src as texture coordinates */
3751static void shader_glsl_texreg2rgb(const struct wined3d_shader_instruction *ins)
3752{
3753 const struct wined3d_gl_info *gl_info = ins->ctx->gl_info;
3754 glsl_src_param_t src0_param;
3755 DWORD sampler_idx = ins->dst[0].reg.idx;
3756 WINED3DSAMPLER_TEXTURE_TYPE sampler_type = ins->ctx->reg_maps->sampler_type[sampler_idx];
3757 glsl_sample_function_t sample_function;
3758
3759 /* Dependent read, not valid with conditional NP2 */
3760 shader_glsl_get_sample_function(gl_info, sampler_type, 0, &sample_function);
3761 shader_glsl_add_src_param(ins, &ins->src[0], sample_function.coord_mask, &src0_param);
3762
3763 shader_glsl_gen_sample_code(ins, sampler_idx, &sample_function, WINED3DSP_NOSWIZZLE, NULL, NULL, NULL,
3764 "%s", src0_param.param_str);
3765}
3766
3767/** Process the WINED3DSIO_TEXKILL instruction in GLSL.
3768 * If any of the first 3 components are < 0, discard this pixel */
3769static void shader_glsl_texkill(const struct wined3d_shader_instruction *ins)
3770{
3771 glsl_dst_param_t dst_param;
3772
3773 /* The argument is a destination parameter, and no writemasks are allowed */
3774 shader_glsl_add_dst_param(ins, &ins->dst[0], &dst_param);
3775 if (ins->ctx->reg_maps->shader_version.major >= 2)
3776 {
3777 /* 2.0 shaders compare all 4 components in texkill */
3778 shader_addline(ins->ctx->buffer, "if (any(lessThan(%s.xyzw, vec4(0.0)))) discard;\n", dst_param.reg_name);
3779 } else {
3780 /* 1.X shaders only compare the first 3 components, probably due to the nature of the texkill
3781 * instruction as a tex* instruction, and phase, which kills all a / w components. Even if all
3782 * 4 components are defined, only the first 3 are used
3783 */
3784 shader_addline(ins->ctx->buffer, "if (any(lessThan(%s.xyz, vec3(0.0)))) discard;\n", dst_param.reg_name);
3785 }
3786}
3787
3788/** Process the WINED3DSIO_DP2ADD instruction in GLSL.
3789 * dst = dot2(src0, src1) + src2 */
3790static void shader_glsl_dp2add(const struct wined3d_shader_instruction *ins)
3791{
3792 glsl_src_param_t src0_param;
3793 glsl_src_param_t src1_param;
3794 glsl_src_param_t src2_param;
3795 DWORD write_mask;
3796 unsigned int mask_size;
3797
3798 write_mask = shader_glsl_append_dst(ins->ctx->buffer, ins);
3799 mask_size = shader_glsl_get_write_mask_size(write_mask);
3800
3801 shader_glsl_add_src_param(ins, &ins->src[0], WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1, &src0_param);
3802 shader_glsl_add_src_param(ins, &ins->src[1], WINED3DSP_WRITEMASK_0 | WINED3DSP_WRITEMASK_1, &src1_param);
3803 shader_glsl_add_src_param(ins, &ins->src[2], WINED3DSP_WRITEMASK_0, &src2_param);
3804
3805 if (mask_size > 1) {
3806 shader_addline(ins->ctx->buffer, "vec%d(dot(%s, %s) + %s));\n",
3807 mask_size, src0_param.param_str, src1_param.param_str, src2_param.param_str);
3808 } else {
3809 shader_addline(ins->ctx->buffer, "dot(%s, %s) + %s);\n",
3810 src0_param.param_str, src1_param.param_str, src2_param.param_str);
3811 }
3812}
3813
3814static void shader_glsl_input_pack(IWineD3DPixelShader *iface, struct wined3d_shader_buffer *buffer,
3815 const struct wined3d_shader_signature_element *input_signature, const struct shader_reg_maps *reg_maps,
3816 enum vertexprocessing_mode vertexprocessing)
3817{
3818 unsigned int i;
3819 IWineD3DPixelShaderImpl *This = (IWineD3DPixelShaderImpl *)iface;
3820 WORD map = reg_maps->input_registers;
3821
3822 for (i = 0; map; map >>= 1, ++i)
3823 {
3824 const char *semantic_name;
3825 UINT semantic_idx;
3826 char reg_mask[6];
3827
3828 /* Unused */
3829 if (!(map & 1)) continue;
3830
3831 semantic_name = input_signature[i].semantic_name;
3832 semantic_idx = input_signature[i].semantic_idx;
3833 shader_glsl_write_mask_to_str(input_signature[i].mask, reg_mask);
3834
3835 if (shader_match_semantic(semantic_name, WINED3DDECLUSAGE_TEXCOORD))
3836 {
3837 if (semantic_idx < 8 && vertexprocessing == pretransformed)
3838 shader_addline(buffer, "IN[%u]%s = gl_TexCoord[%u]%s;\n",
3839 This->input_reg_map[i], reg_mask, semantic_idx, reg_mask);
3840 else
3841 shader_addline(buffer, "IN[%u]%s = vec4(0.0, 0.0, 0.0, 0.0)%s;\n",
3842 This->input_reg_map[i], reg_mask, reg_mask);
3843 }
3844 else if (shader_match_semantic(semantic_name, WINED3DDECLUSAGE_COLOR))
3845 {
3846 if (semantic_idx == 0)
3847 shader_addline(buffer, "IN[%u]%s = vec4(gl_Color)%s;\n",
3848 This->input_reg_map[i], reg_mask, reg_mask);
3849 else if (semantic_idx == 1)
3850 shader_addline(buffer, "IN[%u]%s = vec4(gl_SecondaryColor)%s;\n",
3851 This->input_reg_map[i], reg_mask, reg_mask);
3852 else
3853 shader_addline(buffer, "IN[%u]%s = vec4(0.0, 0.0, 0.0, 0.0)%s;\n",
3854 This->input_reg_map[i], reg_mask, reg_mask);
3855 }
3856 else
3857 {
3858 shader_addline(buffer, "IN[%u]%s = vec4(0.0, 0.0, 0.0, 0.0)%s;\n",
3859 This->input_reg_map[i], reg_mask, reg_mask);
3860 }
3861 }
3862}
3863
3864/*********************************************
3865 * Vertex Shader Specific Code begins here
3866 ********************************************/
3867
3868static void add_glsl_program_entry(struct shader_glsl_priv *priv, struct glsl_shader_prog_link *entry) {
3869 glsl_program_key_t key;
3870
3871 key.vshader = entry->vshader;
3872 key.pshader = entry->pshader;
3873 key.vs_args = entry->vs_args;
3874 key.ps_args = entry->ps_args;
3875 key.context = entry->context;
3876
3877 if (wine_rb_put(&priv->program_lookup, &key, &entry->program_lookup_entry) == -1)
3878 {
3879 ERR("Failed to insert program entry.\n");
3880 }
3881}
3882
3883static struct glsl_shader_prog_link *get_glsl_program_entry(struct shader_glsl_priv *priv,
3884 IWineD3DVertexShader *vshader, IWineD3DPixelShader *pshader, struct vs_compile_args *vs_args,
3885 struct ps_compile_args *ps_args, const struct wined3d_context *context) {
3886 struct wine_rb_entry *entry;
3887 glsl_program_key_t key;
3888
3889 key.vshader = vshader;
3890 key.pshader = pshader;
3891 key.vs_args = *vs_args;
3892 key.ps_args = *ps_args;
3893 key.context = context;
3894
3895 entry = wine_rb_get(&priv->program_lookup, &key);
3896 return entry ? WINE_RB_ENTRY_VALUE(entry, struct glsl_shader_prog_link, program_lookup_entry) : NULL;
3897}
3898
3899/* GL locking is done by the caller */
3900static void delete_glsl_program_entry(struct shader_glsl_priv *priv, const struct wined3d_gl_info *gl_info,
3901 struct glsl_shader_prog_link *entry)
3902{
3903 glsl_program_key_t key;
3904
3905 key.vshader = entry->vshader;
3906 key.pshader = entry->pshader;
3907 key.vs_args = entry->vs_args;
3908 key.ps_args = entry->ps_args;
3909 key.context = entry->context;
3910 wine_rb_remove(&priv->program_lookup, &key);
3911
3912 if (context_get_current() == entry->context)
3913 {
3914 TRACE("deleting program %p\n", (void *)(uintptr_t)entry->programId);
3915 GL_EXTCALL(glDeleteObjectARB(entry->programId));
3916 checkGLcall("glDeleteObjectARB");
3917 }
3918 else
3919 {
3920 WARN("Attempting to delete program %p created in ctx %p from ctx %p\n", (void *)(uintptr_t)entry->programId, entry->context, context_get_current());
3921 }
3922
3923 if (entry->vshader) list_remove(&entry->vshader_entry);
3924 if (entry->pshader) list_remove(&entry->pshader_entry);
3925 HeapFree(GetProcessHeap(), 0, entry->vuniformF_locations);
3926 HeapFree(GetProcessHeap(), 0, entry->puniformF_locations);
3927 HeapFree(GetProcessHeap(), 0, entry);
3928}
3929
3930static void handle_ps3_input(struct wined3d_shader_buffer *buffer, const struct wined3d_gl_info *gl_info, const DWORD *map,
3931 const struct wined3d_shader_signature_element *input_signature, const struct shader_reg_maps *reg_maps_in,
3932 const struct wined3d_shader_signature_element *output_signature, const struct shader_reg_maps *reg_maps_out)
3933{
3934 unsigned int i, j;
3935 const char *semantic_name_in, *semantic_name_out;
3936 UINT semantic_idx_in, semantic_idx_out;
3937 DWORD *set;
3938 DWORD in_idx;
3939 unsigned int in_count = vec4_varyings(3, gl_info);
3940 char reg_mask[6], reg_mask_out[6];
3941 char destination[50];
3942 WORD input_map, output_map;
3943
3944 set = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*set) * (in_count + 2));
3945
3946 if (!output_signature)
3947 {
3948 /* Save gl_FrontColor & gl_FrontSecondaryColor before overwriting them. */
3949 shader_addline(buffer, "vec4 front_color = gl_FrontColor;\n");
3950 shader_addline(buffer, "vec4 front_secondary_color = gl_FrontSecondaryColor;\n");
3951 }
3952
3953 input_map = reg_maps_in->input_registers;
3954 for (i = 0; input_map; input_map >>= 1, ++i)
3955 {
3956 if (!(input_map & 1)) continue;
3957
3958 in_idx = map[i];
3959 if (in_idx >= (in_count + 2)) {
3960 FIXME("More input varyings declared than supported, expect issues\n");
3961 continue;
3962 }
3963 else if (map[i] == ~0U)
3964 {
3965 /* Declared, but not read register */
3966 continue;
3967 }
3968
3969 if (in_idx == in_count) {
3970 sprintf(destination, "gl_FrontColor");
3971 } else if (in_idx == in_count + 1) {
3972 sprintf(destination, "gl_FrontSecondaryColor");
3973 } else {
3974 sprintf(destination, "IN[%u]", in_idx);
3975 }
3976
3977 semantic_name_in = input_signature[i].semantic_name;
3978 semantic_idx_in = input_signature[i].semantic_idx;
3979 set[map[i]] = input_signature[i].mask;
3980 shader_glsl_write_mask_to_str(input_signature[i].mask, reg_mask);
3981
3982 if (!output_signature)
3983 {
3984 if (shader_match_semantic(semantic_name_in, WINED3DDECLUSAGE_COLOR))
3985 {
3986 if (semantic_idx_in == 0)
3987 shader_addline(buffer, "%s%s = front_color%s;\n",
3988 destination, reg_mask, reg_mask);
3989 else if (semantic_idx_in == 1)
3990 shader_addline(buffer, "%s%s = front_secondary_color%s;\n",
3991 destination, reg_mask, reg_mask);
3992 else
3993 shader_addline(buffer, "%s%s = vec4(0.0, 0.0, 0.0, 0.0)%s;\n",
3994 destination, reg_mask, reg_mask);
3995 }
3996 else if (shader_match_semantic(semantic_name_in, WINED3DDECLUSAGE_TEXCOORD))
3997 {
3998 if (semantic_idx_in < 8)
3999 {
4000 shader_addline(buffer, "%s%s = gl_TexCoord[%u]%s;\n",
4001 destination, reg_mask, semantic_idx_in, reg_mask);
4002 }
4003 else
4004 {
4005 shader_addline(buffer, "%s%s = vec4(0.0, 0.0, 0.0, 0.0)%s;\n",
4006 destination, reg_mask, reg_mask);
4007 }
4008 }
4009 else if (shader_match_semantic(semantic_name_in, WINED3DDECLUSAGE_FOG))
4010 {
4011 shader_addline(buffer, "%s%s = vec4(gl_FogFragCoord, 0.0, 0.0, 0.0)%s;\n",
4012 destination, reg_mask, reg_mask);
4013 }
4014 else
4015 {
4016 shader_addline(buffer, "%s%s = vec4(0.0, 0.0, 0.0, 0.0)%s;\n",
4017 destination, reg_mask, reg_mask);
4018 }
4019 } else {
4020 BOOL found = FALSE;
4021
4022 output_map = reg_maps_out->output_registers;
4023 for (j = 0; output_map; output_map >>= 1, ++j)
4024 {
4025 if (!(output_map & 1)) continue;
4026
4027 semantic_name_out = output_signature[j].semantic_name;
4028 semantic_idx_out = output_signature[j].semantic_idx;
4029 shader_glsl_write_mask_to_str(output_signature[j].mask, reg_mask_out);
4030
4031 if (semantic_idx_in == semantic_idx_out
4032 && !strcmp(semantic_name_in, semantic_name_out))
4033 {
4034 shader_addline(buffer, "%s%s = OUT[%u]%s;\n",
4035 destination, reg_mask, j, reg_mask);
4036 found = TRUE;
4037 }
4038 }
4039 if(!found) {
4040 shader_addline(buffer, "%s%s = vec4(0.0, 0.0, 0.0, 0.0)%s;\n",
4041 destination, reg_mask, reg_mask);
4042 }
4043 }
4044 }
4045
4046 /* This is solely to make the compiler / linker happy and avoid warning about undefined
4047 * varyings. It shouldn't result in any real code executed on the GPU, since all read
4048 * input varyings are assigned above, if the optimizer works properly.
4049 */
4050 for(i = 0; i < in_count + 2; i++) {
4051 if (set[i] && set[i] != WINED3DSP_WRITEMASK_ALL)
4052 {
4053 unsigned int size = 0;
4054 memset(reg_mask, 0, sizeof(reg_mask));
4055 if(!(set[i] & WINED3DSP_WRITEMASK_0)) {
4056 reg_mask[size] = 'x';
4057 size++;
4058 }
4059 if(!(set[i] & WINED3DSP_WRITEMASK_1)) {
4060 reg_mask[size] = 'y';
4061 size++;
4062 }
4063 if(!(set[i] & WINED3DSP_WRITEMASK_2)) {
4064 reg_mask[size] = 'z';
4065 size++;
4066 }
4067 if(!(set[i] & WINED3DSP_WRITEMASK_3)) {
4068 reg_mask[size] = 'w';
4069 size++;
4070 }
4071
4072 if (i == in_count) {
4073 sprintf(destination, "gl_FrontColor");
4074 } else if (i == in_count + 1) {
4075 sprintf(destination, "gl_FrontSecondaryColor");
4076 } else {
4077 sprintf(destination, "IN[%u]", i);
4078 }
4079
4080 if (size == 1) {
4081 shader_addline(buffer, "%s.%s = 0.0;\n", destination, reg_mask);
4082 } else {
4083 shader_addline(buffer, "%s.%s = vec%u(0.0);\n", destination, reg_mask, size);
4084 }
4085 }
4086 }
4087
4088 HeapFree(GetProcessHeap(), 0, set);
4089}
4090
4091static void generate_texcoord_assignment(struct wined3d_shader_buffer *buffer, IWineD3DVertexShaderImpl *vs, IWineD3DPixelShaderImpl *ps)
4092{
4093 DWORD map;
4094 unsigned int i;
4095 char reg_mask[6];
4096
4097 if (!ps)
4098 return;
4099
4100 for (i = 0, map = ps->baseShader.reg_maps.texcoord; map && i < min(8, MAX_REG_TEXCRD); map >>= 1, ++i)
4101 {
4102 if (!(map & 1))
4103 continue;
4104
4105 /* so far we assume that if texcoord_mask has any write flags, they are assigned appropriately with pixel shader */
4106 if ((vs->baseShader.reg_maps.texcoord_mask[i]) & WINED3DSP_WRITEMASK_ALL)
4107 continue;
4108
4109 shader_glsl_write_mask_to_str(WINED3DSP_WRITEMASK_ALL, reg_mask);
4110 shader_addline(buffer, "gl_TexCoord[%u]%s = gl_MultiTexCoord%u%s;\n", i, reg_mask, i, reg_mask);
4111 }
4112}
4113
4114/* GL locking is done by the caller */
4115static GLhandleARB generate_param_reorder_function(struct wined3d_shader_buffer *buffer,
4116 IWineD3DVertexShader *a_vertexshader, IWineD3DPixelShader *pixelshader, const struct wined3d_gl_info *gl_info)
4117{
4118 GLhandleARB ret = 0;
4119 IWineD3DVertexShaderImpl *vs = (IWineD3DVertexShaderImpl *) a_vertexshader;
4120 IWineD3DPixelShaderImpl *ps = (IWineD3DPixelShaderImpl *) pixelshader;
4121 IWineD3DDeviceImpl *device;
4122 DWORD vs_major = vs->baseShader.reg_maps.shader_version.major;
4123 DWORD ps_major = ps ? ps->baseShader.reg_maps.shader_version.major : 0;
4124 unsigned int i;
4125 const char *semantic_name;
4126 UINT semantic_idx;
4127 char reg_mask[6];
4128 const struct wined3d_shader_signature_element *output_signature;
4129
4130 shader_buffer_clear(buffer);
4131
4132 shader_addline(buffer, "#version 120\n");
4133
4134 if(vs_major < 3 && ps_major < 3) {
4135 /* That one is easy: The vertex shader writes to the builtin varyings, the pixel shader reads from them.
4136 * Take care about the texcoord .w fixup though if we're using the fixed function fragment pipeline
4137 */
4138 device = (IWineD3DDeviceImpl *) vs->baseShader.device;
4139 if ((gl_info->quirks & WINED3D_QUIRK_SET_TEXCOORD_W)
4140 && ps_major == 0 && vs_major > 0 && !device->frag_pipe->ffp_proj_control)
4141 {
4142 shader_addline(buffer, "void order_ps_input() {\n");
4143 for(i = 0; i < min(8, MAX_REG_TEXCRD); i++) {
4144 if(vs->baseShader.reg_maps.texcoord_mask[i] != 0 &&
4145 vs->baseShader.reg_maps.texcoord_mask[i] != WINED3DSP_WRITEMASK_ALL) {
4146 shader_addline(buffer, "gl_TexCoord[%u].w = 1.0;\n", i);
4147 }
4148 }
4149 shader_addline(buffer, "}\n");
4150 } else {
4151 shader_addline(buffer, "void order_ps_input() {\n");
4152 generate_texcoord_assignment(buffer, vs, ps);
4153 shader_addline(buffer, "}\n");
4154 }
4155 } else if(ps_major < 3 && vs_major >= 3) {
4156 WORD map = vs->baseShader.reg_maps.output_registers;
4157
4158 /* The vertex shader writes to its own varyings, the pixel shader needs them in the builtin ones */
4159 output_signature = vs->baseShader.output_signature;
4160
4161 shader_addline(buffer, "void order_ps_input(in vec4 OUT[%u]) {\n", MAX_REG_OUTPUT);
4162 for (i = 0; map; map >>= 1, ++i)
4163 {
4164 DWORD write_mask;
4165
4166 if (!(map & 1)) continue;
4167
4168 semantic_name = output_signature[i].semantic_name;
4169 semantic_idx = output_signature[i].semantic_idx;
4170 write_mask = output_signature[i].mask;
4171 shader_glsl_write_mask_to_str(write_mask, reg_mask);
4172
4173 if (shader_match_semantic(semantic_name, WINED3DDECLUSAGE_COLOR))
4174 {
4175 if (semantic_idx == 0)
4176 shader_addline(buffer, "gl_FrontColor%s = OUT[%u]%s;\n", reg_mask, i, reg_mask);
4177 else if (semantic_idx == 1)
4178 shader_addline(buffer, "gl_FrontSecondaryColor%s = OUT[%u]%s;\n", reg_mask, i, reg_mask);
4179 }
4180 else if (shader_match_semantic(semantic_name, WINED3DDECLUSAGE_POSITION))
4181 {
4182 shader_addline(buffer, "gl_Position%s = OUT[%u]%s;\n", reg_mask, i, reg_mask);
4183 }
4184 else if (shader_match_semantic(semantic_name, WINED3DDECLUSAGE_TEXCOORD))
4185 {
4186 if (semantic_idx < 8)
4187 {
4188 if (!(gl_info->quirks & WINED3D_QUIRK_SET_TEXCOORD_W) || ps_major > 0)
4189 write_mask |= WINED3DSP_WRITEMASK_3;
4190
4191 shader_addline(buffer, "gl_TexCoord[%u]%s = OUT[%u]%s;\n",
4192 semantic_idx, reg_mask, i, reg_mask);
4193 if (!(write_mask & WINED3DSP_WRITEMASK_3))
4194 shader_addline(buffer, "gl_TexCoord[%u].w = 1.0;\n", semantic_idx);
4195 }
4196 }
4197 else if (shader_match_semantic(semantic_name, WINED3DDECLUSAGE_PSIZE))
4198 {
4199 shader_addline(buffer, "gl_PointSize = OUT[%u].x;\n", i);
4200 }
4201 else if (shader_match_semantic(semantic_name, WINED3DDECLUSAGE_FOG))
4202 {
4203 shader_addline(buffer, "gl_FogFragCoord = OUT[%u].%c;\n", i, reg_mask[1]);
4204 }
4205 }
4206 shader_addline(buffer, "}\n");
4207
4208 } else if(ps_major >= 3 && vs_major >= 3) {
4209 WORD map = vs->baseShader.reg_maps.output_registers;
4210
4211 output_signature = vs->baseShader.output_signature;
4212
4213 /* This one is tricky: a 3.0 pixel shader reads from a 3.0 vertex shader */
4214 shader_addline(buffer, "varying vec4 IN[%u];\n", vec4_varyings(3, gl_info));
4215 shader_addline(buffer, "void order_ps_input(in vec4 OUT[%u]) {\n", MAX_REG_OUTPUT);
4216
4217 /* First, sort out position and point size. Those are not passed to the pixel shader */
4218 for (i = 0; map; map >>= 1, ++i)
4219 {
4220 if (!(map & 1)) continue;
4221
4222 semantic_name = output_signature[i].semantic_name;
4223 shader_glsl_write_mask_to_str(output_signature[i].mask, reg_mask);
4224
4225 if (shader_match_semantic(semantic_name, WINED3DDECLUSAGE_POSITION))
4226 {
4227 shader_addline(buffer, "gl_Position%s = OUT[%u]%s;\n", reg_mask, i, reg_mask);
4228 }
4229 else if (shader_match_semantic(semantic_name, WINED3DDECLUSAGE_PSIZE))
4230 {
4231 shader_addline(buffer, "gl_PointSize = OUT[%u].x;\n", i);
4232 }
4233 }
4234
4235 /* Then, fix the pixel shader input */
4236 handle_ps3_input(buffer, gl_info, ps->input_reg_map, ps->baseShader.input_signature,
4237 &ps->baseShader.reg_maps, output_signature, &vs->baseShader.reg_maps);
4238
4239 shader_addline(buffer, "}\n");
4240 } else if(ps_major >= 3 && vs_major < 3) {
4241 shader_addline(buffer, "varying vec4 IN[%u];\n", vec4_varyings(3, gl_info));
4242 shader_addline(buffer, "void order_ps_input() {\n");
4243 /* The vertex shader wrote to the builtin varyings. There is no need to figure out position and
4244 * point size, but we depend on the optimizers kindness to find out that the pixel shader doesn't
4245 * read gl_TexCoord and gl_ColorX, otherwise we'll run out of varyings
4246 */
4247 handle_ps3_input(buffer, gl_info, ps->input_reg_map, ps->baseShader.input_signature,
4248 &ps->baseShader.reg_maps, NULL, NULL);
4249 shader_addline(buffer, "}\n");
4250 } else {
4251 ERR("Unexpected vertex and pixel shader version condition: vs: %d, ps: %d\n", vs_major, ps_major);
4252 }
4253
4254 ret = GL_EXTCALL(glCreateShaderObjectARB(GL_VERTEX_SHADER_ARB));
4255 checkGLcall("glCreateShaderObjectARB(GL_VERTEX_SHADER_ARB)");
4256 GL_EXTCALL(glShaderSourceARB(ret, 1, (const char**)&buffer->buffer, NULL));
4257 checkGLcall("glShaderSourceARB(ret, 1, &buffer->buffer, NULL)");
4258 GL_EXTCALL(glCompileShaderARB(ret));
4259 checkGLcall("glCompileShaderARB(ret)");
4260 shader_glsl_validate_compile_link(gl_info, ret, FALSE);
4261 return ret;
4262}
4263
#ifdef VBOX_WITH_VMSVGA
/* Creates and compiles a fixed vertex shader that takes gl_Vertex, forces
 * z to 0.0 and w to 1.0 and multiplies the result with
 * gl_ModelViewProjectionMatrix. Returns the handle of the new shader object.
 *
 * GL locking is presumably done by the caller, as for the other generators
 * in this file - TODO confirm. */
static GLhandleARB generate_passthrough_vshader(const struct wined3d_gl_info *gl_info)
{
    /* A single source string, assembled from adjacent literals. */
    static const char *passthrough_vshader[] =
    {
        "#version 120\n"
        "vec4 R0;\n"
        "void main(void)\n"
        "{\n"
        " R0 = gl_Vertex;\n"
        " R0.w = 1.0;\n"
        " R0.z = 0.0;\n"
        " gl_Position = gl_ModelViewProjectionMatrix * R0;\n"
        "}\n"
    };
    GLhandleARB ret = GL_EXTCALL(glCreateShaderObjectARB(GL_VERTEX_SHADER_ARB));

    checkGLcall("glCreateShaderObjectARB(GL_VERTEX_SHADER_ARB)");

    GL_EXTCALL(glShaderSourceARB(ret, 1, passthrough_vshader, NULL));
    checkGLcall("glShaderSourceARB(ret, 1, passthrough_vshader, NULL)");

    GL_EXTCALL(glCompileShaderARB(ret));
    checkGLcall("glCompileShaderARB(ret)");

    shader_glsl_validate_compile_link(gl_info, ret, FALSE);
    return ret;
}

#endif
4293
4294/* GL locking is done by the caller */
4295static void hardcode_local_constants(IWineD3DBaseShaderImpl *shader, const struct wined3d_gl_info *gl_info,
4296 GLhandleARB programId, char prefix)
4297{
4298 const local_constant *lconst;
4299 GLint tmp_loc;
4300 const float *value;
4301 char glsl_name[8];
4302
4303 LIST_FOR_EACH_ENTRY(lconst, &shader->baseShader.constantsF, local_constant, entry) {
4304 value = (const float *)lconst->value;
4305 snprintf(glsl_name, sizeof(glsl_name), "%cLC%u", prefix, lconst->idx);
4306 tmp_loc = GL_EXTCALL(glGetUniformLocationARB(programId, glsl_name));
4307 GL_EXTCALL(glUniform4fvARB(tmp_loc, 1, value));
4308 }
4309 checkGLcall("Hardcoding local constants");
4310}
4311
4312/* GL locking is done by the caller */
4313#ifdef VBOX_WITH_VMSVGA
4314static GLhandleARB shader_glsl_generate_pshader(const struct wined3d_context *context,
4315#else
4316static GLuint shader_glsl_generate_pshader(const struct wined3d_context *context,
4317#endif
4318 struct wined3d_shader_buffer *buffer, IWineD3DPixelShaderImpl *This,
4319 const struct ps_compile_args *args, struct ps_np2fixup_info *np2fixup_info)
4320{
4321 const struct shader_reg_maps *reg_maps = &This->baseShader.reg_maps;
4322 const struct wined3d_gl_info *gl_info = context->gl_info;
4323 CONST DWORD *function = This->baseShader.function;
4324 struct shader_glsl_ctx_priv priv_ctx;
4325
4326 /* Create the hw GLSL shader object and assign it as the shader->prgId */
4327 GLhandleARB shader_obj = GL_EXTCALL(glCreateShaderObjectARB(GL_FRAGMENT_SHADER_ARB));
4328
4329 memset(&priv_ctx, 0, sizeof(priv_ctx));
4330 priv_ctx.cur_ps_args = args;
4331 priv_ctx.cur_np2fixup_info = np2fixup_info;
4332
4333 shader_addline(buffer, "#version 120\n");
4334
4335 if (gl_info->supported[ARB_SHADER_TEXTURE_LOD] && reg_maps->usestexldd)
4336 {
4337 shader_addline(buffer, "#extension GL_ARB_shader_texture_lod : enable\n");
4338 }
4339 if (gl_info->supported[ARB_TEXTURE_RECTANGLE])
4340 {
4341 /* The spec says that it doesn't have to be explicitly enabled, but the nvidia
4342 * drivers write a warning if we don't do so
4343 */
4344 shader_addline(buffer, "#extension GL_ARB_texture_rectangle : enable\n");
4345 }
4346 if (gl_info->supported[EXT_GPU_SHADER4])
4347 {
4348 shader_addline(buffer, "#extension GL_EXT_gpu_shader4 : enable\n");
4349 }
4350
4351 /* Base Declarations */
4352 shader_generate_glsl_declarations(context, buffer, (IWineD3DBaseShader *)This, reg_maps, &priv_ctx);
4353
4354 /* Pack 3.0 inputs */
4355 if (reg_maps->shader_version.major >= 3 && args->vp_mode != vertexshader)
4356 {
4357 shader_glsl_input_pack((IWineD3DPixelShader *) This, buffer,
4358 This->baseShader.input_signature, reg_maps, args->vp_mode);
4359 }
4360
4361 /* Base Shader Body */
4362 shader_generate_main((IWineD3DBaseShader *)This, buffer, reg_maps, function, &priv_ctx);
4363
4364 /* Pixel shaders < 2.0 place the resulting color in R0 implicitly */
4365 if (reg_maps->shader_version.major < 2)
4366 {
4367 /* Some older cards like GeforceFX ones don't support multiple buffers, so also not gl_FragData */
4368 shader_addline(buffer, "gl_FragData[0] = R0;\n");
4369 }
4370
4371 if (args->srgb_correction)
4372 {
4373 shader_addline(buffer, "tmp0.xyz = pow(gl_FragData[0].xyz, vec3(srgb_const0.x));\n");
4374 shader_addline(buffer, "tmp0.xyz = tmp0.xyz * vec3(srgb_const0.y) - vec3(srgb_const0.z);\n");
4375 shader_addline(buffer, "tmp1.xyz = gl_FragData[0].xyz * vec3(srgb_const0.w);\n");
4376 shader_addline(buffer, "bvec3 srgb_compare = lessThan(gl_FragData[0].xyz, vec3(srgb_const1.x));\n");
4377 shader_addline(buffer, "gl_FragData[0].xyz = mix(tmp0.xyz, tmp1.xyz, vec3(srgb_compare));\n");
4378 shader_addline(buffer, "gl_FragData[0] = clamp(gl_FragData[0], 0.0, 1.0);\n");
4379 }
4380 /* Pixel shader < 3.0 do not replace the fog stage.
4381 * This implements linear fog computation and blending.
4382 * TODO: non linear fog
4383 * NOTE: gl_Fog.start and gl_Fog.end don't hold fog start s and end e but
4384 * -1/(e-s) and e/(e-s) respectively.
4385 */
4386 if (reg_maps->shader_version.major < 3)
4387 {
4388 switch(args->fog) {
4389 case FOG_OFF: break;
4390 case FOG_LINEAR:
4391 shader_addline(buffer, "float fogstart = -1.0 / (gl_Fog.end - gl_Fog.start);\n");
4392 shader_addline(buffer, "float fogend = gl_Fog.end * -fogstart;\n");
4393 shader_addline(buffer, "float Fog = clamp(gl_FogFragCoord * fogstart + fogend, 0.0, 1.0);\n");
4394 shader_addline(buffer, "gl_FragData[0].xyz = mix(gl_Fog.color.xyz, gl_FragData[0].xyz, Fog);\n");
4395 break;
4396 case FOG_EXP:
4397 /* Fog = e^(-gl_Fog.density * gl_FogFragCoord) */
4398 shader_addline(buffer, "float Fog = exp(-gl_Fog.density * gl_FogFragCoord);\n");
4399 shader_addline(buffer, "Fog = clamp(Fog, 0.0, 1.0);\n");
4400 shader_addline(buffer, "gl_FragData[0].xyz = mix(gl_Fog.color.xyz, gl_FragData[0].xyz, Fog);\n");
4401 break;
4402 case FOG_EXP2:
4403 /* Fog = e^(-(gl_Fog.density * gl_FogFragCoord)^2) */
4404 shader_addline(buffer, "float Fog = exp(-gl_Fog.density * gl_Fog.density * gl_FogFragCoord * gl_FogFragCoord);\n");
4405 shader_addline(buffer, "Fog = clamp(Fog, 0.0, 1.0);\n");
4406 shader_addline(buffer, "gl_FragData[0].xyz = mix(gl_Fog.color.xyz, gl_FragData[0].xyz, Fog);\n");
4407 break;
4408 }
4409 }
4410
4411 shader_addline(buffer, "}\n");
4412
4413 TRACE("Compiling shader object %p\n", (void *)(uintptr_t)shader_obj);
4414 GL_EXTCALL(glShaderSourceARB(shader_obj, 1, (const char**)&buffer->buffer, NULL));
4415 GL_EXTCALL(glCompileShaderARB(shader_obj));
4416 shader_glsl_validate_compile_link(gl_info, shader_obj, FALSE);
4417
4418 /* Store the shader object */
4419 return shader_obj;
4420}
4421
4422/* GL locking is done by the caller */
4423#ifdef VBOX_WITH_VMSVGA
4424static GLhandleARB shader_glsl_generate_vshader(const struct wined3d_context *context,
4425#else
4426static GLuint shader_glsl_generate_vshader(const struct wined3d_context *context,
4427#endif
4428 struct wined3d_shader_buffer *buffer, IWineD3DVertexShaderImpl *This,
4429 const struct vs_compile_args *args)
4430{
4431 const struct shader_reg_maps *reg_maps = &This->baseShader.reg_maps;
4432 const struct wined3d_gl_info *gl_info = context->gl_info;
4433 CONST DWORD *function = This->baseShader.function;
4434 struct shader_glsl_ctx_priv priv_ctx;
4435
4436 /* Create the hw GLSL shader program and assign it as the shader->prgId */
4437 GLhandleARB shader_obj = GL_EXTCALL(glCreateShaderObjectARB(GL_VERTEX_SHADER_ARB));
4438
4439 shader_addline(buffer, "#version 120\n");
4440
4441 if (gl_info->supported[EXT_GPU_SHADER4])
4442 {
4443 shader_addline(buffer, "#extension GL_EXT_gpu_shader4 : enable\n");
4444 }
4445
4446 memset(&priv_ctx, 0, sizeof(priv_ctx));
4447 priv_ctx.cur_vs_args = args;
4448
4449 /* Base Declarations */
4450 shader_generate_glsl_declarations(context, buffer, (IWineD3DBaseShader *)This, reg_maps, &priv_ctx);
4451
4452 /* Base Shader Body */
4453 shader_generate_main((IWineD3DBaseShader*)This, buffer, reg_maps, function, &priv_ctx);
4454
4455 /* Unpack 3.0 outputs */
4456 if (reg_maps->shader_version.major >= 3) shader_addline(buffer, "order_ps_input(OUT);\n");
4457 else shader_addline(buffer, "order_ps_input();\n");
4458
4459 /* The D3DRS_FOGTABLEMODE render state defines if the shader-generated fog coord is used
4460 * or if the fragment depth is used. If the fragment depth is used(FOGTABLEMODE != NONE),
4461 * the fog frag coord is thrown away. If the fog frag coord is used, but not written by
4462 * the shader, it is set to 0.0(fully fogged, since start = 1.0, end = 0.0)
4463 */
4464 if(args->fog_src == VS_FOG_Z) {
4465 shader_addline(buffer, "gl_FogFragCoord = gl_Position.z;\n");
4466 } else if (!reg_maps->fog) {
4467 shader_addline(buffer, "gl_FogFragCoord = 0.0;\n");
4468 }
4469
4470 /* Write the final position.
4471 *
4472 * OpenGL coordinates specify the center of the pixel while d3d coords specify
4473 * the corner. The offsets are stored in z and w in posFixup. posFixup.y contains
4474 * 1.0 or -1.0 to turn the rendering upside down for offscreen rendering. PosFixup.x
4475 * contains 1.0 to allow a mad.
4476 */
4477 shader_addline(buffer, "gl_Position.y = gl_Position.y * posFixup.y;\n");
4478 shader_addline(buffer, "gl_Position.xy += posFixup.zw * gl_Position.ww;\n");
4479 if(args->clip_enabled) {
4480 shader_addline(buffer, "gl_ClipVertex = gl_Position;\n");
4481 }
4482
4483 /* Z coord [0;1]->[-1;1] mapping, see comment in transform_projection in state.c
4484 *
4485 * Basically we want (in homogeneous coordinates) z = z * 2 - 1. However, shaders are run
4486 * before the homogeneous divide, so we have to take the w into account: z = ((z / w) * 2 - 1) * w,
4487 * which is the same as z = z * 2 - w.
4488 */
4489 shader_addline(buffer, "gl_Position.z = gl_Position.z * 2.0 - gl_Position.w;\n");
4490
4491 shader_addline(buffer, "}\n");
4492
4493 TRACE("Compiling shader object %p\n", (void *)(uintptr_t)shader_obj);
4494 GL_EXTCALL(glShaderSourceARB(shader_obj, 1, (const char**)&buffer->buffer, NULL));
4495 GL_EXTCALL(glCompileShaderARB(shader_obj));
4496 shader_glsl_validate_compile_link(gl_info, shader_obj, FALSE);
4497
4498 return shader_obj;
4499}
4500
4501static GLhandleARB find_glsl_pshader(const struct wined3d_context *context,
4502 struct wined3d_shader_buffer *buffer, IWineD3DPixelShaderImpl *shader,
4503 const struct ps_compile_args *args,
4504 UINT *inp2fixup_info
4505 )
4506{
4507 UINT i;
4508 DWORD new_size;
4509 struct glsl_ps_compiled_shader *new_array;
4510 struct glsl_pshader_private *shader_data;
4511 struct ps_np2fixup_info *np2fixup = NULL;
4512 GLhandleARB ret;
4513
4514 if (!shader->baseShader.backend_data)
4515 {
4516 shader->baseShader.backend_data = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*shader_data));
4517 if (!shader->baseShader.backend_data)
4518 {
4519 ERR("Failed to allocate backend data.\n");
4520 return 0;
4521 }
4522 }
4523 shader_data = shader->baseShader.backend_data;
4524
4525 /* Usually we have very few GL shaders for each d3d shader(just 1 or maybe 2),
4526 * so a linear search is more performant than a hashmap or a binary search
4527 * (cache coherency etc)
4528 */
4529 for(i = 0; i < shader_data->num_gl_shaders; i++) {
4530 if(shader_data->gl_shaders[i].context==context
4531 && memcmp(&shader_data->gl_shaders[i].args, args, sizeof(*args)) == 0) {
4532 if(args->np2_fixup) {
4533 *inp2fixup_info = i;
4534 }
4535 return shader_data->gl_shaders[i].prgId;
4536 }
4537 }
4538
4539 TRACE("No matching GL shader found for shader %p, compiling a new shader.\n", shader);
4540 if(shader_data->shader_array_size == shader_data->num_gl_shaders) {
4541 if (shader_data->num_gl_shaders)
4542 {
4543 new_size = shader_data->shader_array_size + max(1, shader_data->shader_array_size / 2);
4544 new_array = HeapReAlloc(GetProcessHeap(), 0, shader_data->gl_shaders,
4545 new_size * sizeof(*shader_data->gl_shaders));
4546 } else {
4547 new_array = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*shader_data->gl_shaders));
4548 new_size = 1;
4549 }
4550
4551 if(!new_array) {
4552 ERR("Out of memory\n");
4553 return 0;
4554 }
4555 shader_data->gl_shaders = new_array;
4556 shader_data->shader_array_size = new_size;
4557 }
4558
4559 shader_data->gl_shaders[shader_data->num_gl_shaders].context = context;
4560 shader_data->gl_shaders[shader_data->num_gl_shaders].args = *args;
4561
4562 memset(&shader_data->gl_shaders[shader_data->num_gl_shaders].np2fixup, 0, sizeof(struct ps_np2fixup_info));
4563 if (args->np2_fixup) np2fixup = &shader_data->gl_shaders[shader_data->num_gl_shaders].np2fixup;
4564
4565 pixelshader_update_samplers(&shader->baseShader.reg_maps,
4566 ((IWineD3DDeviceImpl *)shader->baseShader.device)->stateBlock->textures);
4567
4568 shader_buffer_clear(buffer);
4569 ret = shader_glsl_generate_pshader(context, buffer, shader, args, np2fixup);
4570 *inp2fixup_info = shader_data->num_gl_shaders;
4571 shader_data->gl_shaders[shader_data->num_gl_shaders++].prgId = ret;
4572
4573 return ret;
4574}
4575
4576static inline BOOL vs_args_equal(const struct vs_compile_args *stored, const struct vs_compile_args *new,
4577 const DWORD use_map) {
4578 if((stored->swizzle_map & use_map) != new->swizzle_map) return FALSE;
4579 if((stored->clip_enabled) != new->clip_enabled) return FALSE;
4580 return stored->fog_src == new->fog_src;
4581}
4582
4583static GLhandleARB find_glsl_vshader(const struct wined3d_context *context,
4584 struct wined3d_shader_buffer *buffer, IWineD3DVertexShaderImpl *shader,
4585 const struct vs_compile_args *args)
4586{
4587 UINT i;
4588 DWORD new_size;
4589 struct glsl_vs_compiled_shader *new_array;
4590 DWORD use_map = ((IWineD3DDeviceImpl *)shader->baseShader.device)->strided_streams.use_map;
4591 struct glsl_vshader_private *shader_data;
4592 GLhandleARB ret;
4593
4594 if (!shader->baseShader.backend_data)
4595 {
4596 shader->baseShader.backend_data = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*shader_data));
4597 if (!shader->baseShader.backend_data)
4598 {
4599 ERR("Failed to allocate backend data.\n");
4600 return 0;
4601 }
4602 }
4603 shader_data = shader->baseShader.backend_data;
4604
4605 /* Usually we have very few GL shaders for each d3d shader(just 1 or maybe 2),
4606 * so a linear search is more performant than a hashmap or a binary search
4607 * (cache coherency etc)
4608 */
4609 for(i = 0; i < shader_data->num_gl_shaders; i++) {
4610 if(shader_data->gl_shaders[i].context==context
4611 && vs_args_equal(&shader_data->gl_shaders[i].args, args, use_map)) {
4612 return shader_data->gl_shaders[i].prgId;
4613 }
4614 }
4615
4616 TRACE("No matching GL shader found for shader %p, compiling a new shader.\n", shader);
4617
4618 if(shader_data->shader_array_size == shader_data->num_gl_shaders) {
4619 if (shader_data->num_gl_shaders)
4620 {
4621 new_size = shader_data->shader_array_size + max(1, shader_data->shader_array_size / 2);
4622 new_array = HeapReAlloc(GetProcessHeap(), 0, shader_data->gl_shaders,
4623 new_size * sizeof(*shader_data->gl_shaders));
4624 } else {
4625 new_array = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(*shader_data->gl_shaders));
4626 new_size = 1;
4627 }
4628
4629 if(!new_array) {
4630 ERR("Out of memory\n");
4631 return 0;
4632 }
4633 shader_data->gl_shaders = new_array;
4634 shader_data->shader_array_size = new_size;
4635 }
4636
4637 shader_data->gl_shaders[shader_data->num_gl_shaders].context = context;
4638 shader_data->gl_shaders[shader_data->num_gl_shaders].args = *args;
4639
4640 shader_buffer_clear(buffer);
4641 ret = shader_glsl_generate_vshader(context, buffer, shader, args);
4642 shader_data->gl_shaders[shader_data->num_gl_shaders++].prgId = ret;
4643
4644 return ret;
4645}
4646
4647/** Sets the GLSL program ID for the given pixel and vertex shader combination.
4648 * It sets the programId on the current StateBlock (because it should be called
4649 * inside of the DrawPrimitive() part of the render loop).
4650 *
4651 * If a program for the given combination does not exist, create one, and store
4652 * the program in the hash table. If it creates a program, it will link the
4653 * given objects, too.
4654 */
4655
4656/* GL locking is done by the caller */
4657static void set_glsl_shader_program(const struct wined3d_context *context,
4658 IWineD3DDeviceImpl *device, BOOL a_use_ps, BOOL a_use_vs)
4659{
4660 IWineD3DVertexShader *vshader = a_use_vs ? device->stateBlock->vertexShader : NULL;
4661 IWineD3DPixelShader *pshader = a_use_ps ? device->stateBlock->pixelShader : NULL;
4662 const struct wined3d_gl_info *gl_info = context->gl_info;
4663 struct shader_glsl_priv *priv = device->shader_priv;
4664 struct glsl_shader_prog_link *entry = NULL;
4665 GLhandleARB programId = 0;
4666 GLhandleARB reorder_shader_id = 0;
4667 unsigned int i;
4668 char glsl_name[8];
4669 struct ps_compile_args ps_compile_args;
4670 struct vs_compile_args vs_compile_args;
4671
4672 if (vshader) find_vs_compile_args((IWineD3DVertexShaderImpl *)vshader, device->stateBlock, &vs_compile_args);
4673 if (pshader) find_ps_compile_args((IWineD3DPixelShaderImpl *)pshader, device->stateBlock, &ps_compile_args);
4674
4675 entry = get_glsl_program_entry(priv, vshader, pshader, &vs_compile_args, &ps_compile_args, context);
4676 if (entry) {
4677 priv->glsl_program = entry;
4678 return;
4679 }
4680
4681 /* If we get to this point, then no matching program exists, so we create one */
4682 programId = GL_EXTCALL(glCreateProgramObjectARB());
4683 TRACE("Created new GLSL shader program %p\n", (void *)(uintptr_t)programId);
4684
4685 /* Create the entry */
4686 entry = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(struct glsl_shader_prog_link));
4687 entry->context = context;
4688 entry->programId = programId;
4689 entry->vshader = vshader;
4690 entry->pshader = pshader;
4691 entry->vs_args = vs_compile_args;
4692 entry->ps_args = ps_compile_args;
4693 entry->constant_version = 0;
4694 WINEFIXUPINFO_INIT(entry);
4695 /* Add the hash table entry */
4696 add_glsl_program_entry(priv, entry);
4697
4698 /* Set the current program */
4699 priv->glsl_program = entry;
4700
4701 /* Attach GLSL vshader */
4702 if (vshader)
4703 {
4704 GLhandleARB vshader_id = find_glsl_vshader(context, &priv->shader_buffer,
4705 (IWineD3DVertexShaderImpl *)vshader, &vs_compile_args);
4706 WORD map = ((IWineD3DBaseShaderImpl *)vshader)->baseShader.reg_maps.input_registers;
4707 char tmp_name[10];
4708
4709 reorder_shader_id = generate_param_reorder_function(&priv->shader_buffer, vshader, pshader, gl_info);
4710 TRACE("Attaching GLSL shader object %p to program %p\n", (void *)(uintptr_t)reorder_shader_id, (void *)(uintptr_t)programId);
4711 GL_EXTCALL(glAttachObjectARB(programId, reorder_shader_id));
4712 checkGLcall("glAttachObjectARB");
4713 /* Flag the reorder function for deletion, then it will be freed automatically when the program
4714 * is destroyed
4715 */
4716 GL_EXTCALL(glDeleteObjectARB(reorder_shader_id));
4717
4718 TRACE("Attaching GLSL shader object %p to program %p\n", (void *)(uintptr_t)vshader_id, (void *)(uintptr_t)programId);
4719 GL_EXTCALL(glAttachObjectARB(programId, vshader_id));
4720 checkGLcall("glAttachObjectARB");
4721
4722 /* Bind vertex attributes to a corresponding index number to match
4723 * the same index numbers as ARB_vertex_programs (makes loading
4724 * vertex attributes simpler). With this method, we can use the
4725 * exact same code to load the attributes later for both ARB and
4726 * GLSL shaders.
4727 *
4728 * We have to do this here because we need to know the Program ID
4729 * in order to make the bindings work, and it has to be done prior
4730 * to linking the GLSL program. */
4731 for (i = 0; map; map >>= 1, ++i)
4732 {
4733 if (!(map & 1)) continue;
4734
4735 snprintf(tmp_name, sizeof(tmp_name), "attrib%u", i);
4736 GL_EXTCALL(glBindAttribLocationARB(programId, i, tmp_name));
4737 }
4738 checkGLcall("glBindAttribLocationARB");
4739
4740 list_add_head(&((IWineD3DBaseShaderImpl *)vshader)->baseShader.linked_programs, &entry->vshader_entry);
4741 }
4742#ifdef VBOX_WITH_VMSVGA
4743 else
4744 if (device->strided_streams.position_transformed)
4745 {
4746 GLhandleARB passthrough_vshader_id;
4747
4748 passthrough_vshader_id = generate_passthrough_vshader(gl_info);
4749 TRACE("Attaching GLSL shader object %p to program %p\n", (void *)(uintptr_t)passthrough_vshader_id, (void *)(uintptr_t)programId);
4750 GL_EXTCALL(glAttachObjectARB(programId, passthrough_vshader_id));
4751 checkGLcall("glAttachObjectARB");
4752 /* Flag the reorder function for deletion, then it will be freed automatically when the program
4753 * is destroyed
4754 */
4755 GL_EXTCALL(glDeleteObjectARB(passthrough_vshader_id));
4756 }
4757#endif
4758
4759
4760 /* Attach GLSL pshader */
4761 if (pshader)
4762 {
4763 GLhandleARB pshader_id = find_glsl_pshader(context, &priv->shader_buffer,
4764 (IWineD3DPixelShaderImpl *)pshader, &ps_compile_args,
4765 &entry->inp2Fixup_info
4766 );
4767 TRACE("Attaching GLSL shader object %p to program %p\n", (void *)(uintptr_t)pshader_id, (void *)(uintptr_t)programId);
4768 GL_EXTCALL(glAttachObjectARB(programId, pshader_id));
4769 checkGLcall("glAttachObjectARB");
4770
4771 list_add_head(&((IWineD3DBaseShaderImpl *)pshader)->baseShader.linked_programs, &entry->pshader_entry);
4772 }
4773
4774 /* Link the program */
4775 TRACE("Linking GLSL shader program %p\n", (void *)(uintptr_t)programId);
4776 GL_EXTCALL(glLinkProgramARB(programId));
4777 shader_glsl_validate_compile_link(gl_info, programId, TRUE);
4778
4779 entry->vuniformF_locations = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
4780 sizeof(GLhandleARB) * gl_info->limits.glsl_vs_float_constants);
4781 for (i = 0; i < gl_info->limits.glsl_vs_float_constants; ++i)
4782 {
4783 snprintf(glsl_name, sizeof(glsl_name), "VC[%i]", i);
4784 entry->vuniformF_locations[i] = GL_EXTCALL(glGetUniformLocationARB(programId, glsl_name));
4785 }
4786 for (i = 0; i < MAX_CONST_I; ++i)
4787 {
4788 snprintf(glsl_name, sizeof(glsl_name), "VI[%i]", i);
4789 entry->vuniformI_locations[i] = GL_EXTCALL(glGetUniformLocationARB(programId, glsl_name));
4790 }
4791 entry->puniformF_locations = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY,
4792 sizeof(GLhandleARB) * gl_info->limits.glsl_ps_float_constants);
4793 for (i = 0; i < gl_info->limits.glsl_ps_float_constants; ++i)
4794 {
4795 snprintf(glsl_name, sizeof(glsl_name), "PC[%i]", i);
4796 entry->puniformF_locations[i] = GL_EXTCALL(glGetUniformLocationARB(programId, glsl_name));
4797 }
4798 for (i = 0; i < MAX_CONST_I; ++i)
4799 {
4800 snprintf(glsl_name, sizeof(glsl_name), "PI[%i]", i);
4801 entry->puniformI_locations[i] = GL_EXTCALL(glGetUniformLocationARB(programId, glsl_name));
4802 }
4803
4804 if(pshader) {
4805 char name[32];
4806
4807 for(i = 0; i < MAX_TEXTURES; i++) {
4808 sprintf(name, "bumpenvmat%u", i);
4809 entry->bumpenvmat_location[i] = GL_EXTCALL(glGetUniformLocationARB(programId, name));
4810 sprintf(name, "luminancescale%u", i);
4811 entry->luminancescale_location[i] = GL_EXTCALL(glGetUniformLocationARB(programId, name));
4812 sprintf(name, "luminanceoffset%u", i);
4813 entry->luminanceoffset_location[i] = GL_EXTCALL(glGetUniformLocationARB(programId, name));
4814 }
4815
4816 if (ps_compile_args.np2_fixup) {
4817 if (WINEFIXUPINFO_ISVALID(entry)) {
4818 entry->np2Fixup_location = GL_EXTCALL(glGetUniformLocationARB(programId, "PsamplerNP2Fixup"));
4819 } else {
4820 FIXME("NP2 texcoord fixup needed for this pixelshader, but no fixup uniform found.\n");
4821 }
4822 }
4823 }
4824
4825 entry->posFixup_location = GL_EXTCALL(glGetUniformLocationARB(programId, "posFixup"));
4826 entry->ycorrection_location = GL_EXTCALL(glGetUniformLocationARB(programId, "ycorrection"));
4827 checkGLcall("Find glsl program uniform locations");
4828
4829 if (pshader
4830 && ((IWineD3DPixelShaderImpl *)pshader)->baseShader.reg_maps.shader_version.major >= 3
4831 && ((IWineD3DPixelShaderImpl *)pshader)->declared_in_count > vec4_varyings(3, gl_info))
4832 {
4833 TRACE("Shader %p needs vertex color clamping disabled\n", (void *)(uintptr_t)programId);
4834 entry->vertex_color_clamp = GL_FALSE;
4835 } else {
4836 entry->vertex_color_clamp = GL_FIXED_ONLY_ARB;
4837 }
4838
4839 /* Set the shader to allow uniform loading on it */
4840 GL_EXTCALL(glUseProgramObjectARB(programId));
4841 checkGLcall("glUseProgramObjectARB(programId)");
4842
4843#ifdef DEBUG_misha
4844 {
4845 GLint programIdTest = -1;
4846 glGetIntegerv(GL_CURRENT_PROGRAM, &programIdTest);
4847 Assert(programIdTest == programId);
4848 }
4849#endif
4850
4851 /* Load the vertex and pixel samplers now. The function that finds the mappings makes sure
4852 * that it stays the same for each vertexshader-pixelshader pair(=linked glsl program). If
4853 * a pshader with fixed function pipeline is used there are no vertex samplers, and if a
4854 * vertex shader with fixed function pixel processing is used we make sure that the card
4855 * supports enough samplers to allow the max number of vertex samplers with all possible
4856 * fixed function fragment processing setups. So once the program is linked these samplers
4857 * won't change.
4858 */
4859 if (vshader) shader_glsl_load_vsamplers(gl_info, device->texUnitMap, programId);
4860 if (pshader) shader_glsl_load_psamplers(gl_info, device->texUnitMap, programId);
4861
4862 /* If the local constants do not have to be loaded with the environment constants,
4863 * load them now to have them hardcoded in the GLSL program. This saves some CPU cycles
4864 * later
4865 */
4866 if (pshader && !((IWineD3DBaseShaderImpl *)pshader)->baseShader.load_local_constsF)
4867 {
4868 hardcode_local_constants((IWineD3DBaseShaderImpl *) pshader, gl_info, programId, 'P');
4869 }
4870 if (vshader && !((IWineD3DBaseShaderImpl *)vshader)->baseShader.load_local_constsF)
4871 {
4872 hardcode_local_constants((IWineD3DBaseShaderImpl *) vshader, gl_info, programId, 'V');
4873 }
4874}
4875
4876/* GL locking is done by the caller */
4877static GLhandleARB create_glsl_blt_shader(const struct wined3d_gl_info *gl_info, enum tex_types tex_type)
4878{
4879 GLhandleARB program_id;
4880 GLhandleARB vshader_id, pshader_id;
4881 static const char *blt_vshader[] =
4882 {
4883 "#version 120\n"
4884 "void main(void)\n"
4885 "{\n"
4886 " gl_Position = gl_Vertex;\n"
4887 " gl_FrontColor = vec4(1.0);\n"
4888 " gl_TexCoord[0] = gl_MultiTexCoord0;\n"
4889 "}\n"
4890 };
4891
4892 static const char *blt_pshaders[tex_type_count] =
4893 {
4894 /* tex_1d */
4895 NULL,
4896 /* tex_2d */
4897 "#version 120\n"
4898 "uniform sampler2D sampler;\n"
4899 "void main(void)\n"
4900 "{\n"
4901 " gl_FragDepth = texture2D(sampler, gl_TexCoord[0].xy).x;\n"
4902 "}\n",
4903 /* tex_3d */
4904 NULL,
4905 /* tex_cube */
4906 "#version 120\n"
4907 "uniform samplerCube sampler;\n"
4908 "void main(void)\n"
4909 "{\n"
4910 " gl_FragDepth = textureCube(sampler, gl_TexCoord[0].xyz).x;\n"
4911 "}\n",
4912 /* tex_rect */
4913 "#version 120\n"
4914 "#extension GL_ARB_texture_rectangle : enable\n"
4915 "uniform sampler2DRect sampler;\n"
4916 "void main(void)\n"
4917 "{\n"
4918 " gl_FragDepth = texture2DRect(sampler, gl_TexCoord[0].xy).x;\n"
4919 "}\n",
4920 };
4921
4922 if (!blt_pshaders[tex_type])
4923 {
4924 FIXME("tex_type %#x not supported\n", tex_type);
4925 tex_type = tex_2d;
4926 }
4927
4928 vshader_id = GL_EXTCALL(glCreateShaderObjectARB(GL_VERTEX_SHADER_ARB));
4929 GL_EXTCALL(glShaderSourceARB(vshader_id, 1, blt_vshader, NULL));
4930 GL_EXTCALL(glCompileShaderARB(vshader_id));
4931 shader_glsl_validate_compile_link(gl_info, vshader_id, FALSE);
4932
4933 pshader_id = GL_EXTCALL(glCreateShaderObjectARB(GL_FRAGMENT_SHADER_ARB));
4934 GL_EXTCALL(glShaderSourceARB(pshader_id, 1, &blt_pshaders[tex_type], NULL));
4935 GL_EXTCALL(glCompileShaderARB(pshader_id));
4936
4937 shader_glsl_validate_compile_link(gl_info, vshader_id, FALSE);
4938
4939 program_id = GL_EXTCALL(glCreateProgramObjectARB());
4940 GL_EXTCALL(glAttachObjectARB(program_id, vshader_id));
4941 GL_EXTCALL(glAttachObjectARB(program_id, pshader_id));
4942 GL_EXTCALL(glLinkProgramARB(program_id));
4943 shader_glsl_validate_compile_link(gl_info, program_id, TRUE);
4944
4945 /* Once linked we can mark the shaders for deletion. They will be deleted once the program
4946 * is destroyed
4947 */
4948 GL_EXTCALL(glDeleteObjectARB(vshader_id));
4949 GL_EXTCALL(glDeleteObjectARB(pshader_id));
4950 return program_id;
4951}
4952
4953/* GL locking is done by the caller */
4954static void shader_glsl_select(const struct wined3d_context *context, BOOL usePS, BOOL useVS)
4955{
4956 const struct wined3d_gl_info *gl_info = context->gl_info;
4957 IWineD3DDeviceImpl *device = context_get_device(context);
4958 struct shader_glsl_priv *priv = device->shader_priv;
4959 GLhandleARB program_id = 0;
4960 GLenum old_vertex_color_clamp, current_vertex_color_clamp;
4961
4962 old_vertex_color_clamp = priv->glsl_program ? priv->glsl_program->vertex_color_clamp : GL_FIXED_ONLY_ARB;
4963
4964 if (useVS || usePS) set_glsl_shader_program(context, device, usePS, useVS);
4965 else priv->glsl_program = NULL;
4966
4967 current_vertex_color_clamp = priv->glsl_program ? priv->glsl_program->vertex_color_clamp : GL_FIXED_ONLY_ARB;
4968
4969 if (old_vertex_color_clamp != current_vertex_color_clamp)
4970 {
4971 if (gl_info->supported[ARB_COLOR_BUFFER_FLOAT])
4972 {
4973 GL_EXTCALL(glClampColorARB(GL_CLAMP_VERTEX_COLOR_ARB, current_vertex_color_clamp));
4974 checkGLcall("glClampColorARB");
4975 }
4976 else
4977 {
4978 FIXME("vertex color clamp needs to be changed, but extension not supported.\n");
4979 }
4980 }
4981
4982 program_id = priv->glsl_program ? priv->glsl_program->programId : 0;
4983 if (program_id) TRACE("Using GLSL program %p\n", (void *)(uintptr_t)program_id);
4984 GL_EXTCALL(glUseProgramObjectARB(program_id));
4985 checkGLcall("glUseProgramObjectARB");
4986#ifdef DEBUG_misha
4987 {
4988 GLint programIdTest = -1;
4989 glGetIntegerv(GL_CURRENT_PROGRAM, &programIdTest);
4990 Assert(programIdTest == program_id);
4991 }
4992#endif
4993
4994 /* In case that NP2 texcoord fixup data is found for the selected program, trigger a reload of the
4995 * constants. This has to be done because it can't be guaranteed that sampler() (from state.c) is
4996 * called between selecting the shader and using it, which results in wrong fixup for some frames. */
4997 if (priv->glsl_program && WINEFIXUPINFO_ISVALID(priv->glsl_program))
4998 {
4999 shader_glsl_load_np2fixup_constants((IWineD3DDevice *)device, usePS, useVS);
5000 }
5001}
5002
5003/* GL locking is done by the caller */
5004static void shader_glsl_select_depth_blt(IWineD3DDevice *iface, enum tex_types tex_type) {
5005 IWineD3DDeviceImpl *This = (IWineD3DDeviceImpl *)iface;
5006 const struct wined3d_gl_info *gl_info = &This->adapter->gl_info;
5007 struct shader_glsl_priv *priv = This->shader_priv;
5008 GLhandleARB *blt_program = &priv->depth_blt_program[tex_type];
5009
5010 if (!*blt_program) {
5011 GLint loc;
5012 *blt_program = create_glsl_blt_shader(gl_info, tex_type);
5013 loc = GL_EXTCALL(glGetUniformLocationARB(*blt_program, "sampler"));
5014 GL_EXTCALL(glUseProgramObjectARB(*blt_program));
5015#ifdef DEBUG_misha
5016 {
5017 GLint programIdTest = -1;
5018 glGetIntegerv(GL_CURRENT_PROGRAM, &programIdTest);
5019 Assert(programIdTest == *blt_program);
5020 }
5021#endif
5022 GL_EXTCALL(glUniform1iARB(loc, 0));
5023 } else {
5024 GL_EXTCALL(glUseProgramObjectARB(*blt_program));
5025#ifdef DEBUG_misha
5026 {
5027 GLint programIdTest = -1;
5028 glGetIntegerv(GL_CURRENT_PROGRAM, &programIdTest);
5029 Assert(programIdTest == *blt_program);
5030 }
5031#endif
5032 }
5033}
5034
5035/* GL locking is done by the caller */
5036static void shader_glsl_deselect_depth_blt(IWineD3DDevice *iface) {
5037 IWineD3DDeviceImpl *This = (IWineD3DDeviceImpl *)iface;
5038 const struct wined3d_gl_info *gl_info = &This->adapter->gl_info;
5039 struct shader_glsl_priv *priv = This->shader_priv;
5040 GLhandleARB program_id;
5041
5042 program_id = priv->glsl_program ? priv->glsl_program->programId : 0;
5043 if (program_id) TRACE("Using GLSL program %p\n", (void *)(uintptr_t)program_id);
5044
5045 GL_EXTCALL(glUseProgramObjectARB(program_id));
5046 checkGLcall("glUseProgramObjectARB");
5047#ifdef DEBUG_misha
5048 {
5049 GLint programIdTest = -1;
5050 glGetIntegerv(GL_CURRENT_PROGRAM, &programIdTest);
5051 Assert(programIdTest == program_id);
5052 }
5053#endif
5054}
5055
5056static void shader_glsl_destroy(IWineD3DBaseShader *iface) {
5057 const struct list *linked_programs;
5058 IWineD3DBaseShaderImpl *This = (IWineD3DBaseShaderImpl *) iface;
5059 IWineD3DDeviceImpl *device = (IWineD3DDeviceImpl *)This->baseShader.device;
5060 struct shader_glsl_priv *priv = device->shader_priv;
5061 const struct wined3d_gl_info *gl_info;
5062 struct wined3d_context *context;
5063
5064 /* Note: Do not use QueryInterface here to find out which shader type this is because this code
5065 * can be called from IWineD3DBaseShader::Release
5066 */
5067 char pshader = shader_is_pshader_version(This->baseShader.reg_maps.shader_version.type);
5068
5069 if(pshader) {
5070 struct glsl_pshader_private *shader_data;
5071 shader_data = This->baseShader.backend_data;
5072 if(!shader_data || shader_data->num_gl_shaders == 0)
5073 {
5074 HeapFree(GetProcessHeap(), 0, shader_data);
5075 This->baseShader.backend_data = NULL;
5076 return;
5077 }
5078
5079 context = context_acquire(device, NULL, CTXUSAGE_RESOURCELOAD);
5080 gl_info = context->gl_info;
5081
5082 if (priv->glsl_program && (IWineD3DBaseShader *)priv->glsl_program->pshader == iface)
5083 {
5084 ENTER_GL();
5085 shader_glsl_select(context, FALSE, FALSE);
5086 LEAVE_GL();
5087 }
5088 } else {
5089 struct glsl_vshader_private *shader_data;
5090 shader_data = This->baseShader.backend_data;
5091 if(!shader_data || shader_data->num_gl_shaders == 0)
5092 {
5093 HeapFree(GetProcessHeap(), 0, shader_data);
5094 This->baseShader.backend_data = NULL;
5095 return;
5096 }
5097
5098 context = context_acquire(device, NULL, CTXUSAGE_RESOURCELOAD);
5099 gl_info = context->gl_info;
5100
5101 if (priv->glsl_program && (IWineD3DBaseShader *)priv->glsl_program->vshader == iface)
5102 {
5103 ENTER_GL();
5104 shader_glsl_select(context, FALSE, FALSE);
5105 LEAVE_GL();
5106 }
5107 }
5108
5109 linked_programs = &This->baseShader.linked_programs;
5110
5111 TRACE("Deleting linked programs\n");
5112 if (linked_programs->next) {
5113 struct glsl_shader_prog_link *entry, *entry2;
5114
5115 ENTER_GL();
5116 if(pshader) {
5117 LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, linked_programs, struct glsl_shader_prog_link, pshader_entry) {
5118 delete_glsl_program_entry(priv, gl_info, entry);
5119 }
5120 } else {
5121 LIST_FOR_EACH_ENTRY_SAFE(entry, entry2, linked_programs, struct glsl_shader_prog_link, vshader_entry) {
5122 delete_glsl_program_entry(priv, gl_info, entry);
5123 }
5124 }
5125 LEAVE_GL();
5126 }
5127
5128 if(pshader) {
5129 UINT i;
5130 struct glsl_pshader_private *shader_data = This->baseShader.backend_data;
5131
5132 ENTER_GL();
5133 for(i = 0; i < shader_data->num_gl_shaders; i++) {
5134 if (shader_data->gl_shaders[i].context==context_get_current())
5135 {
5136 TRACE("deleting pshader %p\n", (void *)(uintptr_t)shader_data->gl_shaders[i].prgId);
5137 GL_EXTCALL(glDeleteObjectARB(shader_data->gl_shaders[i].prgId));
5138 checkGLcall("glDeleteObjectARB");
5139 }
5140 else
5141 {
5142 WARN("Attempting to delete pshader %p created in ctx %p from ctx %p\n",
5143 (void *)(uintptr_t)shader_data->gl_shaders[i].prgId, shader_data->gl_shaders[i].context, context_get_current());
5144 }
5145 }
5146 LEAVE_GL();
5147 HeapFree(GetProcessHeap(), 0, shader_data->gl_shaders);
5148 }
5149 else
5150 {
5151 UINT i;
5152 struct glsl_vshader_private *shader_data = This->baseShader.backend_data;
5153
5154 ENTER_GL();
5155 for(i = 0; i < shader_data->num_gl_shaders; i++) {
5156 if (shader_data->gl_shaders[i].context==context_get_current())
5157 {
5158 TRACE("deleting vshader %p\n", (void *)(uintptr_t)shader_data->gl_shaders[i].prgId);
5159 GL_EXTCALL(glDeleteObjectARB(shader_data->gl_shaders[i].prgId));
5160 checkGLcall("glDeleteObjectARB");
5161 }
5162 else
5163 {
5164 WARN("Attempting to delete vshader %p created in ctx %p from ctx %p\n",
5165 (void *)(uintptr_t)shader_data->gl_shaders[i].prgId, shader_data->gl_shaders[i].context, context_get_current());
5166 }
5167 }
5168 LEAVE_GL();
5169 HeapFree(GetProcessHeap(), 0, shader_data->gl_shaders);
5170 }
5171
5172 HeapFree(GetProcessHeap(), 0, This->baseShader.backend_data);
5173 This->baseShader.backend_data = NULL;
5174
5175 context_release(context);
5176}
5177
5178static int glsl_program_key_compare(const void *key, const struct wine_rb_entry *entry)
5179{
5180 const glsl_program_key_t *k = key;
5181 const struct glsl_shader_prog_link *prog = WINE_RB_ENTRY_VALUE(entry,
5182 const struct glsl_shader_prog_link, program_lookup_entry);
5183 int cmp;
5184
5185 if (k->context > prog->context) return 1;
5186 else if (k->context < prog->context) return -1;
5187
5188 if (k->vshader > prog->vshader) return 1;
5189 else if (k->vshader < prog->vshader) return -1;
5190
5191 if (k->pshader > prog->pshader) return 1;
5192 else if (k->pshader < prog->pshader) return -1;
5193
5194 if (k->vshader && (cmp = memcmp(&k->vs_args, &prog->vs_args, sizeof(prog->vs_args)))) return cmp;
5195 if (k->pshader && (cmp = memcmp(&k->ps_args, &prog->ps_args, sizeof(prog->ps_args)))) return cmp;
5196
5197 return 0;
5198}
5199
5200static BOOL constant_heap_init(struct constant_heap *heap, unsigned int constant_count)
5201{
5202#ifndef VBOX
5203 SIZE_T size = (constant_count + 1) * sizeof(*heap->entries) + constant_count * sizeof(*heap->positions);
5204 void *mem = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, size);
5205#else
5206 SIZE_T size;
5207 void *mem;
5208
5209 /* Don't trash the heap if the input is bogus. */
5210 if (constant_count == 0)
5211 constant_count = 1;
5212
5213 size = (constant_count + 1) * sizeof(*heap->entries) + constant_count * sizeof(*heap->positions);
5214 mem = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, size);
5215#endif
5216
5217 if (!mem)
5218 {
5219 ERR("Failed to allocate memory\n");
5220 return FALSE;
5221 }
5222
5223 heap->entries = mem;
5224 heap->entries[1].version = 0;
5225 heap->positions = (unsigned int *)(heap->entries + constant_count + 1);
5226 heap->size = 1;
5227
5228 return TRUE;
5229}
5230
5231static void constant_heap_free(struct constant_heap *heap)
5232{
5233 HeapFree(GetProcessHeap(), 0, heap->entries);
5234}
5235
5236static const struct wine_rb_functions wined3d_glsl_program_rb_functions =
5237{
5238 wined3d_rb_alloc,
5239 wined3d_rb_realloc,
5240 wined3d_rb_free,
5241 glsl_program_key_compare,
5242};
5243
5244static HRESULT shader_glsl_alloc(IWineD3DDevice *iface) {
5245 IWineD3DDeviceImpl *This = (IWineD3DDeviceImpl *)iface;
5246 const struct wined3d_gl_info *gl_info = &This->adapter->gl_info;
5247 struct shader_glsl_priv *priv = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(struct shader_glsl_priv));
5248 SIZE_T stack_size = wined3d_log2i(max(gl_info->limits.glsl_vs_float_constants,
5249 gl_info->limits.glsl_ps_float_constants)) + 1;
5250
5251 if (!shader_buffer_init(&priv->shader_buffer))
5252 {
5253 ERR("Failed to initialize shader buffer.\n");
5254 goto fail;
5255 }
5256
5257 priv->stack = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, stack_size * sizeof(*priv->stack));
5258 if (!priv->stack)
5259 {
5260 ERR("Failed to allocate memory.\n");
5261 goto fail;
5262 }
5263 if (!constant_heap_init(&priv->vconst_heap, gl_info->limits.glsl_vs_float_constants))
5264 {
5265 ERR("Failed to initialize vertex shader constant heap\n");
5266 goto fail;
5267 }
5268 if (!constant_heap_init(&priv->pconst_heap, gl_info->limits.glsl_ps_float_constants))
5269 {
5270 ERR("Failed to initialize pixel shader constant heap\n");
5271 goto fail;
5272 }
5273
5274 if (wine_rb_init(&priv->program_lookup, &wined3d_glsl_program_rb_functions) == -1)
5275 {
5276 ERR("Failed to initialize rbtree.\n");
5277 goto fail;
5278 }
5279
5280 priv->next_constant_version = 1;
5281
5282 This->shader_priv = priv;
5283 return WINED3D_OK;
5284
5285fail:
5286 constant_heap_free(&priv->pconst_heap);
5287 constant_heap_free(&priv->vconst_heap);
5288 HeapFree(GetProcessHeap(), 0, priv->stack);
5289 shader_buffer_free(&priv->shader_buffer);
5290 HeapFree(GetProcessHeap(), 0, priv);
5291 return E_OUTOFMEMORY;
5292}
5293
5294/* Context activation is done by the caller. */
5295static void shader_glsl_free(IWineD3DDevice *iface) {
5296 IWineD3DDeviceImpl *This = (IWineD3DDeviceImpl *)iface;
5297 const struct wined3d_gl_info *gl_info = &This->adapter->gl_info;
5298 struct shader_glsl_priv *priv = This->shader_priv;
5299 int i;
5300
5301 ENTER_GL();
5302 for (i = 0; i < tex_type_count; ++i)
5303 {
5304 if (priv->depth_blt_program[i])
5305 {
5306 GL_EXTCALL(glDeleteObjectARB(priv->depth_blt_program[i]));
5307 }
5308 }
5309 LEAVE_GL();
5310
5311 wine_rb_destroy(&priv->program_lookup, NULL, NULL);
5312 constant_heap_free(&priv->pconst_heap);
5313 constant_heap_free(&priv->vconst_heap);
5314 HeapFree(GetProcessHeap(), 0, priv->stack);
5315 shader_buffer_free(&priv->shader_buffer);
5316
5317 HeapFree(GetProcessHeap(), 0, This->shader_priv);
5318 This->shader_priv = NULL;
5319}
5320
5321static BOOL shader_glsl_dirty_const(IWineD3DDevice *iface) {
5322 /* TODO: GL_EXT_bindable_uniform can be used to share constants across shaders */
5323 return FALSE;
5324}
5325
5326static void shader_glsl_get_caps(const struct wined3d_gl_info *gl_info, struct shader_caps *pCaps)
5327{
5328 /* Nvidia Geforce6/7 or Ati R4xx/R5xx cards with GLSL support, support VS 3.0 but older Nvidia/Ati
5329 * models with GLSL support only support 2.0. In case of nvidia we can detect VS 2.0 support based
5330 * on the version of NV_vertex_program.
5331 * For Ati cards there's no way using glsl (it abstracts the lowlevel info away) and also not
5332 * using ARB_vertex_program. It is safe to assume that when a card supports pixel shader 2.0 it
5333 * supports vertex shader 2.0 too and the way around. We can detect ps2.0 using the maximum number
5334 * of native instructions, so use that here. For more info see the pixel shader versioning code below.
5335 */
5336 if ((gl_info->supported[NV_VERTEX_PROGRAM2] && !gl_info->supported[NV_VERTEX_PROGRAM3])
5337 || gl_info->limits.arb_ps_instructions <= 512
5338 || gl_info->limits.glsl_vs_float_constants < 256)
5339 pCaps->VertexShaderVersion = WINED3DVS_VERSION(2,0);
5340 else
5341 pCaps->VertexShaderVersion = WINED3DVS_VERSION(3,0);
5342 TRACE_(d3d_caps)("Hardware vertex shader version %d.%d enabled (GLSL)\n", (pCaps->VertexShaderVersion >> 8) & 0xff, pCaps->VertexShaderVersion & 0xff);
5343 pCaps->MaxVertexShaderConst = gl_info->limits.glsl_vs_float_constants;
5344
5345 /* Older DX9-class videocards (GeforceFX / Radeon >9500/X*00) only support pixel shader 2.0/2.0a/2.0b.
5346 * In OpenGL the extensions related to GLSL abstract lowlevel GL info away which is needed
5347 * to distinguish between 2.0 and 3.0 (and 2.0a/2.0b). In case of Nvidia we use their fragment
5348 * program extensions. On other hardware including ATI GL_ARB_fragment_program offers the info
5349 * in max native instructions. Intel and others also offer the info in this extension but they
5350 * don't support GLSL (at least on Windows).
5351 *
5352 * PS2.0 requires at least 96 instructions, 2.0a/2.0b go up to 512. Assume that if the number
5353 * of instructions is 512 or less we have to do with ps2.0 hardware.
5354 * NOTE: ps3.0 hardware requires 512 or more instructions but ati and nvidia offer 'enough' (1024 vs 4096) on their most basic ps3.0 hardware.
5355 */
5356 if ((gl_info->supported[NV_FRAGMENT_PROGRAM] && !gl_info->supported[NV_FRAGMENT_PROGRAM2])
5357 || gl_info->limits.arb_ps_instructions <= 512
5358 || gl_info->limits.glsl_vs_float_constants < 256)
5359 pCaps->PixelShaderVersion = WINED3DPS_VERSION(2,0);
5360 else
5361 pCaps->PixelShaderVersion = WINED3DPS_VERSION(3,0);
5362
5363 pCaps->MaxPixelShaderConst = gl_info->limits.glsl_ps_float_constants;
5364
5365 /* FIXME: The following line is card dependent. -8.0 to 8.0 is the
5366 * Direct3D minimum requirement.
5367 *
5368 * Both GL_ARB_fragment_program and GLSL require a "maximum representable magnitude"
5369 * of colors to be 2^10, and 2^32 for other floats. Should we use 1024 here?
5370 *
5371 * The problem is that the refrast clamps temporary results in the shader to
5372 * [-MaxValue;+MaxValue]. If the card's max value is bigger than the one we advertize here,
5373 * then applications may miss the clamping behavior. On the other hand, if it is smaller,
5374 * the shader will generate incorrect results too. Unfortunately, GL deliberately doesn't
5375 * offer a way to query this.
5376 */
5377 pCaps->PixelShader1xMaxValue = 8.0;
5378 TRACE_(d3d_caps)("Hardware pixel shader version %d.%d enabled (GLSL)\n", (pCaps->PixelShaderVersion >> 8) & 0xff, pCaps->PixelShaderVersion & 0xff);
5379
5380 pCaps->VSClipping = TRUE;
5381}
5382
5383static BOOL shader_glsl_color_fixup_supported(struct color_fixup_desc fixup)
5384{
5385 if (TRACE_ON(d3d_shader) && TRACE_ON(d3d))
5386 {
5387 TRACE("Checking support for fixup:\n");
5388 dump_color_fixup_desc(fixup);
5389 }
5390
5391 /* We support everything except YUV conversions. */
5392 if (!is_complex_fixup(fixup))
5393 {
5394 TRACE("[OK]\n");
5395 return TRUE;
5396 }
5397
5398 TRACE("[FAILED]\n");
5399 return FALSE;
5400}
5401
5402static const SHADER_HANDLER shader_glsl_instruction_handler_table[WINED3DSIH_TABLE_SIZE] =
5403{
5404 /* WINED3DSIH_ABS */ shader_glsl_map2gl,
5405 /* WINED3DSIH_ADD */ shader_glsl_arith,
5406 /* WINED3DSIH_BEM */ shader_glsl_bem,
5407 /* WINED3DSIH_BREAK */ shader_glsl_break,
5408 /* WINED3DSIH_BREAKC */ shader_glsl_breakc,
5409 /* WINED3DSIH_BREAKP */ NULL,
5410 /* WINED3DSIH_CALL */ shader_glsl_call,
5411 /* WINED3DSIH_CALLNZ */ shader_glsl_callnz,
5412 /* WINED3DSIH_CMP */ shader_glsl_cmp,
5413 /* WINED3DSIH_CND */ shader_glsl_cnd,
5414 /* WINED3DSIH_CRS */ shader_glsl_cross,
5415 /* WINED3DSIH_CUT */ NULL,
5416 /* WINED3DSIH_DCL */ NULL,
5417 /* WINED3DSIH_DEF */ NULL,
5418 /* WINED3DSIH_DEFB */ NULL,
5419 /* WINED3DSIH_DEFI */ NULL,
5420 /* WINED3DSIH_DP2ADD */ shader_glsl_dp2add,
5421 /* WINED3DSIH_DP3 */ shader_glsl_dot,
5422 /* WINED3DSIH_DP4 */ shader_glsl_dot,
5423 /* WINED3DSIH_DST */ shader_glsl_dst,
5424 /* WINED3DSIH_DSX */ shader_glsl_map2gl,
5425 /* WINED3DSIH_DSY */ shader_glsl_map2gl,
5426 /* WINED3DSIH_ELSE */ shader_glsl_else,
5427 /* WINED3DSIH_EMIT */ NULL,
5428 /* WINED3DSIH_ENDIF */ shader_glsl_end,
5429 /* WINED3DSIH_ENDLOOP */ shader_glsl_end,
5430 /* WINED3DSIH_ENDREP */ shader_glsl_end,
5431 /* WINED3DSIH_EXP */ shader_glsl_map2gl,
5432 /* WINED3DSIH_EXPP */ shader_glsl_expp,
5433 /* WINED3DSIH_FRC */ shader_glsl_map2gl,
5434 /* WINED3DSIH_IADD */ NULL,
5435 /* WINED3DSIH_IF */ shader_glsl_if,
5436 /* WINED3DSIH_IFC */ shader_glsl_ifc,
5437 /* WINED3DSIH_IGE */ NULL,
5438 /* WINED3DSIH_LABEL */ shader_glsl_label,
5439 /* WINED3DSIH_LIT */ shader_glsl_lit,
5440 /* WINED3DSIH_LOG */ shader_glsl_log,
5441 /* WINED3DSIH_LOGP */ shader_glsl_log,
5442 /* WINED3DSIH_LOOP */ shader_glsl_loop,
5443 /* WINED3DSIH_LRP */ shader_glsl_lrp,
5444 /* WINED3DSIH_LT */ NULL,
5445 /* WINED3DSIH_M3x2 */ shader_glsl_mnxn,
5446 /* WINED3DSIH_M3x3 */ shader_glsl_mnxn,
5447 /* WINED3DSIH_M3x4 */ shader_glsl_mnxn,
5448 /* WINED3DSIH_M4x3 */ shader_glsl_mnxn,
5449 /* WINED3DSIH_M4x4 */ shader_glsl_mnxn,
5450 /* WINED3DSIH_MAD */ shader_glsl_mad,
5451 /* WINED3DSIH_MAX */ shader_glsl_map2gl,
5452 /* WINED3DSIH_MIN */ shader_glsl_map2gl,
5453 /* WINED3DSIH_MOV */ shader_glsl_mov,
5454 /* WINED3DSIH_MOVA */ shader_glsl_mov,
5455 /* WINED3DSIH_MUL */ shader_glsl_arith,
5456 /* WINED3DSIH_NOP */ NULL,
5457 /* WINED3DSIH_NRM */ shader_glsl_nrm,
5458 /* WINED3DSIH_PHASE */ NULL,
5459 /* WINED3DSIH_POW */ shader_glsl_pow,
5460 /* WINED3DSIH_RCP */ shader_glsl_rcp,
5461 /* WINED3DSIH_REP */ shader_glsl_rep,
5462 /* WINED3DSIH_RET */ shader_glsl_ret,
5463 /* WINED3DSIH_RSQ */ shader_glsl_rsq,
5464#ifdef VBOX_WITH_VMSVGA
5465 /* WINED3DSIH_SETP */ shader_glsl_setp,
5466#else
5467 /* WINED3DSIH_SETP */ NULL,
5468#endif
5469 /* WINED3DSIH_SGE */ shader_glsl_compare,
5470 /* WINED3DSIH_SGN */ shader_glsl_sgn,
5471 /* WINED3DSIH_SINCOS */ shader_glsl_sincos,
5472 /* WINED3DSIH_SLT */ shader_glsl_compare,
5473 /* WINED3DSIH_SUB */ shader_glsl_arith,
5474 /* WINED3DSIH_TEX */ shader_glsl_tex,
5475 /* WINED3DSIH_TEXBEM */ shader_glsl_texbem,
5476 /* WINED3DSIH_TEXBEML */ shader_glsl_texbem,
5477 /* WINED3DSIH_TEXCOORD */ shader_glsl_texcoord,
5478 /* WINED3DSIH_TEXDEPTH */ shader_glsl_texdepth,
5479 /* WINED3DSIH_TEXDP3 */ shader_glsl_texdp3,
5480 /* WINED3DSIH_TEXDP3TEX */ shader_glsl_texdp3tex,
5481 /* WINED3DSIH_TEXKILL */ shader_glsl_texkill,
5482 /* WINED3DSIH_TEXLDD */ shader_glsl_texldd,
5483 /* WINED3DSIH_TEXLDL */ shader_glsl_texldl,
5484 /* WINED3DSIH_TEXM3x2DEPTH */ shader_glsl_texm3x2depth,
5485 /* WINED3DSIH_TEXM3x2PAD */ shader_glsl_texm3x2pad,
5486 /* WINED3DSIH_TEXM3x2TEX */ shader_glsl_texm3x2tex,
5487 /* WINED3DSIH_TEXM3x3 */ shader_glsl_texm3x3,
5488 /* WINED3DSIH_TEXM3x3DIFF */ NULL,
5489 /* WINED3DSIH_TEXM3x3PAD */ shader_glsl_texm3x3pad,
5490 /* WINED3DSIH_TEXM3x3SPEC */ shader_glsl_texm3x3spec,
5491 /* WINED3DSIH_TEXM3x3TEX */ shader_glsl_texm3x3tex,
5492 /* WINED3DSIH_TEXM3x3VSPEC */ shader_glsl_texm3x3vspec,
5493 /* WINED3DSIH_TEXREG2AR */ shader_glsl_texreg2ar,
5494 /* WINED3DSIH_TEXREG2GB */ shader_glsl_texreg2gb,
5495 /* WINED3DSIH_TEXREG2RGB */ shader_glsl_texreg2rgb,
5496};
5497
5498static void shader_glsl_handle_instruction(const struct wined3d_shader_instruction *ins) {
5499 SHADER_HANDLER hw_fct;
5500
5501 /* Select handler */
5502 hw_fct = shader_glsl_instruction_handler_table[ins->handler_idx];
5503
5504 /* Unhandled opcode */
5505 if (!hw_fct)
5506 {
5507 FIXME("Backend can't handle opcode %#x\n", ins->handler_idx);
5508 return;
5509 }
5510 hw_fct(ins);
5511
5512 shader_glsl_add_instruction_modifiers(ins);
5513}
5514
5515const shader_backend_t glsl_shader_backend = {
5516 shader_glsl_handle_instruction,
5517 shader_glsl_select,
5518 shader_glsl_select_depth_blt,
5519 shader_glsl_deselect_depth_blt,
5520 shader_glsl_update_float_vertex_constants,
5521 shader_glsl_update_float_pixel_constants,
5522 shader_glsl_load_constants,
5523 shader_glsl_load_np2fixup_constants,
5524 shader_glsl_destroy,
5525 shader_glsl_alloc,
5526 shader_glsl_free,
5527 shader_glsl_dirty_const,
5528 shader_glsl_get_caps,
5529 shader_glsl_color_fixup_supported,
5530};
5531
#if defined(VBOXWINEDBG_SHADERS) || defined(VBOX_WINE_WITH_PROFILE)
/*
 * printf-style debug helper: formats the message into a fixed 8 KiB stack
 * buffer and passes it to OutputDebugStringA(). Output that does not fit is
 * truncated.
 *
 * szString  printf-style format string.
 * ...       arguments consumed by the format string.
 */
void vboxWDbgPrintF(char * szString, ...)
{
    char szBuffer[4096*2] = {0};
    va_list pArgList;
    va_start(pArgList, szString);
    /* _vsnprintf() does not write a terminator when the output reaches the
     * given count, so keep the last byte out of its reach and terminate
     * explicitly; otherwise OutputDebugStringA() could read past the buffer. */
    _vsnprintf(szBuffer, sizeof(szBuffer) - 1, szString, pArgList);
    szBuffer[sizeof(szBuffer) - 1] = '\0';
    va_end(pArgList);

    OutputDebugStringA(szBuffer);
}
#endif
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette