28{
30
31 {
33
34 vector unsigned char vinvert,vperm1,vperm2,vperm3,vperm4;
36 vector float vbs1,vbs2,vbs3,vbs4,vbs5,vbs6,vbs7,vbs8;
37 vector float vbs9,vbs10,vbs11,vbs12,vbs13,vbs14,vbs15,vbs16;
41
42 vzero = vec_xor(vzero,vzero);
43#ifdef __APPLE__
44 vinvert = (
vector unsigned char)(12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3);
45#else
46 vinvert = (
vector unsigned char){12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3};
47#endif
48 vperm1 = vec_lvsl(0,
b1);
49 vperm2 = vec_perm(vperm1,vperm1,vinvert);
50
55 v5 = vec_perm(
v1,
v2,vperm1);
56 v6 = vec_perm(
v3,v4,vperm2);
57
58 vbs1 = vec_add(v5,v6);
59 vbs8 = vec_sub(v5,v6);
60
63 v5 = vec_perm(
v2,
v1,vperm1);
64 v6 = vec_perm(v4,
v3,vperm2);
65
66 vbs2 = vec_add(v5,v6);
67 vbs7 = vec_sub(v5,v6);
68
71 v5 = vec_perm(
v1,
v2,vperm1);
72 v6 = vec_perm(
v3,v4,vperm2);
73
74 vbs3 = vec_add(v5,v6);
75 vbs6 = vec_sub(v5,v6);
76
78 v5 = vec_perm(
v2,
v1,vperm1);
79 v6 = vec_perm(
v1,
v3,vperm2);
80
81 vbs4 = vec_add(v5,v6);
82 vbs5 = vec_sub(v5,v6);
83
84 v1 = vec_ld(0,costab);
85 vbs8 = vec_madd(vbs8,
v1,vzero);
86 v2 = vec_ld(16,costab);
87 vbs7 = vec_madd(vbs7,
v2,vzero);
88 v3 = vec_ld(32,costab);
89 vbs6 = vec_madd(vbs6,
v3,vzero);
90 v4 = vec_ld(48,costab);
91 vbs5 = vec_madd(vbs5,v4,vzero);
92 vbs6 = vec_perm(vbs6,vbs6,vinvert);
93 vbs5 = vec_perm(vbs5,vbs5,vinvert);
94
95
97
98 v1 = vec_perm(vbs4,vbs4,vinvert);
99 vbs9 = vec_add(vbs1,
v1);
100 v3 = vec_sub(vbs1,
v1);
101 v5 = vec_ld(0,costab);
102 v2 = vec_perm(vbs3,vbs3,vinvert);
103 vbs10 = vec_add(vbs2,
v2);
104 v4 = vec_sub(vbs2,
v2);
105 v6 = vec_ld(16,costab);
106 vbs12 = vec_madd(
v3,v5,vzero);
107 vbs11 = vec_madd(v4,v6,vzero);
108
109 v7 = vec_sub(vbs7,vbs6);
110 v8 = vec_sub(vbs8,vbs5);
111 vbs13 = vec_add(vbs5,vbs8);
112 vbs14 = vec_add(vbs6,vbs7);
113 vbs15 = vec_madd(v7,v6,vzero);
114 vbs16 = vec_madd(v8,v5,vzero);
115
116
118
119 v1 = vec_perm(vbs10,vbs10,vinvert);
120 v5 = vec_perm(vbs14,vbs14,vinvert);
121 vbs1 = vec_add(
v1,vbs9);
122 vbs5 = vec_add(v5,vbs13);
123 v2 = vec_sub(vbs9,
v1);
124 v6 = vec_sub(vbs13,v5);
125 v3 = vec_ld(0,costab);
126 vbs11 = vec_perm(vbs11,vbs11,vinvert);
127 vbs15 = vec_perm(vbs15,vbs15,vinvert);
128 vbs3 = vec_add(vbs11,vbs12);
129 vbs7 = vec_add(vbs15,vbs16);
130 v4 = vec_sub(vbs12,vbs11);
131 v7 = vec_sub(vbs16,vbs15);
132 vbs2 = vec_madd(
v2,
v3,vzero);
133 vbs4 = vec_madd(v4,
v3,vzero);
134 vbs6 = vec_madd(v6,
v3,vzero);
135 vbs8 = vec_madd(v7,
v3,vzero);
136
137 vbs2 = vec_perm(vbs2,vbs2,vinvert);
138 vbs4 = vec_perm(vbs4,vbs4,vinvert);
139 vbs6 = vec_perm(vbs6,vbs6,vinvert);
140 vbs8 = vec_perm(vbs8,vbs8,vinvert);
141
142
144
145#ifdef __APPLE__
146 vperm1 = (
vector unsigned char)(0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23);
147 vperm2 = (
vector unsigned char)(12,13,14,15,8,9,10,11,28,29,30,31,24,25,26,27);
148 vperm3 = (
vector unsigned char)(0,1,2,3,4,5,6,7,20,21,22,23,16,17,18,19);
149#else
150 vperm1 = (
vector unsigned char){0,1,2,3,4,5,6,7,16,17,18,19,20,21,22,23};
151 vperm2 = (
vector unsigned char){12,13,14,15,8,9,10,11,28,29,30,31,24,25,26,27};
152 vperm3 = (
vector unsigned char){0,1,2,3,4,5,6,7,20,21,22,23,16,17,18,19};
153#endif
154 vperm4 = vec_add(vperm3,vec_splat_u8(8));
155
156 v1 = vec_ld(0,costab);
157 v2 = vec_splat(
v1,0);
158 v3 = vec_splat(
v1,1);
160
161 v2 = vec_perm(vbs1,vbs3,vperm1);
162 v3 = vec_perm(vbs2,vbs4,vperm1);
163 v4 = vec_perm(vbs1,vbs3,vperm2);
164 v5 = vec_perm(vbs2,vbs4,vperm2);
169 v4 = vec_madd(v6,
v1,vzero);
170 v5 = vec_nmsub(v7,
v1,vzero);
171 vbs9 = vec_perm(
v2,v4,vperm3);
172 vbs11 = vec_perm(
v2,v4,vperm4);
173 vbs10 = vec_perm(
v3,v5,vperm3);
174 vbs12 = vec_perm(
v3,v5,vperm4);
175
176 v2 = vec_perm(vbs5,vbs7,vperm1);
177 v3 = vec_perm(vbs6,vbs8,vperm1);
178 v4 = vec_perm(vbs5,vbs7,vperm2);
179 v5 = vec_perm(vbs6,vbs8,vperm2);
184 v4 = vec_madd(v6,
v1,vzero);
185 v5 = vec_nmsub(v7,
v1,vzero);
186 vbs13 = vec_perm(
v2,v4,vperm3);
187 vbs15 = vec_perm(
v2,v4,vperm4);
188 vbs14 = vec_perm(
v3,v5,vperm3);
189 vbs16 = vec_perm(
v3,v5,vperm4);
190
191
193
194 v1 = vec_lde(0,costab);
195#ifdef __APPLE__
197#else
199#endif
200 v3 = vec_splat(
v1,0);
201 v1 = vec_madd(
v2,
v3,vzero);
202
203 v2 = vec_mergeh(vbs9,vbs10);
204 v3 = vec_mergel(vbs9,vbs10);
205 v4 = vec_mergeh(vbs11,vbs12);
206 v5 = vec_mergel(vbs11,vbs12);
207 v6 = vec_mergeh(
v2,
v3);
208 v7 = vec_mergel(
v2,
v3);
209 v2 = vec_mergeh(v4,v5);
210 v3 = vec_mergel(v4,v5);
211 v4 = vec_sub(v6,v7);
213 v6 = vec_add(v6,v7);
215 v2 = vec_madd(v4,
v1,vzero);
216 v3 = vec_madd(v5,
v1,vzero);
217 vbs1 = vec_mergeh(v6,
v2);
218 vbs2 = vec_mergel(v6,
v2);
219 vbs3 = vec_mergeh(v7,
v3);
220 vbs4 = vec_mergel(v7,
v3);
221
222 v2 = vec_mergeh(vbs13,vbs14);
223 v3 = vec_mergel(vbs13,vbs14);
224 v4 = vec_mergeh(vbs15,vbs16);
225 v5 = vec_mergel(vbs15,vbs16);
226 v6 = vec_mergeh(
v2,
v3);
227 v7 = vec_mergel(
v2,
v3);
228 v2 = vec_mergeh(v4,v5);
229 v3 = vec_mergel(v4,v5);
230 v4 = vec_sub(v6,v7);
232 v6 = vec_add(v6,v7);
234 v2 = vec_madd(v4,
v1,vzero);
235 v3 = vec_madd(v5,
v1,vzero);
236 vbs5 = vec_mergeh(v6,
v2);
237 vbs6 = vec_mergel(v6,
v2);
238 vbs7 = vec_mergeh(v7,
v3);
239 vbs8 = vec_mergel(v7,
v3);
240
242 vec_st(vbs2,16,
bufs);
243 vec_st(vbs3,32,
bufs);
244 vec_st(vbs4,48,
bufs);
245 vec_st(vbs5,64,
bufs);
246 vec_st(vbs6,80,
bufs);
247 vec_st(vbs7,96,
bufs);
248 vec_st(vbs8,112,
bufs);
249 }
250
251 {
254
257
259 {
263 }
264
266 {
274 }
275 }
276
277
278 out0[0x10*16] =
bufs[0];
279 out0[0x10*15] =
bufs[16+0] +
bufs[16+8];
280 out0[0x10*14] =
bufs[8];
281 out0[0x10*13] =
bufs[16+8] +
bufs[16+4];
282 out0[0x10*12] =
bufs[4];
283 out0[0x10*11] =
bufs[16+4] +
bufs[16+12];
284 out0[0x10*10] =
bufs[12];
285 out0[0x10* 9] =
bufs[16+12] +
bufs[16+2];
286 out0[0x10* 8] =
bufs[2];
287 out0[0x10* 7] =
bufs[16+2] +
bufs[16+10];
288 out0[0x10* 6] =
bufs[10];
289 out0[0x10* 5] =
bufs[16+10] +
bufs[16+6];
290 out0[0x10* 4] =
bufs[6];
291 out0[0x10* 3] =
bufs[16+6] +
bufs[16+14];
292 out0[0x10* 2] =
bufs[14];
293 out0[0x10* 1] =
bufs[16+14] +
bufs[16+1];
294 out0[0x10* 0] =
bufs[1];
295
296 out1[0x10* 0] =
bufs[1];
297 out1[0x10* 1] =
bufs[16+1] +
bufs[16+9];
298 out1[0x10* 2] =
bufs[9];
299 out1[0x10* 3] =
bufs[16+9] +
bufs[16+5];
300 out1[0x10* 4] =
bufs[5];
301 out1[0x10* 5] =
bufs[16+5] +
bufs[16+13];
302 out1[0x10* 6] =
bufs[13];
303 out1[0x10* 7] =
bufs[16+13] +
bufs[16+3];
304 out1[0x10* 8] =
bufs[3];
305 out1[0x10* 9] =
bufs[16+3] +
bufs[16+11];
306 out1[0x10*10] =
bufs[11];
307 out1[0x10*11] =
bufs[16+11] +
bufs[16+7];
308 out1[0x10*12] =
bufs[7];
309 out1[0x10*13] =
bufs[16+7] +
bufs[16+15];
310 out1[0x10*14] =
bufs[15];
311 out1[0x10*15] =
bufs[16+15];
312
313}
GLfloat GLfloat GLfloat GLfloat v3
GLfloat GLfloat GLfloat v2
GLsizei GLenum const GLvoid GLsizei GLenum GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLint GLint GLint GLshort GLshort GLshort GLubyte GLubyte GLubyte GLuint GLuint GLuint GLushort GLushort GLushort GLbyte GLbyte GLbyte GLbyte GLdouble GLdouble GLdouble GLdouble GLfloat GLfloat GLfloat GLfloat GLint GLint GLint GLint GLshort GLshort GLshort GLshort GLubyte GLubyte GLubyte GLubyte GLuint GLuint GLuint GLuint GLushort GLushort GLushort GLushort GLboolean const GLdouble const GLfloat const GLint const GLshort const GLbyte const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLdouble const GLfloat const GLfloat const GLint const GLint const GLshort const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort const GLdouble const GLfloat const GLint const GLshort GLenum GLenum GLenum GLfloat GLenum GLint GLenum GLenum GLenum GLfloat GLenum GLenum GLint GLenum GLfloat GLenum GLint GLint GLushort GLenum GLenum GLfloat GLenum GLenum GLint GLfloat const GLubyte GLenum GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLint GLint GLsizei GLsizei GLint GLenum GLenum const GLvoid GLenum GLenum const GLfloat GLenum GLenum const GLint GLenum GLenum const GLdouble GLenum GLenum const GLfloat GLenum GLenum const GLint GLsizei GLuint GLfloat GLuint GLbitfield GLfloat GLint GLuint GLboolean GLenum GLfloat GLenum GLbitfield GLenum GLfloat GLfloat GLint GLint const GLfloat GLenum GLfloat GLfloat GLint GLint GLfloat GLfloat GLint GLint const GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat GLint GLfloat GLfloat const GLdouble const GLfloat const GLdouble const GLfloat GLint i
static CRYPT_DATA_BLOB b1[]
static float(__cdecl *square_half_float)(float x