00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029 #include "util/u_memory.h"
00030 #include "pipe/p_shader_tokens.h"
00031 #include "tgsi/tgsi_parse.h"
00032 #include "tgsi/tgsi_util.h"
00033 #include "tgsi/tgsi_exec.h"
00034 #include "draw_vs.h"
00035 #include "draw_vs_aos.h"
00036 #include "draw_vertex.h"
00037
00038 #include "rtasm/rtasm_x86sse.h"
00039
00040 #ifdef PIPE_ARCH_X86
00041
00042
00043
00044
00045
00046 static void emit_load_R32G32B32A32( struct aos_compilation *cp,
00047 struct x86_reg data,
00048 struct x86_reg src_ptr )
00049 {
00050 sse_movups(cp->func, data, src_ptr);
00051 }
00052
00053 static void emit_load_R32G32B32( struct aos_compilation *cp,
00054 struct x86_reg data,
00055 struct x86_reg src_ptr )
00056 {
00057 #if 1
00058 sse_movss(cp->func, data, x86_make_disp(src_ptr, 8));
00059
00060 sse_shufps(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ), SHUF(X,Y,Z,W) );
00061
00062 sse_shufps(cp->func, data, data, SHUF(Y,Z,X,W) );
00063
00064 sse_movlps(cp->func, data, src_ptr);
00065
00066 #else
00067 sse_movups(cp->func, data, src_ptr);
00068
00069 sse2_pshufd(cp->func, data, data, SHUF(W,X,Y,Z) );
00070
00071 sse_movss(cp->func, data, aos_get_internal_xmm( cp, IMM_ONES ) );
00072
00073 sse2_pshufd(cp->func, data, data, SHUF(Y,Z,W,X) );
00074
00075 #endif
00076 }
00077
00078 static void emit_load_R32G32( struct aos_compilation *cp,
00079 struct x86_reg data,
00080 struct x86_reg src_ptr )
00081 {
00082 sse_movups(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ) );
00083 sse_movlps(cp->func, data, src_ptr);
00084 }
00085
00086
00087 static void emit_load_R32( struct aos_compilation *cp,
00088 struct x86_reg data,
00089 struct x86_reg src_ptr )
00090 {
00091 sse_movss(cp->func, data, src_ptr);
00092 sse_orps(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ) );
00093 }
00094
00095
00096 static void emit_load_R8G8B8A8_UNORM( struct aos_compilation *cp,
00097 struct x86_reg data,
00098 struct x86_reg src_ptr )
00099 {
00100 sse_movss(cp->func, data, src_ptr);
00101 sse2_punpcklbw(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ));
00102 sse2_punpcklbw(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ));
00103 sse2_cvtdq2ps(cp->func, data, data);
00104 sse_mulps(cp->func, data, aos_get_internal(cp, IMM_INV_255));
00105 }
00106
00107
00108
00109
00110
00111 static void emit_swizzle( struct aos_compilation *cp,
00112 struct x86_reg dest,
00113 struct x86_reg src,
00114 ubyte shuffle )
00115 {
00116 sse_shufps(cp->func, dest, src, shuffle);
00117 }
00118
00119
00120
00121 static boolean get_buffer_ptr( struct aos_compilation *cp,
00122 boolean linear,
00123 unsigned buf_idx,
00124 struct x86_reg elt,
00125 struct x86_reg ptr)
00126 {
00127 struct x86_reg buf = x86_make_disp(aos_get_x86( cp, 0, X86_BUFFERS ),
00128 buf_idx * sizeof(struct aos_buffer));
00129
00130 struct x86_reg buf_stride = x86_make_disp(buf,
00131 Offset(struct aos_buffer, stride));
00132 if (linear) {
00133 struct x86_reg buf_ptr = x86_make_disp(buf,
00134 Offset(struct aos_buffer, ptr));
00135
00136
00137
00138
00139 x86_mov(cp->func, ptr, buf_ptr);
00140 x86_mov(cp->func, elt, buf_stride);
00141 x86_add(cp->func, elt, ptr);
00142 if (buf_idx == 0) sse_prefetchnta(cp->func, x86_make_disp(elt, 192));
00143 x86_mov(cp->func, buf_ptr, elt);
00144 }
00145 else {
00146 struct x86_reg buf_base_ptr = x86_make_disp(buf,
00147 Offset(struct aos_buffer, base_ptr));
00148
00149
00150
00151
00152 x86_mov(cp->func, ptr, buf_stride);
00153 x86_imul(cp->func, ptr, elt);
00154 x86_add(cp->func, ptr, buf_base_ptr);
00155 }
00156
00157 cp->insn_counter++;
00158
00159 return TRUE;
00160 }
00161
00162
00163 static boolean load_input( struct aos_compilation *cp,
00164 unsigned idx,
00165 struct x86_reg bufptr )
00166 {
00167 unsigned format = cp->vaos->base.key.element[idx].in.format;
00168 unsigned offset = cp->vaos->base.key.element[idx].in.offset;
00169 struct x86_reg dataXMM = aos_get_xmm_reg(cp);
00170
00171
00172
00173 struct x86_reg src = x86_make_disp(bufptr, offset);
00174
00175 aos_adopt_xmm_reg( cp,
00176 dataXMM,
00177 TGSI_FILE_INPUT,
00178 idx,
00179 TRUE );
00180
00181 switch (format) {
00182 case PIPE_FORMAT_R32_FLOAT:
00183 emit_load_R32(cp, dataXMM, src);
00184 break;
00185 case PIPE_FORMAT_R32G32_FLOAT:
00186 emit_load_R32G32(cp, dataXMM, src);
00187 break;
00188 case PIPE_FORMAT_R32G32B32_FLOAT:
00189 emit_load_R32G32B32(cp, dataXMM, src);
00190 break;
00191 case PIPE_FORMAT_R32G32B32A32_FLOAT:
00192 emit_load_R32G32B32A32(cp, dataXMM, src);
00193 break;
00194 case PIPE_FORMAT_B8G8R8A8_UNORM:
00195 emit_load_R8G8B8A8_UNORM(cp, dataXMM, src);
00196 emit_swizzle(cp, dataXMM, dataXMM, SHUF(Z,Y,X,W));
00197 break;
00198 case PIPE_FORMAT_R8G8B8A8_UNORM:
00199 emit_load_R8G8B8A8_UNORM(cp, dataXMM, src);
00200 break;
00201 default:
00202 ERROR(cp, "unhandled input format");
00203 return FALSE;
00204 }
00205
00206 return TRUE;
00207 }
00208
00209 static boolean load_inputs( struct aos_compilation *cp,
00210 unsigned buffer,
00211 struct x86_reg ptr )
00212 {
00213 unsigned i;
00214
00215 for (i = 0; i < cp->vaos->base.key.nr_inputs; i++) {
00216 if (cp->vaos->base.key.element[i].in.buffer == buffer) {
00217
00218 if (!load_input( cp, i, ptr ))
00219 return FALSE;
00220
00221 cp->insn_counter++;
00222 }
00223 }
00224
00225 return TRUE;
00226 }
00227
00228 boolean aos_init_inputs( struct aos_compilation *cp, boolean linear )
00229 {
00230 unsigned i;
00231 for (i = 0; i < cp->vaos->nr_vb; i++) {
00232 struct x86_reg buf = x86_make_disp(aos_get_x86( cp, 0, X86_BUFFERS ),
00233 i * sizeof(struct aos_buffer));
00234
00235 struct x86_reg buf_base_ptr = x86_make_disp(buf,
00236 Offset(struct aos_buffer, base_ptr));
00237
00238 if (cp->vaos->base.key.const_vbuffers & (1<<i)) {
00239 struct x86_reg ptr = cp->tmp_EAX;
00240
00241 x86_mov(cp->func, ptr, buf_base_ptr);
00242
00243
00244
00245 load_inputs( cp, i, x86_deref(ptr) );
00246
00247
00248
00249 aos_spill_all( cp );
00250
00251 }
00252 else if (linear) {
00253
00254 struct x86_reg elt = cp->idx_EBX;
00255 struct x86_reg ptr = cp->tmp_EAX;
00256
00257 struct x86_reg buf_stride = x86_make_disp(buf,
00258 Offset(struct aos_buffer, stride));
00259
00260 struct x86_reg buf_ptr = x86_make_disp(buf,
00261 Offset(struct aos_buffer, ptr));
00262
00263
00264
00265
00266 x86_mov(cp->func, ptr, buf_stride);
00267 x86_imul(cp->func, ptr, elt);
00268 x86_add(cp->func, ptr, buf_base_ptr);
00269
00270
00271
00272
00273
00274 if (cp->vaos->nr_vb == 1)
00275 x86_mov( cp->func, elt, ptr );
00276 else
00277 x86_mov( cp->func, buf_ptr, ptr );
00278
00279 cp->insn_counter++;
00280 }
00281 }
00282
00283 return TRUE;
00284 }
00285
00286 boolean aos_fetch_inputs( struct aos_compilation *cp, boolean linear )
00287 {
00288 unsigned j;
00289
00290 for (j = 0; j < cp->vaos->nr_vb; j++) {
00291 if (cp->vaos->base.key.const_vbuffers & (1<<j)) {
00292
00293 }
00294 else if (linear && cp->vaos->nr_vb == 1) {
00295 load_inputs( cp, 0, cp->idx_EBX );
00296 }
00297 else {
00298 struct x86_reg elt = linear ? cp->idx_EBX : x86_deref(cp->idx_EBX);
00299 struct x86_reg ptr = cp->tmp_EAX;
00300
00301 if (!get_buffer_ptr( cp, linear, j, elt, ptr ))
00302 return FALSE;
00303
00304 if (!load_inputs( cp, j, ptr ))
00305 return FALSE;
00306 }
00307 }
00308
00309 return TRUE;
00310 }
00311
00312 boolean aos_incr_inputs( struct aos_compilation *cp, boolean linear )
00313 {
00314 if (linear && cp->vaos->nr_vb == 1) {
00315 struct x86_reg stride = x86_make_disp(aos_get_x86( cp, 0, X86_BUFFERS ),
00316 (0 * sizeof(struct aos_buffer) +
00317 Offset(struct aos_buffer, stride)));
00318
00319 x86_add(cp->func, cp->idx_EBX, stride);
00320 sse_prefetchnta(cp->func, x86_make_disp(cp->idx_EBX, 192));
00321 }
00322 else if (linear) {
00323
00324 }
00325 else {
00326 x86_lea(cp->func, cp->idx_EBX, x86_make_disp(cp->idx_EBX, 4));
00327 }
00328
00329 return TRUE;
00330 }
00331
00332
00333
00334
00335
00336
00337 static void emit_store_R32G32B32A32( struct aos_compilation *cp,
00338 struct x86_reg dst_ptr,
00339 struct x86_reg dataXMM )
00340 {
00341 sse_movups(cp->func, dst_ptr, dataXMM);
00342 }
00343
00344 static void emit_store_R32G32B32( struct aos_compilation *cp,
00345 struct x86_reg dst_ptr,
00346 struct x86_reg dataXMM )
00347 {
00348 sse_movlps(cp->func, dst_ptr, dataXMM);
00349 sse_shufps(cp->func, dataXMM, dataXMM, SHUF(Z,Z,Z,Z) );
00350 sse_movss(cp->func, x86_make_disp(dst_ptr,8), dataXMM);
00351 }
00352
00353 static void emit_store_R32G32( struct aos_compilation *cp,
00354 struct x86_reg dst_ptr,
00355 struct x86_reg dataXMM )
00356 {
00357 sse_movlps(cp->func, dst_ptr, dataXMM);
00358 }
00359
00360 static void emit_store_R32( struct aos_compilation *cp,
00361 struct x86_reg dst_ptr,
00362 struct x86_reg dataXMM )
00363 {
00364 sse_movss(cp->func, dst_ptr, dataXMM);
00365 }
00366
00367
00368
00369 static void emit_store_R8G8B8A8_UNORM( struct aos_compilation *cp,
00370 struct x86_reg dst_ptr,
00371 struct x86_reg dataXMM )
00372 {
00373 sse_mulps(cp->func, dataXMM, aos_get_internal(cp, IMM_255));
00374 sse2_cvtps2dq(cp->func, dataXMM, dataXMM);
00375 sse2_packssdw(cp->func, dataXMM, dataXMM);
00376 sse2_packuswb(cp->func, dataXMM, dataXMM);
00377 sse_movss(cp->func, dst_ptr, dataXMM);
00378 }
00379
00380
00381
00382
00383
00384 static boolean emit_output( struct aos_compilation *cp,
00385 struct x86_reg ptr,
00386 struct x86_reg dataXMM,
00387 unsigned format )
00388 {
00389 switch (format) {
00390 case EMIT_1F:
00391 case EMIT_1F_PSIZE:
00392 emit_store_R32(cp, ptr, dataXMM);
00393 break;
00394 case EMIT_2F:
00395 emit_store_R32G32(cp, ptr, dataXMM);
00396 break;
00397 case EMIT_3F:
00398 emit_store_R32G32B32(cp, ptr, dataXMM);
00399 break;
00400 case EMIT_4F:
00401 emit_store_R32G32B32A32(cp, ptr, dataXMM);
00402 break;
00403 case EMIT_4UB:
00404 if (1) {
00405 emit_swizzle(cp, dataXMM, dataXMM, SHUF(Z,Y,X,W));
00406 emit_store_R8G8B8A8_UNORM(cp, ptr, dataXMM);
00407 }
00408 else {
00409 emit_store_R8G8B8A8_UNORM(cp, ptr, dataXMM);
00410 }
00411 break;
00412 default:
00413 ERROR(cp, "unhandled output format");
00414 return FALSE;
00415 }
00416
00417 return TRUE;
00418 }
00419
00420
00421
00422 boolean aos_emit_outputs( struct aos_compilation *cp )
00423 {
00424 unsigned i;
00425
00426 for (i = 0; i < cp->vaos->base.key.nr_outputs; i++) {
00427 unsigned format = cp->vaos->base.key.element[i].out.format;
00428 unsigned offset = cp->vaos->base.key.element[i].out.offset;
00429 unsigned vs_output = cp->vaos->base.key.element[i].out.vs_output;
00430
00431 struct x86_reg data;
00432
00433 if (format == EMIT_1F_PSIZE) {
00434 data = aos_get_internal_xmm( cp, IMM_PSIZE );
00435 }
00436 else {
00437 data = aos_get_shader_reg( cp,
00438 TGSI_FILE_OUTPUT,
00439 vs_output );
00440 }
00441
00442 if (data.file != file_XMM) {
00443 struct x86_reg tmp = aos_get_xmm_reg( cp );
00444 sse_movaps(cp->func, tmp, data);
00445 data = tmp;
00446 }
00447
00448 if (!emit_output( cp,
00449 x86_make_disp( cp->outbuf_ECX, offset ),
00450 data,
00451 format ))
00452 return FALSE;
00453
00454 aos_release_xmm_reg( cp, data.idx );
00455
00456 cp->insn_counter++;
00457 }
00458
00459 return TRUE;
00460 }
00461
00462 #endif