00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028 #include "pipe/p_debug.h"
00029 #include "pipe/p_shader_tokens.h"
00030 #include "util/u_math.h"
00031 #include "tgsi/tgsi_parse.h"
00032 #include "tgsi/tgsi_util.h"
00033 #include "tgsi_exec.h"
00034 #include "tgsi_sse2.h"
00035
00036 #include "rtasm/rtasm_x86sse.h"
00037
00038 #ifdef PIPE_ARCH_X86
00039
00040
00041
00042
00043
/* Compile-time switches tested with #if further down in this file. */
#define HIGH_PRECISION 1

#define FAST_MATH 1


/* Run the following statement once for every channel index, using C as
 * the loop variable.
 */
#define FOR_EACH_CHANNEL( C )\
   for ((C) = 0; (C) < NUM_CHANNELS; (C)++)

/* Non-zero when channel C is set in the write mask of destination 0 of I. */
#define IS_DST0_CHANNEL_ENABLED( I, C )\
   ((I).FullDstRegisters[0].DstRegister.WriteMask & (1 << (C)))

/* "if" header guarding the following statement on the write mask test. */
#define IF_IS_DST0_CHANNEL_ENABLED( I, C )\
   if (IS_DST0_CHANNEL_ENABLED( I, C ))

/* Loop over all channels, executing the body only for write-enabled ones. */
#define FOR_EACH_DST0_ENABLED_CHANNEL( I, C )\
   FOR_EACH_CHANNEL( C )\
      IF_IS_DST0_CHANNEL_ENABLED( I, C )

/* Channel indices. */
#define CHAN_X 0
#define CHAN_Y 1
#define CHAN_Z 2
#define CHAN_W 3

/* Short aliases for the TGSI_EXEC_* temporary-slot constants. */
#define TEMP_ONE_I   TGSI_EXEC_TEMP_ONE_I
#define TEMP_ONE_C   TGSI_EXEC_TEMP_ONE_C

#define TEMP_R0           TGSI_EXEC_TEMP_R0
#define TEMP_ADDR         TGSI_EXEC_TEMP_ADDR
#define TEMP_EXEC_MASK_I  TGSI_EXEC_MASK_I
#define TEMP_EXEC_MASK_C  TGSI_EXEC_MASK_C
00074
00075
00080 static struct x86_reg
00081 make_xmm(
00082 unsigned xmm )
00083 {
00084 return x86_make_reg(
00085 file_XMM,
00086 (enum x86_reg_name) xmm );
00087 }
00088
00093 static struct x86_reg
00094 get_const_base( void )
00095 {
00096 return x86_make_reg(
00097 file_REG32,
00098 reg_CX );
00099 }
00100
00101 static struct x86_reg
00102 get_input_base( void )
00103 {
00104 return x86_make_reg(
00105 file_REG32,
00106 reg_AX );
00107 }
00108
00109 static struct x86_reg
00110 get_output_base( void )
00111 {
00112 return x86_make_reg(
00113 file_REG32,
00114 reg_DX );
00115 }
00116
00117 static struct x86_reg
00118 get_temp_base( void )
00119 {
00120 return x86_make_reg(
00121 file_REG32,
00122 reg_BX );
00123 }
00124
00125 static struct x86_reg
00126 get_coef_base( void )
00127 {
00128 return get_output_base();
00129 }
00130
00131 static struct x86_reg
00132 get_immediate_base( void )
00133 {
00134 return x86_make_reg(
00135 file_REG32,
00136 reg_DI );
00137 }
00138
00139
00145 static struct x86_reg
00146 get_immediate(
00147 unsigned vec,
00148 unsigned chan )
00149 {
00150 return x86_make_disp(
00151 get_immediate_base(),
00152 (vec * 4 + chan) * 4 );
00153 }
00154
00155 static struct x86_reg
00156 get_const(
00157 unsigned vec,
00158 unsigned chan )
00159 {
00160 return x86_make_disp(
00161 get_const_base(),
00162 (vec * 4 + chan) * 4 );
00163 }
00164
00165 static struct x86_reg
00166 get_input(
00167 unsigned vec,
00168 unsigned chan )
00169 {
00170 return x86_make_disp(
00171 get_input_base(),
00172 (vec * 4 + chan) * 16 );
00173 }
00174
00175 static struct x86_reg
00176 get_output(
00177 unsigned vec,
00178 unsigned chan )
00179 {
00180 return x86_make_disp(
00181 get_output_base(),
00182 (vec * 4 + chan) * 16 );
00183 }
00184
00185 static struct x86_reg
00186 get_temp(
00187 unsigned vec,
00188 unsigned chan )
00189 {
00190 return x86_make_disp(
00191 get_temp_base(),
00192 (vec * 4 + chan) * 16 );
00193 }
00194
00195 static struct x86_reg
00196 get_coef(
00197 unsigned vec,
00198 unsigned chan,
00199 unsigned member )
00200 {
00201 return x86_make_disp(
00202 get_coef_base(),
00203 ((vec * 3 + member) * 4 + chan) * 4 );
00204 }
00205
00206
/**
 * Append a return instruction to \p func.
 */
static void
emit_ret( struct x86_function *func )
{
   x86_ret( func );
}
00213
00214
00225 static void
00226 emit_const(
00227 struct x86_function *func,
00228 uint xmm,
00229 int vec,
00230 uint chan,
00231 uint indirect,
00232 uint indirectFile,
00233 int indirectIndex )
00234 {
00235 if (indirect) {
00236
00237
00238
00239 struct x86_reg r0 = get_input_base();
00240 struct x86_reg r1 = get_output_base();
00241 uint i;
00242
00243 assert( indirectFile == TGSI_FILE_ADDRESS );
00244 assert( indirectIndex == 0 );
00245
00246 x86_push( func, r0 );
00247 x86_push( func, r1 );
00248
00249
00250
00251
00252
00253
00254
00255
00256
00257
00258
00259
00260
00261
00262
00263
00264
00265 for (i = 0; i < QUAD_SIZE; i++) {
00266
00267 x86_mov( func, r1, x86_make_disp( get_temp( TEMP_ADDR, CHAN_X ), i * 4 ) );
00268
00269 x86_mov( func, r0, x86_make_disp( get_temp( TEMP_EXEC_MASK_I, TEMP_EXEC_MASK_C ), i * 4 ) );
00270
00271 x86_and( func, r1, r0 );
00272
00273 x86_lea( func, r0, get_const( vec, chan ) );
00274
00275
00276
00277 x86_add( func, r1, r1 );
00278 x86_add( func, r1, r1 );
00279 x86_add( func, r1, r1 );
00280 x86_add( func, r1, r1 );
00281
00282 x86_add( func, r0, r1 );
00283 x86_mov( func, r1, x86_deref( r0 ) );
00284 x86_mov( func, x86_make_disp( get_temp( TEMP_R0, CHAN_X ), i * 4 ), r1 );
00285 }
00286
00287 x86_pop( func, r1 );
00288 x86_pop( func, r0 );
00289
00290 sse_movaps(
00291 func,
00292 make_xmm( xmm ),
00293 get_temp( TEMP_R0, CHAN_X ) );
00294 }
00295 else {
00296
00297 assert( vec >= 0 );
00298
00299 sse_movss(
00300 func,
00301 make_xmm( xmm ),
00302 get_const( vec, chan ) );
00303 sse_shufps(
00304 func,
00305 make_xmm( xmm ),
00306 make_xmm( xmm ),
00307 SHUF( 0, 0, 0, 0 ) );
00308 }
00309 }
00310
/**
 * Emit code that loads one immediate channel and replicates it to all four
 * lanes of XMM register \p xmm.
 */
static void
emit_immediate(
   struct x86_function *func,
   unsigned xmm,
   unsigned vec,
   unsigned chan )
{
   sse_movss( func, make_xmm( xmm ), get_immediate( vec, chan ) );
   sse_shufps( func, make_xmm( xmm ), make_xmm( xmm ), SHUF( 0, 0, 0, 0 ) );
}
00328
00329
00336 static void
00337 emit_inputf(
00338 struct x86_function *func,
00339 unsigned xmm,
00340 unsigned vec,
00341 unsigned chan )
00342 {
00343 sse_movups(
00344 func,
00345 make_xmm( xmm ),
00346 get_input( vec, chan ) );
00347 }
00348
00355 static void
00356 emit_output(
00357 struct x86_function *func,
00358 unsigned xmm,
00359 unsigned vec,
00360 unsigned chan )
00361 {
00362 sse_movups(
00363 func,
00364 get_output( vec, chan ),
00365 make_xmm( xmm ) );
00366 }
00367
00374 static void
00375 emit_tempf(
00376 struct x86_function *func,
00377 unsigned xmm,
00378 unsigned vec,
00379 unsigned chan )
00380 {
00381 sse_movaps(
00382 func,
00383 make_xmm( xmm ),
00384 get_temp( vec, chan ) );
00385 }
00386
/**
 * Emit code that loads one coefficient (see get_coef()) and replicates it
 * to all four lanes of XMM register \p xmm.
 */
static void
emit_coef(
   struct x86_function *func,
   unsigned xmm,
   unsigned vec,
   unsigned chan,
   unsigned member )
{
   sse_movss( func, make_xmm( xmm ), get_coef( vec, chan, member ) );
   sse_shufps( func, make_xmm( xmm ), make_xmm( xmm ), SHUF( 0, 0, 0, 0 ) );
}
00412
00417 static void
00418 emit_inputs(
00419 struct x86_function *func,
00420 unsigned xmm,
00421 unsigned vec,
00422 unsigned chan )
00423 {
00424 sse_movups(
00425 func,
00426 get_input( vec, chan ),
00427 make_xmm( xmm ) );
00428 }
00429
00430 static void
00431 emit_temps(
00432 struct x86_function *func,
00433 unsigned xmm,
00434 unsigned vec,
00435 unsigned chan )
00436 {
00437 sse_movaps(
00438 func,
00439 get_temp( vec, chan ),
00440 make_xmm( xmm ) );
00441 }
00442
00443 static void
00444 emit_addrs(
00445 struct x86_function *func,
00446 unsigned xmm,
00447 unsigned vec,
00448 unsigned chan )
00449 {
00450 assert( vec == 0 );
00451
00452 emit_temps(
00453 func,
00454 xmm,
00455 vec + TGSI_EXEC_TEMP_ADDR,
00456 chan );
00457 }
00458
/**
 * Load coefficient member 0 of (vec, chan) into XMM register \p xmm.
 */
static void
emit_coef_a0(
   struct x86_function *func,
   unsigned xmm,
   unsigned vec,
   unsigned chan )
{
   const unsigned member = 0;

   emit_coef( func, xmm, vec, chan, member );
}
00477
/**
 * Load coefficient member 1 of (vec, chan) into XMM register \p xmm.
 */
static void
emit_coef_dadx(
   struct x86_function *func,
   unsigned xmm,
   unsigned vec,
   unsigned chan )
{
   const unsigned member = 1;

   emit_coef( func, xmm, vec, chan, member );
}
00492
/**
 * Load coefficient member 2 of (vec, chan) into XMM register \p xmm.
 */
static void
emit_coef_dady(
   struct x86_function *func,
   unsigned xmm,
   unsigned vec,
   unsigned chan )
{
   const unsigned member = 2;

   emit_coef( func, xmm, vec, chan, member );
}
00507
00512 static void
00513 emit_push_gp(
00514 struct x86_function *func )
00515 {
00516 x86_push(
00517 func,
00518 x86_make_reg( file_REG32, reg_AX) );
00519 x86_push(
00520 func,
00521 x86_make_reg( file_REG32, reg_CX) );
00522 x86_push(
00523 func,
00524 x86_make_reg( file_REG32, reg_DX) );
00525 }
00526
00527 static void
00528 x86_pop_gp(
00529 struct x86_function *func )
00530 {
00531
00532
00533 x86_pop(
00534 func,
00535 x86_make_reg( file_REG32, reg_DX) );
00536 x86_pop(
00537 func,
00538 x86_make_reg( file_REG32, reg_CX) );
00539 x86_pop(
00540 func,
00541 x86_make_reg( file_REG32, reg_AX) );
00542 }
00543
00544 static void
00545 emit_func_call_dst(
00546 struct x86_function *func,
00547 unsigned xmm_dst,
00548 void (PIPE_CDECL *code)() )
00549 {
00550 sse_movaps(
00551 func,
00552 get_temp( TEMP_R0, 0 ),
00553 make_xmm( xmm_dst ) );
00554
00555 emit_push_gp(
00556 func );
00557
00558 {
00559 struct x86_reg ecx = x86_make_reg( file_REG32, reg_CX );
00560
00561 x86_lea(
00562 func,
00563 ecx,
00564 get_temp( TEMP_R0, 0 ) );
00565
00566 x86_push( func, ecx );
00567 x86_mov_reg_imm( func, ecx, (unsigned long) code );
00568 x86_call( func, ecx );
00569 x86_pop(func, ecx );
00570 }
00571
00572
00573 x86_pop_gp(
00574 func );
00575
00576 sse_movaps(
00577 func,
00578 make_xmm( xmm_dst ),
00579 get_temp( TEMP_R0, 0 ) );
00580 }
00581
00582 static void
00583 emit_func_call_dst_src(
00584 struct x86_function *func,
00585 unsigned xmm_dst,
00586 unsigned xmm_src,
00587 void (PIPE_CDECL *code)() )
00588 {
00589 sse_movaps(
00590 func,
00591 get_temp( TEMP_R0, 1 ),
00592 make_xmm( xmm_src ) );
00593
00594 emit_func_call_dst(
00595 func,
00596 xmm_dst,
00597 code );
00598 }
00599
00604 static void
00605 emit_abs(
00606 struct x86_function *func,
00607 unsigned xmm )
00608 {
00609 sse_andps(
00610 func,
00611 make_xmm( xmm ),
00612 get_temp(
00613 TGSI_EXEC_TEMP_7FFFFFFF_I,
00614 TGSI_EXEC_TEMP_7FFFFFFF_C ) );
00615 }
00616
/**
 * Emit a packed add: xmm_dst += xmm_src.
 */
static void
emit_add(
   struct x86_function *func,
   unsigned xmm_dst,
   unsigned xmm_src )
{
   sse_addps( func, make_xmm( xmm_dst ), make_xmm( xmm_src ) );
}
00628
00629 static void PIPE_CDECL
00630 cos4f(
00631 float *store )
00632 {
00633 store[0] = cosf( store[0] );
00634 store[1] = cosf( store[1] );
00635 store[2] = cosf( store[2] );
00636 store[3] = cosf( store[3] );
00637 }
00638
00639 static void
00640 emit_cos(
00641 struct x86_function *func,
00642 unsigned xmm_dst )
00643 {
00644 emit_func_call_dst(
00645 func,
00646 xmm_dst,
00647 cos4f );
00648 }
00649
00650 static void PIPE_CDECL
00651 ex24f(
00652 float *store )
00653 {
00654 #if FAST_MATH
00655 store[0] = util_fast_exp2( store[0] );
00656 store[1] = util_fast_exp2( store[1] );
00657 store[2] = util_fast_exp2( store[2] );
00658 store[3] = util_fast_exp2( store[3] );
00659 #else
00660 store[0] = powf( 2.0f, store[0] );
00661 store[1] = powf( 2.0f, store[1] );
00662 store[2] = powf( 2.0f, store[2] );
00663 store[3] = powf( 2.0f, store[3] );
00664 #endif
00665 }
00666
00667 static void
00668 emit_ex2(
00669 struct x86_function *func,
00670 unsigned xmm_dst )
00671 {
00672 emit_func_call_dst(
00673 func,
00674 xmm_dst,
00675 ex24f );
00676 }
00677
00678 static void
00679 emit_f2it(
00680 struct x86_function *func,
00681 unsigned xmm )
00682 {
00683 sse2_cvttps2dq(
00684 func,
00685 make_xmm( xmm ),
00686 make_xmm( xmm ) );
00687 }
00688
00689 static void
00690 emit_i2f(
00691 struct x86_function *func,
00692 unsigned xmm )
00693 {
00694 sse2_cvtdq2ps(
00695 func,
00696 make_xmm( xmm ),
00697 make_xmm( xmm ) );
00698 }
00699
00700 static void PIPE_CDECL
00701 flr4f(
00702 float *store )
00703 {
00704 store[0] = floorf( store[0] );
00705 store[1] = floorf( store[1] );
00706 store[2] = floorf( store[2] );
00707 store[3] = floorf( store[3] );
00708 }
00709
00710 static void
00711 emit_flr(
00712 struct x86_function *func,
00713 unsigned xmm_dst )
00714 {
00715 emit_func_call_dst(
00716 func,
00717 xmm_dst,
00718 flr4f );
00719 }
00720
00721 static void PIPE_CDECL
00722 frc4f(
00723 float *store )
00724 {
00725 store[0] -= floorf( store[0] );
00726 store[1] -= floorf( store[1] );
00727 store[2] -= floorf( store[2] );
00728 store[3] -= floorf( store[3] );
00729 }
00730
00731 static void
00732 emit_frc(
00733 struct x86_function *func,
00734 unsigned xmm_dst )
00735 {
00736 emit_func_call_dst(
00737 func,
00738 xmm_dst,
00739 frc4f );
00740 }
00741
00742 static void PIPE_CDECL
00743 lg24f(
00744 float *store )
00745 {
00746 store[0] = util_fast_log2( store[0] );
00747 store[1] = util_fast_log2( store[1] );
00748 store[2] = util_fast_log2( store[2] );
00749 store[3] = util_fast_log2( store[3] );
00750 }
00751
00752 static void
00753 emit_lg2(
00754 struct x86_function *func,
00755 unsigned xmm_dst )
00756 {
00757 emit_func_call_dst(
00758 func,
00759 xmm_dst,
00760 lg24f );
00761 }
00762
/**
 * Emit a register-to-register copy: xmm_dst = xmm_src.
 */
static void
emit_MOV(
   struct x86_function *func,
   unsigned xmm_dst,
   unsigned xmm_src )
{
   sse_movups( func, make_xmm( xmm_dst ), make_xmm( xmm_src ) );
}
00774
/**
 * Emit a packed multiply: xmm_dst *= xmm_src.
 */
static void
emit_mul(
   struct x86_function *func,
   unsigned xmm_dst,
   unsigned xmm_src )
{
   sse_mulps( func, make_xmm( xmm_dst ), make_xmm( xmm_src ) );
}
00785
00786 static void
00787 emit_neg(
00788 struct x86_function *func,
00789 unsigned xmm )
00790 {
00791 sse_xorps(
00792 func,
00793 make_xmm( xmm ),
00794 get_temp(
00795 TGSI_EXEC_TEMP_80000000_I,
00796 TGSI_EXEC_TEMP_80000000_C ) );
00797 }
00798
00799 static void PIPE_CDECL
00800 pow4f(
00801 float *store )
00802 {
00803 #if FAST_MATH
00804 store[0] = util_fast_pow( store[0], store[4] );
00805 store[1] = util_fast_pow( store[1], store[5] );
00806 store[2] = util_fast_pow( store[2], store[6] );
00807 store[3] = util_fast_pow( store[3], store[7] );
00808 #else
00809 store[0] = powf( store[0], store[4] );
00810 store[1] = powf( store[1], store[5] );
00811 store[2] = powf( store[2], store[6] );
00812 store[3] = powf( store[3], store[7] );
00813 #endif
00814 }
00815
00816 static void
00817 emit_pow(
00818 struct x86_function *func,
00819 unsigned xmm_dst,
00820 unsigned xmm_src )
00821 {
00822 emit_func_call_dst_src(
00823 func,
00824 xmm_dst,
00825 xmm_src,
00826 pow4f );
00827 }
00828
00829 static void
00830 emit_rcp (
00831 struct x86_function *func,
00832 unsigned xmm_dst,
00833 unsigned xmm_src )
00834 {
00835
00836
00837
00838
00839 sse2_rcpps(
00840 func,
00841 make_xmm( xmm_dst ),
00842 make_xmm( xmm_src ) );
00843 }
00844
00845 static void PIPE_CDECL
00846 rnd4f(
00847 float *store )
00848 {
00849 store[0] = floorf( store[0] + 0.5f );
00850 store[1] = floorf( store[1] + 0.5f );
00851 store[2] = floorf( store[2] + 0.5f );
00852 store[3] = floorf( store[3] + 0.5f );
00853 }
00854
00855 static void
00856 emit_rnd(
00857 struct x86_function *func,
00858 unsigned xmm_dst )
00859 {
00860 emit_func_call_dst(
00861 func,
00862 xmm_dst,
00863 rnd4f );
00864 }
00865
/**
 * Emit a packed reciprocal square root: xmm_dst = 1 / sqrt( xmm_src ).
 *
 * With HIGH_PRECISION the rsqrtps estimate r of 1/sqrt(a) is refined as
 *
 *    result = 0.5 * r * (3 - a * r * r)
 *
 * which uses XMM2 and XMM3 as scratch and overwrites \p xmm_src.  The
 * destination, the source and the two scratch registers must therefore all
 * be different.  Without HIGH_PRECISION the raw estimate is returned.
 */
static void
emit_rsqrt(
   struct x86_function *func,
   unsigned xmm_dst,
   unsigned xmm_src )
{
#if HIGH_PRECISION
   struct x86_reg result = make_xmm( xmm_dst );
   struct x86_reg value = make_xmm( xmm_src );
   struct x86_reg three = make_xmm( 2 );
   struct x86_reg estimate = make_xmm( 3 );

   assert( xmm_dst != xmm_src );
   assert( xmm_dst != 2 && xmm_dst != 3 );
   assert( xmm_src != 2 && xmm_src != 3 );

   sse_movaps( func, result, get_temp( TGSI_EXEC_TEMP_HALF_I, TGSI_EXEC_TEMP_HALF_C ) );
   sse_movaps( func, three, get_temp( TGSI_EXEC_TEMP_THREE_I, TGSI_EXEC_TEMP_THREE_C ) );
   sse_rsqrtps( func, estimate, value );   /* r                   */
   sse_mulps( func, value, estimate );     /* a * r               */
   sse_mulps( func, result, estimate );    /* 0.5 * r             */
   sse_mulps( func, value, estimate );     /* a * r * r           */
   sse_subps( func, three, value );        /* 3 - a * r * r       */
   sse_mulps( func, result, three );       /* 0.5 * r * (3 - ...) */
#else
   sse_rsqrtps( func, make_xmm( xmm_dst ), make_xmm( xmm_src ) );
#endif
}
00911
00912 static void
00913 emit_setsign(
00914 struct x86_function *func,
00915 unsigned xmm )
00916 {
00917 sse_orps(
00918 func,
00919 make_xmm( xmm ),
00920 get_temp(
00921 TGSI_EXEC_TEMP_80000000_I,
00922 TGSI_EXEC_TEMP_80000000_C ) );
00923 }
00924
00925 static void PIPE_CDECL
00926 sgn4f(
00927 float *store )
00928 {
00929 store[0] = store[0] < 0.0f ? -1.0f : store[0] > 0.0f ? 1.0f : 0.0f;
00930 store[1] = store[1] < 0.0f ? -1.0f : store[1] > 0.0f ? 1.0f : 0.0f;
00931 store[2] = store[2] < 0.0f ? -1.0f : store[2] > 0.0f ? 1.0f : 0.0f;
00932 store[3] = store[3] < 0.0f ? -1.0f : store[3] > 0.0f ? 1.0f : 0.0f;
00933 }
00934
00935 static void
00936 emit_sgn(
00937 struct x86_function *func,
00938 unsigned xmm_dst )
00939 {
00940 emit_func_call_dst(
00941 func,
00942 xmm_dst,
00943 sgn4f );
00944 }
00945
00946 static void PIPE_CDECL
00947 sin4f(
00948 float *store )
00949 {
00950 store[0] = sinf( store[0] );
00951 store[1] = sinf( store[1] );
00952 store[2] = sinf( store[2] );
00953 store[3] = sinf( store[3] );
00954 }
00955
00956 static void
00957 emit_sin (struct x86_function *func,
00958 unsigned xmm_dst)
00959 {
00960 emit_func_call_dst(
00961 func,
00962 xmm_dst,
00963 sin4f );
00964 }
00965
/**
 * Emit a packed subtract: xmm_dst -= xmm_src.
 */
static void
emit_sub(
   struct x86_function *func,
   unsigned xmm_dst,
   unsigned xmm_src )
{
   sse_subps( func, make_xmm( xmm_dst ), make_xmm( xmm_src ) );
}
00977
00982 static void
00983 emit_fetch(
00984 struct x86_function *func,
00985 unsigned xmm,
00986 const struct tgsi_full_src_register *reg,
00987 const unsigned chan_index )
00988 {
00989 unsigned swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index );
00990
00991 switch (swizzle) {
00992 case TGSI_EXTSWIZZLE_X:
00993 case TGSI_EXTSWIZZLE_Y:
00994 case TGSI_EXTSWIZZLE_Z:
00995 case TGSI_EXTSWIZZLE_W:
00996 switch (reg->SrcRegister.File) {
00997 case TGSI_FILE_CONSTANT:
00998 emit_const(
00999 func,
01000 xmm,
01001 reg->SrcRegister.Index,
01002 swizzle,
01003 reg->SrcRegister.Indirect,
01004 reg->SrcRegisterInd.File,
01005 reg->SrcRegisterInd.Index );
01006 break;
01007
01008 case TGSI_FILE_IMMEDIATE:
01009 emit_immediate(
01010 func,
01011 xmm,
01012 reg->SrcRegister.Index,
01013 swizzle );
01014 break;
01015
01016 case TGSI_FILE_INPUT:
01017 emit_inputf(
01018 func,
01019 xmm,
01020 reg->SrcRegister.Index,
01021 swizzle );
01022 break;
01023
01024 case TGSI_FILE_TEMPORARY:
01025 emit_tempf(
01026 func,
01027 xmm,
01028 reg->SrcRegister.Index,
01029 swizzle );
01030 break;
01031
01032 default:
01033 assert( 0 );
01034 }
01035 break;
01036
01037 case TGSI_EXTSWIZZLE_ZERO:
01038 emit_tempf(
01039 func,
01040 xmm,
01041 TGSI_EXEC_TEMP_00000000_I,
01042 TGSI_EXEC_TEMP_00000000_C );
01043 break;
01044
01045 case TGSI_EXTSWIZZLE_ONE:
01046 emit_tempf(
01047 func,
01048 xmm,
01049 TEMP_ONE_I,
01050 TEMP_ONE_C );
01051 break;
01052
01053 default:
01054 assert( 0 );
01055 }
01056
01057 switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) {
01058 case TGSI_UTIL_SIGN_CLEAR:
01059 emit_abs( func, xmm );
01060 break;
01061
01062 case TGSI_UTIL_SIGN_SET:
01063 emit_setsign( func, xmm );
01064 break;
01065
01066 case TGSI_UTIL_SIGN_TOGGLE:
01067 emit_neg( func, xmm );
01068 break;
01069
01070 case TGSI_UTIL_SIGN_KEEP:
01071 break;
01072 }
01073 }
01074
/* Fetch channel C of source operand number SRC of instruction I into XMM
 * register X; shorthand for emit_fetch().
 */
#define FETCH( F, I, X, SRC, C )\
   emit_fetch( F, X, &(I).FullSrcRegisters[SRC], C )
01077
01082 static void
01083 emit_store(
01084 struct x86_function *func,
01085 unsigned xmm,
01086 const struct tgsi_full_dst_register *reg,
01087 const struct tgsi_full_instruction *inst,
01088 unsigned chan_index )
01089 {
01090 switch( reg->DstRegister.File ) {
01091 case TGSI_FILE_OUTPUT:
01092 emit_output(
01093 func,
01094 xmm,
01095 reg->DstRegister.Index,
01096 chan_index );
01097 break;
01098
01099 case TGSI_FILE_TEMPORARY:
01100 emit_temps(
01101 func,
01102 xmm,
01103 reg->DstRegister.Index,
01104 chan_index );
01105 break;
01106
01107 case TGSI_FILE_ADDRESS:
01108 emit_addrs(
01109 func,
01110 xmm,
01111 reg->DstRegister.Index,
01112 chan_index );
01113 break;
01114
01115 default:
01116 assert( 0 );
01117 }
01118
01119 switch( inst->Instruction.Saturate ) {
01120 case TGSI_SAT_NONE:
01121 break;
01122
01123 case TGSI_SAT_ZERO_ONE:
01124
01125 break;
01126
01127 case TGSI_SAT_MINUS_PLUS_ONE:
01128 assert( 0 );
01129 break;
01130 }
01131 }
01132
/* Store XMM register X to channel C of destination operand number DST of
 * instruction I; shorthand for emit_store().
 */
#define STORE( F, I, X, DST, C )\
   emit_store( F, X, &(I).FullDstRegisters[DST], &(I), C )
01135
01140 static void
01141 emit_kil(
01142 struct x86_function *func,
01143 const struct tgsi_full_src_register *reg )
01144 {
01145 unsigned uniquemask;
01146 unsigned registers[4];
01147 unsigned nextregister = 0;
01148 unsigned firstchan = ~0;
01149 unsigned chan_index;
01150
01151
01152
01153
01154 uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE);
01155
01156 FOR_EACH_CHANNEL( chan_index ) {
01157 unsigned swizzle;
01158
01159
01160 swizzle = tgsi_util_get_full_src_register_extswizzle(
01161 reg,
01162 chan_index );
01163
01164
01165 if( !(uniquemask & (1 << swizzle)) ) {
01166 uniquemask |= 1 << swizzle;
01167
01168
01169 registers[chan_index] = nextregister;
01170 emit_fetch(
01171 func,
01172 nextregister,
01173 reg,
01174 chan_index );
01175 nextregister++;
01176
01177
01178 if( firstchan == ~0 ) {
01179 firstchan = chan_index;
01180 }
01181 }
01182 }
01183
01184 x86_push(
01185 func,
01186 x86_make_reg( file_REG32, reg_AX ) );
01187 x86_push(
01188 func,
01189 x86_make_reg( file_REG32, reg_DX ) );
01190
01191 FOR_EACH_CHANNEL( chan_index ) {
01192 if( uniquemask & (1 << chan_index) ) {
01193 sse_cmpps(
01194 func,
01195 make_xmm( registers[chan_index] ),
01196 get_temp(
01197 TGSI_EXEC_TEMP_00000000_I,
01198 TGSI_EXEC_TEMP_00000000_C ),
01199 cc_LessThan );
01200
01201 if( chan_index == firstchan ) {
01202 sse_pmovmskb(
01203 func,
01204 x86_make_reg( file_REG32, reg_AX ),
01205 make_xmm( registers[chan_index] ) );
01206 }
01207 else {
01208 sse_pmovmskb(
01209 func,
01210 x86_make_reg( file_REG32, reg_DX ),
01211 make_xmm( registers[chan_index] ) );
01212 x86_or(
01213 func,
01214 x86_make_reg( file_REG32, reg_AX ),
01215 x86_make_reg( file_REG32, reg_DX ) );
01216 }
01217 }
01218 }
01219
01220 x86_or(
01221 func,
01222 get_temp(
01223 TGSI_EXEC_TEMP_KILMASK_I,
01224 TGSI_EXEC_TEMP_KILMASK_C ),
01225 x86_make_reg( file_REG32, reg_AX ) );
01226
01227 x86_pop(
01228 func,
01229 x86_make_reg( file_REG32, reg_DX ) );
01230 x86_pop(
01231 func,
01232 x86_make_reg( file_REG32, reg_AX ) );
01233 }
01234
01235
/**
 * Placeholder for the predicated kill instruction: currently emits no code.
 */
static void
emit_kilp( struct x86_function *func )
{
   (void) func;
}
01242
01243
01244 static void
01245 emit_setcc(
01246 struct x86_function *func,
01247 struct tgsi_full_instruction *inst,
01248 enum sse_cc cc )
01249 {
01250 unsigned chan_index;
01251
01252 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
01253 FETCH( func, *inst, 0, 0, chan_index );
01254 FETCH( func, *inst, 1, 1, chan_index );
01255 sse_cmpps(
01256 func,
01257 make_xmm( 0 ),
01258 make_xmm( 1 ),
01259 cc );
01260 sse_andps(
01261 func,
01262 make_xmm( 0 ),
01263 get_temp(
01264 TEMP_ONE_I,
01265 TEMP_ONE_C ) );
01266 STORE( func, *inst, 0, 0, chan_index );
01267 }
01268 }
01269
01270 static void
01271 emit_cmp(
01272 struct x86_function *func,
01273 struct tgsi_full_instruction *inst )
01274 {
01275 unsigned chan_index;
01276
01277 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
01278 FETCH( func, *inst, 0, 0, chan_index );
01279 FETCH( func, *inst, 1, 1, chan_index );
01280 FETCH( func, *inst, 2, 2, chan_index );
01281 sse_cmpps(
01282 func,
01283 make_xmm( 0 ),
01284 get_temp(
01285 TGSI_EXEC_TEMP_00000000_I,
01286 TGSI_EXEC_TEMP_00000000_C ),
01287 cc_LessThan );
01288 sse_andps(
01289 func,
01290 make_xmm( 1 ),
01291 make_xmm( 0 ) );
01292 sse_andnps(
01293 func,
01294 make_xmm( 0 ),
01295 make_xmm( 2 ) );
01296 sse_orps(
01297 func,
01298 make_xmm( 0 ),
01299 make_xmm( 1 ) );
01300 STORE( func, *inst, 0, 0, chan_index );
01301 }
01302 }
01303
01304
01309 static boolean
01310 indirect_temp_reference(const struct tgsi_full_instruction *inst)
01311 {
01312 uint i;
01313 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
01314 const struct tgsi_full_src_register *reg = &inst->FullSrcRegisters[i];
01315 if (reg->SrcRegister.File == TGSI_FILE_TEMPORARY &&
01316 reg->SrcRegister.Indirect)
01317 return TRUE;
01318 }
01319 for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
01320 const struct tgsi_full_dst_register *reg = &inst->FullDstRegisters[i];
01321 if (reg->DstRegister.File == TGSI_FILE_TEMPORARY &&
01322 reg->DstRegister.Indirect)
01323 return TRUE;
01324 }
01325 return FALSE;
01326 }
01327
01328
01329 static int
01330 emit_instruction(
01331 struct x86_function *func,
01332 struct tgsi_full_instruction *inst )
01333 {
01334 unsigned chan_index;
01335
01336
01337 if (indirect_temp_reference(inst))
01338 return FALSE;
01339
01340 switch (inst->Instruction.Opcode) {
01341 case TGSI_OPCODE_ARL:
01342 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
01343 FETCH( func, *inst, 0, 0, chan_index );
01344 emit_flr(func, 0);
01345 emit_f2it( func, 0 );
01346 STORE( func, *inst, 0, 0, chan_index );
01347 }
01348 break;
01349
01350 case TGSI_OPCODE_MOV:
01351 case TGSI_OPCODE_SWZ:
01352 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
01353 FETCH( func, *inst, 0, 0, chan_index );
01354 STORE( func, *inst, 0, 0, chan_index );
01355 }
01356 break;
01357
01358 case TGSI_OPCODE_LIT:
01359 if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
01360 IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
01361 emit_tempf(
01362 func,
01363 0,
01364 TEMP_ONE_I,
01365 TEMP_ONE_C);
01366 if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ) {
01367 STORE( func, *inst, 0, 0, CHAN_X );
01368 }
01369 if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) ) {
01370 STORE( func, *inst, 0, 0, CHAN_W );
01371 }
01372 }
01373 if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ||
01374 IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
01375 if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
01376 FETCH( func, *inst, 0, 0, CHAN_X );
01377 sse_maxps(
01378 func,
01379 make_xmm( 0 ),
01380 get_temp(
01381 TGSI_EXEC_TEMP_00000000_I,
01382 TGSI_EXEC_TEMP_00000000_C ) );
01383 STORE( func, *inst, 0, 0, CHAN_Y );
01384 }
01385 if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
01386
01387 FETCH( func, *inst, 1, 0, CHAN_Y );
01388
01389 sse_maxps(
01390 func,
01391 make_xmm( 1 ),
01392 get_temp(
01393 TGSI_EXEC_TEMP_00000000_I,
01394 TGSI_EXEC_TEMP_00000000_C ) );
01395
01396 FETCH( func, *inst, 2, 0, CHAN_W );
01397
01398 sse_minps(
01399 func,
01400 make_xmm( 2 ),
01401 get_temp(
01402 TGSI_EXEC_TEMP_128_I,
01403 TGSI_EXEC_TEMP_128_C ) );
01404
01405 sse_maxps(
01406 func,
01407 make_xmm( 2 ),
01408 get_temp(
01409 TGSI_EXEC_TEMP_MINUS_128_I,
01410 TGSI_EXEC_TEMP_MINUS_128_C ) );
01411 emit_pow( func, 1, 2 );
01412 FETCH( func, *inst, 0, 0, CHAN_X );
01413 sse_xorps(
01414 func,
01415 make_xmm( 2 ),
01416 make_xmm( 2 ) );
01417 sse_cmpps(
01418 func,
01419 make_xmm( 2 ),
01420 make_xmm( 0 ),
01421 cc_LessThan );
01422 sse_andps(
01423 func,
01424 make_xmm( 2 ),
01425 make_xmm( 1 ) );
01426 STORE( func, *inst, 2, 0, CHAN_Z );
01427 }
01428 }
01429 break;
01430
01431 case TGSI_OPCODE_RCP:
01432
01433 FETCH( func, *inst, 0, 0, CHAN_X );
01434 emit_rcp( func, 0, 0 );
01435 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
01436 STORE( func, *inst, 0, 0, chan_index );
01437 }
01438 break;
01439
01440 case TGSI_OPCODE_RSQ:
01441
01442 FETCH( func, *inst, 0, 0, CHAN_X );
01443 emit_rsqrt( func, 1, 0 );
01444 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
01445 STORE( func, *inst, 1, 0, chan_index );
01446 }
01447 break;
01448
01449 case TGSI_OPCODE_EXP:
01450 if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
01451 IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ||
01452 IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) {
01453 FETCH( func, *inst, 0, 0, CHAN_X );
01454 if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
01455 IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) {
01456 emit_MOV( func, 1, 0 );
01457 emit_flr( func, 1 );
01458
01459 if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X )) {
01460 emit_MOV( func, 2, 1 );
01461 emit_ex2( func, 2 );
01462 STORE( func, *inst, 2, 0, CHAN_X );
01463 }
01464
01465 if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) {
01466 emit_MOV( func, 2, 0 );
01467 emit_sub( func, 2, 1 );
01468 STORE( func, *inst, 2, 0, CHAN_Y );
01469 }
01470 }
01471
01472 if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) {
01473 emit_ex2( func, 0 );
01474 STORE( func, *inst, 0, 0, CHAN_Z );
01475 }
01476 }
01477
01478 if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W )) {
01479 emit_tempf( func, 0, TEMP_ONE_I, TEMP_ONE_C );
01480 STORE( func, *inst, 0, 0, CHAN_W );
01481 }
01482 break;
01483
01484 case TGSI_OPCODE_LOG:
01485 if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
01486 IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ||
01487 IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) {
01488 FETCH( func, *inst, 0, 0, CHAN_X );
01489 emit_abs( func, 0 );
01490 emit_MOV( func, 1, 0 );
01491 emit_lg2( func, 1 );
01492
01493 if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) {
01494 STORE( func, *inst, 1, 0, CHAN_Z );
01495 }
01496 if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
01497 IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) {
01498 emit_flr( func, 1 );
01499
01500 if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X )) {
01501 STORE( func, *inst, 1, 0, CHAN_X );
01502 }
01503
01504 if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) {
01505 emit_ex2( func, 1 );
01506 emit_rcp( func, 1, 1 );
01507 emit_mul( func, 0, 1 );
01508 STORE( func, *inst, 0, 0, CHAN_Y );
01509 }
01510 }
01511 }
01512
01513 if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W )) {
01514 emit_tempf( func, 0, TEMP_ONE_I, TEMP_ONE_C );
01515 STORE( func, *inst, 0, 0, CHAN_W );
01516 }
01517 break;
01518
01519 case TGSI_OPCODE_MUL:
01520 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
01521 FETCH( func, *inst, 0, 0, chan_index );
01522 FETCH( func, *inst, 1, 1, chan_index );
01523 emit_mul( func, 0, 1 );
01524 STORE( func, *inst, 0, 0, chan_index );
01525 }
01526 break;
01527
01528 case TGSI_OPCODE_ADD:
01529 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
01530 FETCH( func, *inst, 0, 0, chan_index );
01531 FETCH( func, *inst, 1, 1, chan_index );
01532 emit_add( func, 0, 1 );
01533 STORE( func, *inst, 0, 0, chan_index );
01534 }
01535 break;
01536
01537 case TGSI_OPCODE_DP3:
01538
01539 FETCH( func, *inst, 0, 0, CHAN_X );
01540 FETCH( func, *inst, 1, 1, CHAN_X );
01541 emit_mul( func, 0, 1 );
01542 FETCH( func, *inst, 1, 0, CHAN_Y );
01543 FETCH( func, *inst, 2, 1, CHAN_Y );
01544 emit_mul( func, 1, 2 );
01545 emit_add( func, 0, 1 );
01546 FETCH( func, *inst, 1, 0, CHAN_Z );
01547 FETCH( func, *inst, 2, 1, CHAN_Z );
01548 emit_mul( func, 1, 2 );
01549 emit_add( func, 0, 1 );
01550 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
01551 STORE( func, *inst, 0, 0, chan_index );
01552 }
01553 break;
01554
01555 case TGSI_OPCODE_DP4:
01556
01557 FETCH( func, *inst, 0, 0, CHAN_X );
01558 FETCH( func, *inst, 1, 1, CHAN_X );
01559 emit_mul( func, 0, 1 );
01560 FETCH( func, *inst, 1, 0, CHAN_Y );
01561 FETCH( func, *inst, 2, 1, CHAN_Y );
01562 emit_mul( func, 1, 2 );
01563 emit_add( func, 0, 1 );
01564 FETCH( func, *inst, 1, 0, CHAN_Z );
01565 FETCH( func, *inst, 2, 1, CHAN_Z );
01566 emit_mul(func, 1, 2 );
01567 emit_add(func, 0, 1 );
01568 FETCH( func, *inst, 1, 0, CHAN_W );
01569 FETCH( func, *inst, 2, 1, CHAN_W );
01570 emit_mul( func, 1, 2 );
01571 emit_add( func, 0, 1 );
01572 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
01573 STORE( func, *inst, 0, 0, chan_index );
01574 }
01575 break;
01576
01577 case TGSI_OPCODE_DST:
01578 IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) {
01579 emit_tempf(
01580 func,
01581 0,
01582 TEMP_ONE_I,
01583 TEMP_ONE_C );
01584 STORE( func, *inst, 0, 0, CHAN_X );
01585 }
01586 IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) {
01587 FETCH( func, *inst, 0, 0, CHAN_Y );
01588 FETCH( func, *inst, 1, 1, CHAN_Y );
01589 emit_mul( func, 0, 1 );
01590 STORE( func, *inst, 0, 0, CHAN_Y );
01591 }
01592 IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) {
01593 FETCH( func, *inst, 0, 0, CHAN_Z );
01594 STORE( func, *inst, 0, 0, CHAN_Z );
01595 }
01596 IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) {
01597 FETCH( func, *inst, 0, 1, CHAN_W );
01598 STORE( func, *inst, 0, 0, CHAN_W );
01599 }
01600 break;
01601
01602 case TGSI_OPCODE_MIN:
01603 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
01604 FETCH( func, *inst, 0, 0, chan_index );
01605 FETCH( func, *inst, 1, 1, chan_index );
01606 sse_minps(
01607 func,
01608 make_xmm( 0 ),
01609 make_xmm( 1 ) );
01610 STORE( func, *inst, 0, 0, chan_index );
01611 }
01612 break;
01613
01614 case TGSI_OPCODE_MAX:
01615 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
01616 FETCH( func, *inst, 0, 0, chan_index );
01617 FETCH( func, *inst, 1, 1, chan_index );
01618 sse_maxps(
01619 func,
01620 make_xmm( 0 ),
01621 make_xmm( 1 ) );
01622 STORE( func, *inst, 0, 0, chan_index );
01623 }
01624 break;
01625
01626 case TGSI_OPCODE_SLT:
01627
01628 emit_setcc( func, inst, cc_LessThan );
01629 break;
01630
01631 case TGSI_OPCODE_SGE:
01632
01633 emit_setcc( func, inst, cc_NotLessThan );
01634 break;
01635
01636 case TGSI_OPCODE_MAD:
01637
01638 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
01639 FETCH( func, *inst, 0, 0, chan_index );
01640 FETCH( func, *inst, 1, 1, chan_index );
01641 FETCH( func, *inst, 2, 2, chan_index );
01642 emit_mul( func, 0, 1 );
01643 emit_add( func, 0, 2 );
01644 STORE( func, *inst, 0, 0, chan_index );
01645 }
01646 break;
01647
01648 case TGSI_OPCODE_SUB:
01649 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
01650 FETCH( func, *inst, 0, 0, chan_index );
01651 FETCH( func, *inst, 1, 1, chan_index );
01652 emit_sub( func, 0, 1 );
01653 STORE( func, *inst, 0, 0, chan_index );
01654 }
01655 break;
01656
01657 case TGSI_OPCODE_LERP:
01658
01659 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
01660 FETCH( func, *inst, 0, 0, chan_index );
01661 FETCH( func, *inst, 1, 1, chan_index );
01662 FETCH( func, *inst, 2, 2, chan_index );
01663 emit_sub( func, 1, 2 );
01664 emit_mul( func, 0, 1 );
01665 emit_add( func, 0, 2 );
01666 STORE( func, *inst, 0, 0, chan_index );
01667 }
01668 break;
01669
01670 case TGSI_OPCODE_CND:
01671 return 0;
01672 break;
01673
01674 case TGSI_OPCODE_CND0:
01675 return 0;
01676 break;
01677
01678 case TGSI_OPCODE_DOT2ADD:
01679
01680 return 0;
01681 break;
01682
01683 case TGSI_OPCODE_INDEX:
01684 return 0;
01685 break;
01686
01687 case TGSI_OPCODE_NEGATE:
01688 return 0;
01689 break;
01690
01691 case TGSI_OPCODE_FRAC:
01692
01693 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
01694 FETCH( func, *inst, 0, 0, chan_index );
01695 emit_frc( func, 0 );
01696 STORE( func, *inst, 0, 0, chan_index );
01697 }
01698 break;
01699
01700 case TGSI_OPCODE_CLAMP:
01701 return 0;
01702 break;
01703
01704 case TGSI_OPCODE_FLOOR:
01705
01706 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
01707 FETCH( func, *inst, 0, 0, chan_index );
01708 emit_flr( func, 0 );
01709 STORE( func, *inst, 0, 0, chan_index );
01710 }
01711 break;
01712
01713 case TGSI_OPCODE_ROUND:
01714 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
01715 FETCH( func, *inst, 0, 0, chan_index );
01716 emit_rnd( func, 0 );
01717 STORE( func, *inst, 0, 0, chan_index );
01718 }
01719 break;
01720
01721 case TGSI_OPCODE_EXPBASE2:
01722
01723 FETCH( func, *inst, 0, 0, CHAN_X );
01724 emit_ex2( func, 0 );
01725 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
01726 STORE( func, *inst, 0, 0, chan_index );
01727 }
01728 break;
01729
01730 case TGSI_OPCODE_LOGBASE2:
01731
01732 FETCH( func, *inst, 0, 0, CHAN_X );
01733 emit_lg2( func, 0 );
01734 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
01735 STORE( func, *inst, 0, 0, chan_index );
01736 }
01737 break;
01738
01739 case TGSI_OPCODE_POWER:
01740
01741 FETCH( func, *inst, 0, 0, CHAN_X );
01742 FETCH( func, *inst, 1, 1, CHAN_X );
01743 emit_pow( func, 0, 1 );
01744 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
01745 STORE( func, *inst, 0, 0, chan_index );
01746 }
01747 break;
01748
01749 case TGSI_OPCODE_CROSSPRODUCT:
01750
01751 if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
01752 IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
01753 FETCH( func, *inst, 1, 1, CHAN_Z );
01754 FETCH( func, *inst, 3, 0, CHAN_Z );
01755 }
01756 if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
01757 IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
01758 FETCH( func, *inst, 0, 0, CHAN_Y );
01759 FETCH( func, *inst, 4, 1, CHAN_Y );
01760 }
01761 IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) {
01762 emit_MOV( func, 2, 0 );
01763 emit_mul( func, 2, 1 );
01764 emit_MOV( func, 5, 3 );
01765 emit_mul( func, 5, 4 );
01766 emit_sub( func, 2, 5 );
01767 STORE( func, *inst, 2, 0, CHAN_X );
01768 }
01769 if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ||
01770 IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) {
01771 FETCH( func, *inst, 2, 1, CHAN_X );
01772 FETCH( func, *inst, 5, 0, CHAN_X );
01773 }
01774 IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) {
01775 emit_mul( func, 3, 2 );
01776 emit_mul( func, 1, 5 );
01777 emit_sub( func, 3, 1 );
01778 STORE( func, *inst, 3, 0, CHAN_Y );
01779 }
01780 IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) {
01781 emit_mul( func, 5, 4 );
01782 emit_mul( func, 0, 2 );
01783 emit_sub( func, 5, 0 );
01784 STORE( func, *inst, 5, 0, CHAN_Z );
01785 }
01786 IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) {
01787 emit_tempf(
01788 func,
01789 0,
01790 TEMP_ONE_I,
01791 TEMP_ONE_C );
01792 STORE( func, *inst, 0, 0, CHAN_W );
01793 }
01794 break;
01795
01796 case TGSI_OPCODE_MULTIPLYMATRIX:
01797 return 0;
01798 break;
01799
01800 case TGSI_OPCODE_ABS:
01801 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
01802 FETCH( func, *inst, 0, 0, chan_index );
01803 emit_abs( func, 0) ;
01804
01805 STORE( func, *inst, 0, 0, chan_index );
01806 }
01807 break;
01808
01809 case TGSI_OPCODE_RCC:
01810 return 0;
01811 break;
01812
01813 case TGSI_OPCODE_DPH:
01814 FETCH( func, *inst, 0, 0, CHAN_X );
01815 FETCH( func, *inst, 1, 1, CHAN_X );
01816 emit_mul( func, 0, 1 );
01817 FETCH( func, *inst, 1, 0, CHAN_Y );
01818 FETCH( func, *inst, 2, 1, CHAN_Y );
01819 emit_mul( func, 1, 2 );
01820 emit_add( func, 0, 1 );
01821 FETCH( func, *inst, 1, 0, CHAN_Z );
01822 FETCH( func, *inst, 2, 1, CHAN_Z );
01823 emit_mul( func, 1, 2 );
01824 emit_add( func, 0, 1 );
01825 FETCH( func, *inst, 1, 1, CHAN_W );
01826 emit_add( func, 0, 1 );
01827 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
01828 STORE( func, *inst, 0, 0, chan_index );
01829 }
01830 break;
01831
01832 case TGSI_OPCODE_COS:
01833 FETCH( func, *inst, 0, 0, CHAN_X );
01834 emit_cos( func, 0 );
01835 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
01836 STORE( func, *inst, 0, 0, chan_index );
01837 }
01838 break;
01839
01840 case TGSI_OPCODE_DDX:
01841 return 0;
01842 break;
01843
01844 case TGSI_OPCODE_DDY:
01845 return 0;
01846 break;
01847
01848 case TGSI_OPCODE_KILP:
01849
01850 emit_kilp( func );
01851 return 0;
01852 break;
01853
01854 case TGSI_OPCODE_KIL:
01855
01856 emit_kil( func, &inst->FullSrcRegisters[0] );
01857 break;
01858
01859 case TGSI_OPCODE_PK2H:
01860 return 0;
01861 break;
01862
01863 case TGSI_OPCODE_PK2US:
01864 return 0;
01865 break;
01866
01867 case TGSI_OPCODE_PK4B:
01868 return 0;
01869 break;
01870
01871 case TGSI_OPCODE_PK4UB:
01872 return 0;
01873 break;
01874
01875 case TGSI_OPCODE_RFL:
01876 return 0;
01877 break;
01878
01879 case TGSI_OPCODE_SEQ:
01880 emit_setcc( func, inst, cc_Equal );
01881 break;
01882
01883 case TGSI_OPCODE_SFL:
01884 return 0;
01885 break;
01886
01887 case TGSI_OPCODE_SGT:
01888 emit_setcc( func, inst, cc_NotLessThanEqual );
01889 break;
01890
01891 case TGSI_OPCODE_SIN:
01892 FETCH( func, *inst, 0, 0, CHAN_X );
01893 emit_sin( func, 0 );
01894 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
01895 STORE( func, *inst, 0, 0, chan_index );
01896 }
01897 break;
01898
01899 case TGSI_OPCODE_SLE:
01900 emit_setcc( func, inst, cc_LessThanEqual );
01901 break;
01902
01903 case TGSI_OPCODE_SNE:
01904 emit_setcc( func, inst, cc_NotEqual );
01905 break;
01906
01907 case TGSI_OPCODE_STR:
01908 return 0;
01909 break;
01910
01911 case TGSI_OPCODE_TEX:
01912 if (0) {
01913
01914
01915 emit_tempf(
01916 func,
01917 0,
01918 TEMP_ONE_I,
01919 TEMP_ONE_C );
01920 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
01921 STORE( func, *inst, 0, 0, chan_index );
01922 }
01923 }
01924 else {
01925 return 0;
01926 }
01927 break;
01928
01929 case TGSI_OPCODE_TXD:
01930 return 0;
01931 break;
01932
01933 case TGSI_OPCODE_UP2H:
01934 return 0;
01935 break;
01936
01937 case TGSI_OPCODE_UP2US:
01938 return 0;
01939 break;
01940
01941 case TGSI_OPCODE_UP4B:
01942 return 0;
01943 break;
01944
01945 case TGSI_OPCODE_UP4UB:
01946 return 0;
01947 break;
01948
01949 case TGSI_OPCODE_X2D:
01950 return 0;
01951 break;
01952
01953 case TGSI_OPCODE_ARA:
01954 return 0;
01955 break;
01956
01957 case TGSI_OPCODE_ARR:
01958 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
01959 FETCH( func, *inst, 0, 0, chan_index );
01960 emit_rnd( func, 0 );
01961 emit_f2it( func, 0 );
01962 STORE( func, *inst, 0, 0, chan_index );
01963 }
01964 break;
01965
01966 case TGSI_OPCODE_BRA:
01967 return 0;
01968 break;
01969
01970 case TGSI_OPCODE_CAL:
01971 return 0;
01972 break;
01973
01974 case TGSI_OPCODE_RET:
01975 emit_ret( func );
01976 break;
01977
01978 case TGSI_OPCODE_END:
01979 break;
01980
01981 case TGSI_OPCODE_SSG:
01982
01983 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
01984 FETCH( func, *inst, 0, 0, chan_index );
01985 emit_sgn( func, 0 );
01986 STORE( func, *inst, 0, 0, chan_index );
01987 }
01988 break;
01989
01990 case TGSI_OPCODE_CMP:
01991 emit_cmp (func, inst);
01992 break;
01993
01994 case TGSI_OPCODE_SCS:
01995 IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) {
01996 FETCH( func, *inst, 0, 0, CHAN_X );
01997 emit_cos( func, 0 );
01998 STORE( func, *inst, 0, 0, CHAN_X );
01999 }
02000 IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) {
02001 FETCH( func, *inst, 0, 0, CHAN_X );
02002 emit_sin( func, 0 );
02003 STORE( func, *inst, 0, 0, CHAN_Y );
02004 }
02005 IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) {
02006 emit_tempf(
02007 func,
02008 0,
02009 TGSI_EXEC_TEMP_00000000_I,
02010 TGSI_EXEC_TEMP_00000000_C );
02011 STORE( func, *inst, 0, 0, CHAN_Z );
02012 }
02013 IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) {
02014 emit_tempf(
02015 func,
02016 0,
02017 TEMP_ONE_I,
02018 TEMP_ONE_C );
02019 STORE( func, *inst, 0, 0, CHAN_W );
02020 }
02021 break;
02022
02023 case TGSI_OPCODE_TXB:
02024 return 0;
02025 break;
02026
02027 case TGSI_OPCODE_NRM:
02028
02029 case TGSI_OPCODE_NRM4:
02030
02031 {
02032 uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4;
02033
02034 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X) ||
02035 IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y) ||
02036 IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z) ||
02037 (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_W) && dims == 4)) {
02038
02039
02040
02041
02042
02043 FETCH(func, *inst, 0, 0, CHAN_X);
02044 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X)) {
02045 emit_MOV(func, 4, 0);
02046 }
02047 emit_mul(func, 0, 0);
02048
02049
02050
02051 FETCH(func, *inst, 1, 0, CHAN_Y);
02052 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) {
02053 emit_MOV(func, 5, 1);
02054 }
02055 emit_mul(func, 1, 1);
02056 emit_add(func, 0, 1);
02057
02058
02059
02060 FETCH(func, *inst, 1, 0, CHAN_Z);
02061 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) {
02062 emit_MOV(func, 6, 1);
02063 }
02064 emit_mul(func, 1, 1);
02065 emit_add(func, 0, 1);
02066
02067 if (dims == 4) {
02068
02069
02070 FETCH(func, *inst, 1, 0, CHAN_W);
02071 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_W)) {
02072 emit_MOV(func, 7, 1);
02073 }
02074 emit_mul(func, 1, 1);
02075 emit_add(func, 0, 1);
02076 }
02077
02078
02079 emit_rsqrt(func, 1, 0);
02080
02081
02082 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X)) {
02083 emit_mul(func, 4, 1);
02084 STORE(func, *inst, 4, 0, CHAN_X);
02085 }
02086
02087
02088 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) {
02089 emit_mul(func, 5, 1);
02090 STORE(func, *inst, 5, 0, CHAN_Y);
02091 }
02092
02093
02094 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) {
02095 emit_mul(func, 6, 1);
02096 STORE(func, *inst, 6, 0, CHAN_Z);
02097 }
02098
02099
02100 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X) && dims == 4) {
02101 emit_mul(func, 7, 1);
02102 STORE(func, *inst, 7, 0, CHAN_W);
02103 }
02104 }
02105
02106
02107 if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_W) && dims == 3) {
02108 emit_tempf(func, 0, TEMP_ONE_I, TEMP_ONE_C);
02109 STORE(func, *inst, 0, 0, CHAN_W);
02110 }
02111 }
02112 break;
02113
02114 case TGSI_OPCODE_DIV:
02115 return 0;
02116 break;
02117
02118 case TGSI_OPCODE_DP2:
02119 return 0;
02120 break;
02121
02122 case TGSI_OPCODE_TXL:
02123 return 0;
02124 break;
02125
02126 case TGSI_OPCODE_BRK:
02127 return 0;
02128 break;
02129
02130 case TGSI_OPCODE_IF:
02131 return 0;
02132 break;
02133
02134 case TGSI_OPCODE_LOOP:
02135 return 0;
02136 break;
02137
02138 case TGSI_OPCODE_REP:
02139 return 0;
02140 break;
02141
02142 case TGSI_OPCODE_ELSE:
02143 return 0;
02144 break;
02145
02146 case TGSI_OPCODE_ENDIF:
02147 return 0;
02148 break;
02149
02150 case TGSI_OPCODE_ENDLOOP:
02151 return 0;
02152 break;
02153
02154 case TGSI_OPCODE_ENDREP:
02155 return 0;
02156 break;
02157
02158 case TGSI_OPCODE_PUSHA:
02159 return 0;
02160 break;
02161
02162 case TGSI_OPCODE_POPA:
02163 return 0;
02164 break;
02165
02166 case TGSI_OPCODE_CEIL:
02167 return 0;
02168 break;
02169
02170 case TGSI_OPCODE_I2F:
02171 return 0;
02172 break;
02173
02174 case TGSI_OPCODE_NOT:
02175 return 0;
02176 break;
02177
02178 case TGSI_OPCODE_TRUNC:
02179 FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
02180 FETCH( func, *inst, 0, 0, chan_index );
02181 emit_f2it( func, 0 );
02182 emit_i2f( func, 0 );
02183 STORE( func, *inst, 0, 0, chan_index );
02184 }
02185 break;
02186
02187 case TGSI_OPCODE_SHL:
02188 return 0;
02189 break;
02190
02191 case TGSI_OPCODE_SHR:
02192 return 0;
02193 break;
02194
02195 case TGSI_OPCODE_AND:
02196 return 0;
02197 break;
02198
02199 case TGSI_OPCODE_OR:
02200 return 0;
02201 break;
02202
02203 case TGSI_OPCODE_MOD:
02204 return 0;
02205 break;
02206
02207 case TGSI_OPCODE_XOR:
02208 return 0;
02209 break;
02210
02211 case TGSI_OPCODE_SAD:
02212 return 0;
02213 break;
02214
02215 case TGSI_OPCODE_TXF:
02216 return 0;
02217 break;
02218
02219 case TGSI_OPCODE_TXQ:
02220 return 0;
02221 break;
02222
02223 case TGSI_OPCODE_CONT:
02224 return 0;
02225 break;
02226
02227 case TGSI_OPCODE_EMIT:
02228 return 0;
02229 break;
02230
02231 case TGSI_OPCODE_ENDPRIM:
02232 return 0;
02233 break;
02234
02235 default:
02236 return 0;
02237 }
02238
02239 return 1;
02240 }
02241
02242 static void
02243 emit_declaration(
02244 struct x86_function *func,
02245 struct tgsi_full_declaration *decl )
02246 {
02247 if( decl->Declaration.File == TGSI_FILE_INPUT ) {
02248 unsigned first, last, mask;
02249 unsigned i, j;
02250
02251 first = decl->DeclarationRange.First;
02252 last = decl->DeclarationRange.Last;
02253 mask = decl->Declaration.UsageMask;
02254
02255 for( i = first; i <= last; i++ ) {
02256 for( j = 0; j < NUM_CHANNELS; j++ ) {
02257 if( mask & (1 << j) ) {
02258 switch( decl->Declaration.Interpolate ) {
02259 case TGSI_INTERPOLATE_CONSTANT:
02260 emit_coef_a0( func, 0, i, j );
02261 emit_inputs( func, 0, i, j );
02262 break;
02263
02264 case TGSI_INTERPOLATE_LINEAR:
02265 emit_tempf( func, 0, 0, TGSI_SWIZZLE_X );
02266 emit_coef_dadx( func, 1, i, j );
02267 emit_tempf( func, 2, 0, TGSI_SWIZZLE_Y );
02268 emit_coef_dady( func, 3, i, j );
02269 emit_mul( func, 0, 1 );
02270 emit_coef_a0( func, 4, i, j );
02271 emit_mul( func, 2, 3 );
02272 emit_add( func, 0, 4 );
02273 emit_add( func, 0, 2 );
02274 emit_inputs( func, 0, i, j );
02275 break;
02276
02277 case TGSI_INTERPOLATE_PERSPECTIVE:
02278 emit_tempf( func, 0, 0, TGSI_SWIZZLE_X );
02279 emit_coef_dadx( func, 1, i, j );
02280 emit_tempf( func, 2, 0, TGSI_SWIZZLE_Y );
02281 emit_coef_dady( func, 3, i, j );
02282 emit_mul( func, 0, 1 );
02283 emit_tempf( func, 4, 0, TGSI_SWIZZLE_W );
02284 emit_coef_a0( func, 5, i, j );
02285 emit_rcp( func, 4, 4 );
02286 emit_mul( func, 2, 3 );
02287 emit_add( func, 0, 5 );
02288 emit_add( func, 0, 2 );
02289 emit_mul( func, 0, 4 );
02290 emit_inputs( func, 0, i, j );
02291 break;
02292
02293 default:
02294 assert( 0 );
02295 break;
02296 }
02297 }
02298 }
02299 }
02300 }
02301 }
02302
02303 static void aos_to_soa( struct x86_function *func,
02304 uint arg_aos,
02305 uint arg_soa,
02306 uint arg_num,
02307 uint arg_stride )
02308 {
02309 struct x86_reg soa_input = x86_make_reg( file_REG32, reg_AX );
02310 struct x86_reg aos_input = x86_make_reg( file_REG32, reg_BX );
02311 struct x86_reg num_inputs = x86_make_reg( file_REG32, reg_CX );
02312 struct x86_reg stride = x86_make_reg( file_REG32, reg_DX );
02313 int inner_loop;
02314
02315
02316
02317 x86_push( func, x86_make_reg( file_REG32, reg_BX ) );
02318
02319 x86_mov( func, aos_input, x86_fn_arg( func, arg_aos ) );
02320 x86_mov( func, soa_input, x86_fn_arg( func, arg_soa ) );
02321 x86_mov( func, num_inputs, x86_fn_arg( func, arg_num ) );
02322 x86_mov( func, stride, x86_fn_arg( func, arg_stride ) );
02323
02324
02325 inner_loop = x86_get_label( func );
02326 {
02327 x86_push( func, aos_input );
02328 sse_movlps( func, make_xmm( 0 ), x86_make_disp( aos_input, 0 ) );
02329 sse_movlps( func, make_xmm( 3 ), x86_make_disp( aos_input, 8 ) );
02330 x86_add( func, aos_input, stride );
02331 sse_movhps( func, make_xmm( 0 ), x86_make_disp( aos_input, 0 ) );
02332 sse_movhps( func, make_xmm( 3 ), x86_make_disp( aos_input, 8 ) );
02333 x86_add( func, aos_input, stride );
02334 sse_movlps( func, make_xmm( 1 ), x86_make_disp( aos_input, 0 ) );
02335 sse_movlps( func, make_xmm( 4 ), x86_make_disp( aos_input, 8 ) );
02336 x86_add( func, aos_input, stride );
02337 sse_movhps( func, make_xmm( 1 ), x86_make_disp( aos_input, 0 ) );
02338 sse_movhps( func, make_xmm( 4 ), x86_make_disp( aos_input, 8 ) );
02339 x86_pop( func, aos_input );
02340
02341 sse_movaps( func, make_xmm( 2 ), make_xmm( 0 ) );
02342 sse_movaps( func, make_xmm( 5 ), make_xmm( 3 ) );
02343 sse_shufps( func, make_xmm( 0 ), make_xmm( 1 ), 0x88 );
02344 sse_shufps( func, make_xmm( 2 ), make_xmm( 1 ), 0xdd );
02345 sse_shufps( func, make_xmm( 3 ), make_xmm( 4 ), 0x88 );
02346 sse_shufps( func, make_xmm( 5 ), make_xmm( 4 ), 0xdd );
02347
02348 sse_movups( func, x86_make_disp( soa_input, 0 ), make_xmm( 0 ) );
02349 sse_movups( func, x86_make_disp( soa_input, 16 ), make_xmm( 2 ) );
02350 sse_movups( func, x86_make_disp( soa_input, 32 ), make_xmm( 3 ) );
02351 sse_movups( func, x86_make_disp( soa_input, 48 ), make_xmm( 5 ) );
02352
02353
02354 x86_lea( func, aos_input, x86_make_disp(aos_input, 16) );
02355 x86_lea( func, soa_input, x86_make_disp(soa_input, 64) );
02356 }
02357
02358 x86_dec( func, num_inputs );
02359 x86_jcc( func, cc_NE, inner_loop );
02360
02361
02362 x86_pop( func, aos_input );
02363 }
02364
02365 static void soa_to_aos( struct x86_function *func, uint aos, uint soa, uint num, uint stride )
02366 {
02367 struct x86_reg soa_output;
02368 struct x86_reg aos_output;
02369 struct x86_reg num_outputs;
02370 struct x86_reg temp;
02371 int inner_loop;
02372
02373 soa_output = x86_make_reg( file_REG32, reg_AX );
02374 aos_output = x86_make_reg( file_REG32, reg_BX );
02375 num_outputs = x86_make_reg( file_REG32, reg_CX );
02376 temp = x86_make_reg( file_REG32, reg_DX );
02377
02378
02379 x86_push( func, aos_output );
02380
02381 x86_mov( func, soa_output, x86_fn_arg( func, soa ) );
02382 x86_mov( func, aos_output, x86_fn_arg( func, aos ) );
02383 x86_mov( func, num_outputs, x86_fn_arg( func, num ) );
02384
02385
02386 inner_loop = x86_get_label( func );
02387 {
02388 sse_movups( func, make_xmm( 0 ), x86_make_disp( soa_output, 0 ) );
02389 sse_movups( func, make_xmm( 1 ), x86_make_disp( soa_output, 16 ) );
02390 sse_movups( func, make_xmm( 3 ), x86_make_disp( soa_output, 32 ) );
02391 sse_movups( func, make_xmm( 4 ), x86_make_disp( soa_output, 48 ) );
02392
02393 sse_movaps( func, make_xmm( 2 ), make_xmm( 0 ) );
02394 sse_movaps( func, make_xmm( 5 ), make_xmm( 3 ) );
02395 sse_unpcklps( func, make_xmm( 0 ), make_xmm( 1 ) );
02396 sse_unpckhps( func, make_xmm( 2 ), make_xmm( 1 ) );
02397 sse_unpcklps( func, make_xmm( 3 ), make_xmm( 4 ) );
02398 sse_unpckhps( func, make_xmm( 5 ), make_xmm( 4 ) );
02399
02400 x86_mov( func, temp, x86_fn_arg( func, stride ) );
02401 x86_push( func, aos_output );
02402 sse_movlps( func, x86_make_disp( aos_output, 0 ), make_xmm( 0 ) );
02403 sse_movlps( func, x86_make_disp( aos_output, 8 ), make_xmm( 3 ) );
02404 x86_add( func, aos_output, temp );
02405 sse_movhps( func, x86_make_disp( aos_output, 0 ), make_xmm( 0 ) );
02406 sse_movhps( func, x86_make_disp( aos_output, 8 ), make_xmm( 3 ) );
02407 x86_add( func, aos_output, temp );
02408 sse_movlps( func, x86_make_disp( aos_output, 0 ), make_xmm( 2 ) );
02409 sse_movlps( func, x86_make_disp( aos_output, 8 ), make_xmm( 5 ) );
02410 x86_add( func, aos_output, temp );
02411 sse_movhps( func, x86_make_disp( aos_output, 0 ), make_xmm( 2 ) );
02412 sse_movhps( func, x86_make_disp( aos_output, 8 ), make_xmm( 5 ) );
02413 x86_pop( func, aos_output );
02414
02415
02416 x86_lea( func, aos_output, x86_make_disp(aos_output, 16) );
02417 x86_lea( func, soa_output, x86_make_disp(soa_output, 64) );
02418 }
02419
02420 x86_dec( func, num_outputs );
02421 x86_jcc( func, cc_NE, inner_loop );
02422
02423
02424 x86_pop( func, aos_output );
02425 }
02426
02443 unsigned
02444 tgsi_emit_sse2(
02445 const struct tgsi_token *tokens,
02446 struct x86_function *func,
02447 float (*immediates)[4],
02448 boolean do_swizzles )
02449 {
02450 struct tgsi_parse_context parse;
02451 boolean instruction_phase = FALSE;
02452 unsigned ok = 1;
02453 uint num_immediates = 0;
02454
02455 util_init_math();
02456
02457 func->csr = func->store;
02458
02459 tgsi_parse_init( &parse, tokens );
02460
02461
02462
02463 x86_push(
02464 func,
02465 get_immediate_base() );
02466
02467 x86_push(
02468 func,
02469 get_temp_base() );
02470
02471
02472
02473
02474
02475 if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) {
02476
02477 x86_mov(
02478 func,
02479 get_input_base(),
02480 x86_fn_arg( func, 1 ) );
02481
02482 x86_mov(
02483 func,
02484 get_const_base(),
02485 x86_fn_arg( func, 3 ) );
02486 x86_mov(
02487 func,
02488 get_temp_base(),
02489 x86_fn_arg( func, 4 ) );
02490 x86_mov(
02491 func,
02492 get_coef_base(),
02493 x86_fn_arg( func, 5 ) );
02494 x86_mov(
02495 func,
02496 get_immediate_base(),
02497 x86_fn_arg( func, 6 ) );
02498 }
02499 else {
02500 assert(parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX);
02501
02502 if (do_swizzles)
02503 aos_to_soa( func,
02504 6,
02505 1,
02506 7,
02507 8 );
02508
02509 x86_mov(
02510 func,
02511 get_input_base(),
02512 x86_fn_arg( func, 1 ) );
02513 x86_mov(
02514 func,
02515 get_output_base(),
02516 x86_fn_arg( func, 2 ) );
02517 x86_mov(
02518 func,
02519 get_const_base(),
02520 x86_fn_arg( func, 3 ) );
02521 x86_mov(
02522 func,
02523 get_temp_base(),
02524 x86_fn_arg( func, 4 ) );
02525 x86_mov(
02526 func,
02527 get_immediate_base(),
02528 x86_fn_arg( func, 5 ) );
02529 }
02530
02531 while( !tgsi_parse_end_of_tokens( &parse ) && ok ) {
02532 tgsi_parse_token( &parse );
02533
02534 switch( parse.FullToken.Token.Type ) {
02535 case TGSI_TOKEN_TYPE_DECLARATION:
02536 if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) {
02537 emit_declaration(
02538 func,
02539 &parse.FullToken.FullDeclaration );
02540 }
02541 break;
02542
02543 case TGSI_TOKEN_TYPE_INSTRUCTION:
02544 if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) {
02545 if( !instruction_phase ) {
02546
02547 instruction_phase = TRUE;
02548 x86_mov(
02549 func,
02550 get_output_base(),
02551 x86_fn_arg( func, 2 ) );
02552 }
02553 }
02554
02555 ok = emit_instruction(
02556 func,
02557 &parse.FullToken.FullInstruction );
02558
02559 if (!ok) {
02560 debug_printf("failed to translate tgsi opcode %d to SSE (%s)\n",
02561 parse.FullToken.FullInstruction.Instruction.Opcode,
02562 parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX ?
02563 "vertex shader" : "fragment shader");
02564 }
02565 break;
02566
02567 case TGSI_TOKEN_TYPE_IMMEDIATE:
02568
02569 {
02570 const uint size = parse.FullToken.FullImmediate.Immediate.Size - 1;
02571 uint i;
02572 assert(size <= 4);
02573 assert(num_immediates < TGSI_EXEC_NUM_IMMEDIATES);
02574 for( i = 0; i < size; i++ ) {
02575 immediates[num_immediates][i] =
02576 parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float;
02577 }
02578 #if 0
02579 debug_printf("SSE FS immediate[%d] = %f %f %f %f\n",
02580 num_immediates,
02581 immediates[num_immediates][0],
02582 immediates[num_immediates][1],
02583 immediates[num_immediates][2],
02584 immediates[num_immediates][3]);
02585 #endif
02586 num_immediates++;
02587 }
02588 break;
02589
02590 default:
02591 ok = 0;
02592 assert( 0 );
02593 }
02594 }
02595
02596 if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX) {
02597 if (do_swizzles)
02598 soa_to_aos( func, 9, 2, 10, 11 );
02599 }
02600
02601
02602
02603 x86_pop(
02604 func,
02605 get_temp_base() );
02606
02607 x86_pop(
02608 func,
02609 get_immediate_base() );
02610
02611 emit_ret( func );
02612
02613 tgsi_parse_free( &parse );
02614
02615 return ok;
02616 }
02617
02618 #endif
02619