00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032 #include "brw_context.h"
00033 #include "brw_vs.h"
00034
00035 #include "pipe/p_shader_tokens.h"
00036 #include "tgsi/tgsi_parse.h"
00037
/*
 * Bookkeeping collected while walking a vertex program's declarations.
 * The whole structure is zeroed before use, so every count starts at 0.
 */
struct brw_prog_info {
   /* Register-file sizes, summed over the declaration ranges. */
   unsigned num_temps;
   unsigned num_addrs;
   unsigned num_consts;

   /* Non-zero once an output with PSIZE semantics has been declared. */
   unsigned writes_psize;

   /* Register indices of special inputs/outputs. */
   unsigned pos_idx;          /* output declared with POSITION semantics */
   unsigned result_edge_idx;  /* output written by the edge-flag copy */
   unsigned edge_flag_idx;    /* input read by the edge-flag copy */
   unsigned psize_idx;        /* output declared with PSIZE semantics */
};
00050
00051
00052
00053
00054 static void brw_vs_alloc_regs( struct brw_vs_compile *c,
00055 struct brw_prog_info *info )
00056 {
00057 unsigned i, reg = 0, mrf;
00058 unsigned nr_params;
00059
00060
00061
00062 c->r0 = brw_vec8_grf(reg, 0); reg++;
00063
00064
00065
00066 if (c->key.nr_userclip) {
00067 for (i = 0; i < c->key.nr_userclip; i++) {
00068 c->userplane[i] = stride( brw_vec4_grf(reg+3+i/2, (i%2) * 4), 0, 4, 1);
00069 }
00070
00071
00072
00073 reg += ((6+c->key.nr_userclip+3)/4)*2;
00074 }
00075
00076
00077
00078 nr_params = c->prog_data.max_const;
00079 for (i = 0; i < nr_params; i++) {
00080 c->regs[TGSI_FILE_CONSTANT][i] = stride(brw_vec4_grf(reg+i/2, (i%2) * 4), 0, 4, 1);
00081 }
00082 reg += (nr_params+1)/2;
00083 c->prog_data.curb_read_length = reg - 1;
00084
00085
00086
00087
00088
00089 c->nr_inputs = c->vp->info.num_inputs;
00090 for (i = 0; i < c->nr_inputs; i++) {
00091 c->regs[TGSI_FILE_INPUT][i] = brw_vec8_grf(reg, 0);
00092 reg++;
00093 }
00094
00095
00096
00097
00098
00099 c->nr_outputs = 0;
00100 c->first_output = reg;
00101 mrf = 4;
00102 for (i = 0; i < c->vp->info.num_outputs; i++) {
00103 c->nr_outputs++;
00104 #if 0
00105 if (i == VERT_RESULT_HPOS) {
00106 c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0);
00107 reg++;
00108 }
00109 else if (i == VERT_RESULT_PSIZ) {
00110 c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0);
00111 reg++;
00112 mrf++;
00113 }
00114 else {
00115 c->regs[TGSI_FILE_OUTPUT][i] = brw_message_reg(mrf);
00116 mrf++;
00117 }
00118 #else
00119
00120 if (i == info->pos_idx) {
00121 c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0);
00122 reg++;
00123 } else {
00124 c->regs[TGSI_FILE_OUTPUT][i] = brw_message_reg(mrf);
00125 mrf++;
00126 }
00127 #endif
00128 }
00129
00130
00131
00132 for (i = 0; i < info->num_temps; i++) {
00133 c->regs[TGSI_FILE_TEMPORARY][i] = brw_vec8_grf(reg, 0);
00134 reg++;
00135 }
00136
00137
00138
00139
00140 for (i = 0; i < info->num_addrs; i++) {
00141 c->regs[TGSI_FILE_ADDRESS][i] = brw_reg(BRW_GENERAL_REGISTER_FILE,
00142 reg,
00143 0,
00144 BRW_REGISTER_TYPE_D,
00145 BRW_VERTICAL_STRIDE_8,
00146 BRW_WIDTH_8,
00147 BRW_HORIZONTAL_STRIDE_1,
00148 BRW_SWIZZLE_XXXX,
00149 TGSI_WRITEMASK_X);
00150 reg++;
00151 }
00152
00153 for (i = 0; i < 128; i++) {
00154 if (c->output_regs[i].used_in_src) {
00155 c->output_regs[i].reg = brw_vec8_grf(reg, 0);
00156 reg++;
00157 }
00158 }
00159
00160 c->stack = brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg, 0);
00161 reg += 2;
00162
00163
00164
00165
00166 c->first_tmp = reg;
00167 c->last_tmp = reg;
00168
00169
00170
00171
00172
00173 c->prog_data.urb_read_length = (c->nr_inputs+1)/2;
00174
00175 c->prog_data.urb_entry_size = (c->nr_outputs+2+3)/4;
00176 c->prog_data.total_grf = reg;
00177 }
00178
00179
00180 static struct brw_reg get_tmp( struct brw_vs_compile *c )
00181 {
00182 struct brw_reg tmp = brw_vec8_grf(c->last_tmp, 0);
00183
00184 if (++c->last_tmp > c->prog_data.total_grf)
00185 c->prog_data.total_grf = c->last_tmp;
00186
00187 return tmp;
00188 }
00189
00190 static void release_tmp( struct brw_vs_compile *c, struct brw_reg tmp )
00191 {
00192 if (tmp.nr == c->last_tmp-1)
00193 c->last_tmp--;
00194 }
00195
00196 static void release_tmps( struct brw_vs_compile *c )
00197 {
00198 c->last_tmp = c->first_tmp;
00199 }
00200
00201
00202 static void unalias1( struct brw_vs_compile *c,
00203 struct brw_reg dst,
00204 struct brw_reg arg0,
00205 void (*func)( struct brw_vs_compile *,
00206 struct brw_reg,
00207 struct brw_reg ))
00208 {
00209 if (dst.file == arg0.file && dst.nr == arg0.nr) {
00210 struct brw_compile *p = &c->func;
00211 struct brw_reg tmp = brw_writemask(get_tmp(c), dst.dw1.bits.writemask);
00212 func(c, tmp, arg0);
00213 brw_MOV(p, dst, tmp);
00214 }
00215 else {
00216 func(c, dst, arg0);
00217 }
00218 }
00219
00220 static void unalias2( struct brw_vs_compile *c,
00221 struct brw_reg dst,
00222 struct brw_reg arg0,
00223 struct brw_reg arg1,
00224 void (*func)( struct brw_vs_compile *,
00225 struct brw_reg,
00226 struct brw_reg,
00227 struct brw_reg ))
00228 {
00229 if ((dst.file == arg0.file && dst.nr == arg0.nr) ||
00230 (dst.file == arg1.file && dst.nr == arg1.nr)) {
00231 struct brw_compile *p = &c->func;
00232 struct brw_reg tmp = brw_writemask(get_tmp(c), dst.dw1.bits.writemask);
00233 func(c, tmp, arg0, arg1);
00234 brw_MOV(p, dst, tmp);
00235 }
00236 else {
00237 func(c, dst, arg0, arg1);
00238 }
00239 }
00240
00241 static void emit_sop( struct brw_compile *p,
00242 struct brw_reg dst,
00243 struct brw_reg arg0,
00244 struct brw_reg arg1,
00245 unsigned cond)
00246 {
00247 brw_push_insn_state(p);
00248 brw_CMP(p, brw_null_reg(), cond, arg0, arg1);
00249 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
00250 brw_MOV(p, dst, brw_imm_f(1.0f));
00251 brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
00252 brw_MOV(p, dst, brw_imm_f(0.0f));
00253 brw_pop_insn_state(p);
00254 }
00255
00256 static void emit_seq( struct brw_compile *p,
00257 struct brw_reg dst,
00258 struct brw_reg arg0,
00259 struct brw_reg arg1 )
00260 {
00261 emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_EQ);
00262 }
00263
00264 static void emit_sne( struct brw_compile *p,
00265 struct brw_reg dst,
00266 struct brw_reg arg0,
00267 struct brw_reg arg1 )
00268 {
00269 emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_NEQ);
00270 }
00271 static void emit_slt( struct brw_compile *p,
00272 struct brw_reg dst,
00273 struct brw_reg arg0,
00274 struct brw_reg arg1 )
00275 {
00276 emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_L);
00277 }
00278
00279 static void emit_sle( struct brw_compile *p,
00280 struct brw_reg dst,
00281 struct brw_reg arg0,
00282 struct brw_reg arg1 )
00283 {
00284 emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_LE);
00285 }
00286
00287 static void emit_sgt( struct brw_compile *p,
00288 struct brw_reg dst,
00289 struct brw_reg arg0,
00290 struct brw_reg arg1 )
00291 {
00292 emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_G);
00293 }
00294
00295 static void emit_sge( struct brw_compile *p,
00296 struct brw_reg dst,
00297 struct brw_reg arg0,
00298 struct brw_reg arg1 )
00299 {
00300 emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_GE);
00301 }
00302
00303 static void emit_max( struct brw_compile *p,
00304 struct brw_reg dst,
00305 struct brw_reg arg0,
00306 struct brw_reg arg1 )
00307 {
00308 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0, arg1);
00309 brw_SEL(p, dst, arg1, arg0);
00310 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
00311 }
00312
00313 static void emit_min( struct brw_compile *p,
00314 struct brw_reg dst,
00315 struct brw_reg arg0,
00316 struct brw_reg arg1 )
00317 {
00318 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0, arg1);
00319 brw_SEL(p, dst, arg0, arg1);
00320 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
00321 }
00322
00323
00324 static void emit_math1( struct brw_vs_compile *c,
00325 unsigned function,
00326 struct brw_reg dst,
00327 struct brw_reg arg0,
00328 unsigned precision)
00329 {
00330
00331
00332
00333
00334
00335
00336
00337 struct brw_compile *p = &c->func;
00338 struct brw_reg tmp = dst;
00339 boolean need_tmp = (dst.dw1.bits.writemask != 0xf ||
00340 dst.file != BRW_GENERAL_REGISTER_FILE);
00341
00342 if (need_tmp)
00343 tmp = get_tmp(c);
00344
00345 brw_math(p,
00346 tmp,
00347 function,
00348 BRW_MATH_SATURATE_NONE,
00349 2,
00350 arg0,
00351 BRW_MATH_DATA_SCALAR,
00352 precision);
00353
00354 if (need_tmp) {
00355 brw_MOV(p, dst, tmp);
00356 release_tmp(c, tmp);
00357 }
00358 }
00359
00360 static void emit_math2( struct brw_vs_compile *c,
00361 unsigned function,
00362 struct brw_reg dst,
00363 struct brw_reg arg0,
00364 struct brw_reg arg1,
00365 unsigned precision)
00366 {
00367 struct brw_compile *p = &c->func;
00368 struct brw_reg tmp = dst;
00369 boolean need_tmp = (dst.dw1.bits.writemask != 0xf ||
00370 dst.file != BRW_GENERAL_REGISTER_FILE);
00371
00372 if (need_tmp)
00373 tmp = get_tmp(c);
00374
00375 brw_MOV(p, brw_message_reg(3), arg1);
00376
00377 brw_math(p,
00378 tmp,
00379 function,
00380 BRW_MATH_SATURATE_NONE,
00381 2,
00382 arg0,
00383 BRW_MATH_DATA_SCALAR,
00384 precision);
00385
00386 if (need_tmp) {
00387 brw_MOV(p, dst, tmp);
00388 release_tmp(c, tmp);
00389 }
00390 }
00391
00392
00393
00394 static void emit_exp_noalias( struct brw_vs_compile *c,
00395 struct brw_reg dst,
00396 struct brw_reg arg0 )
00397 {
00398 struct brw_compile *p = &c->func;
00399
00400
00401 if (dst.dw1.bits.writemask & TGSI_WRITEMASK_X) {
00402 struct brw_reg tmp = get_tmp(c);
00403 struct brw_reg tmp_d = retype(tmp, BRW_REGISTER_TYPE_D);
00404
00405
00406 brw_RNDD(p, tmp_d, brw_swizzle1(arg0, 0));
00407
00408
00409
00410
00411
00412
00413 brw_ADD(p, brw_writemask(tmp_d, TGSI_WRITEMASK_X), tmp_d, brw_imm_d(127));
00414
00415
00416
00417
00418 brw_SHL(p, brw_writemask(retype(dst, BRW_REGISTER_TYPE_D), TGSI_WRITEMASK_X),
00419 tmp_d, brw_imm_d(23));
00420
00421 release_tmp(c, tmp);
00422 }
00423
00424 if (dst.dw1.bits.writemask & TGSI_WRITEMASK_Y) {
00425
00426 brw_FRC(p, brw_writemask(dst, TGSI_WRITEMASK_Y), brw_swizzle1(arg0, 0));
00427 }
00428
00429 if (dst.dw1.bits.writemask & TGSI_WRITEMASK_Z) {
00430
00431
00432
00433
00434
00435
00436
00437 emit_math1(c,
00438 BRW_MATH_FUNCTION_EXP,
00439 brw_writemask(dst, TGSI_WRITEMASK_Z),
00440 brw_swizzle1(arg0, 0),
00441 BRW_MATH_PRECISION_PARTIAL);
00442 }
00443
00444 if (dst.dw1.bits.writemask & TGSI_WRITEMASK_W) {
00445
00446 brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_W), brw_imm_f(1));
00447 }
00448 }
00449
00450
00451 static void emit_log_noalias( struct brw_vs_compile *c,
00452 struct brw_reg dst,
00453 struct brw_reg arg0 )
00454 {
00455 struct brw_compile *p = &c->func;
00456 struct brw_reg tmp = dst;
00457 struct brw_reg tmp_ud = retype(tmp, BRW_REGISTER_TYPE_UD);
00458 struct brw_reg arg0_ud = retype(arg0, BRW_REGISTER_TYPE_UD);
00459 boolean need_tmp = (dst.dw1.bits.writemask != 0xf ||
00460 dst.file != BRW_GENERAL_REGISTER_FILE);
00461
00462 if (need_tmp) {
00463 tmp = get_tmp(c);
00464 tmp_ud = retype(tmp, BRW_REGISTER_TYPE_UD);
00465 }
00466
00467
00468
00469
00470
00471
00472
00473
00474
00475
00476 if (dst.dw1.bits.writemask & TGSI_WRITEMASK_XZ) {
00477 brw_AND(p,
00478 brw_writemask(tmp_ud, TGSI_WRITEMASK_X),
00479 brw_swizzle1(arg0_ud, 0),
00480 brw_imm_ud((1U<<31)-1));
00481
00482 brw_SHR(p,
00483 brw_writemask(tmp_ud, TGSI_WRITEMASK_X),
00484 tmp_ud,
00485 brw_imm_ud(23));
00486
00487 brw_ADD(p,
00488 brw_writemask(tmp, TGSI_WRITEMASK_X),
00489 retype(tmp_ud, BRW_REGISTER_TYPE_D),
00490 brw_imm_d(-127));
00491 }
00492
00493 if (dst.dw1.bits.writemask & TGSI_WRITEMASK_YZ) {
00494 brw_AND(p,
00495 brw_writemask(tmp_ud, TGSI_WRITEMASK_Y),
00496 brw_swizzle1(arg0_ud, 0),
00497 brw_imm_ud((1<<23)-1));
00498
00499 brw_OR(p,
00500 brw_writemask(tmp_ud, TGSI_WRITEMASK_Y),
00501 tmp_ud,
00502 brw_imm_ud(127<<23));
00503 }
00504
00505 if (dst.dw1.bits.writemask & TGSI_WRITEMASK_Z) {
00506
00507
00508
00509
00510
00511
00512
00513
00514
00515
00516
00517
00518 emit_math1(c,
00519 BRW_MATH_FUNCTION_LOG,
00520 brw_writemask(tmp, TGSI_WRITEMASK_Z),
00521 brw_swizzle1(tmp, 1),
00522 BRW_MATH_PRECISION_FULL);
00523
00524 brw_ADD(p,
00525 brw_writemask(tmp, TGSI_WRITEMASK_Z),
00526 brw_swizzle1(tmp, 2),
00527 brw_swizzle1(tmp, 0));
00528 }
00529
00530 if (dst.dw1.bits.writemask & TGSI_WRITEMASK_W) {
00531
00532 brw_MOV(p, brw_writemask(tmp, TGSI_WRITEMASK_W), brw_imm_f(1));
00533 }
00534
00535 if (need_tmp) {
00536 brw_MOV(p, dst, tmp);
00537 release_tmp(c, tmp);
00538 }
00539 }
00540
00541
00542
00543
00544
00545
00546 static void emit_dst_noalias( struct brw_vs_compile *c,
00547 struct brw_reg dst,
00548 struct brw_reg arg0,
00549 struct brw_reg arg1)
00550 {
00551 struct brw_compile *p = &c->func;
00552
00553
00554
00555 if (dst.dw1.bits.writemask & TGSI_WRITEMASK_X)
00556 brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_X), brw_imm_f(1.0));
00557 if (dst.dw1.bits.writemask & TGSI_WRITEMASK_Y)
00558 brw_MUL(p, brw_writemask(dst, TGSI_WRITEMASK_Y), arg0, arg1);
00559 if (dst.dw1.bits.writemask & TGSI_WRITEMASK_Z)
00560 brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_Z), arg0);
00561 if (dst.dw1.bits.writemask & TGSI_WRITEMASK_W)
00562 brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_W), arg1);
00563 }
00564
00565 static void emit_xpd( struct brw_compile *p,
00566 struct brw_reg dst,
00567 struct brw_reg t,
00568 struct brw_reg u)
00569 {
00570 brw_MUL(p, brw_null_reg(), brw_swizzle(t, 1,2,0,3), brw_swizzle(u,2,0,1,3));
00571 brw_MAC(p, dst, negate(brw_swizzle(t, 2,0,1,3)), brw_swizzle(u,1,2,0,3));
00572 }
00573
00574
00575
00576 static void emit_lit_noalias( struct brw_vs_compile *c,
00577 struct brw_reg dst,
00578 struct brw_reg arg0 )
00579 {
00580 struct brw_compile *p = &c->func;
00581 struct brw_instruction *if_insn;
00582 struct brw_reg tmp = dst;
00583 boolean need_tmp = (dst.file != BRW_GENERAL_REGISTER_FILE);
00584
00585 if (need_tmp)
00586 tmp = get_tmp(c);
00587
00588 brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_YZ), brw_imm_f(0));
00589 brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_XW), brw_imm_f(1));
00590
00591
00592
00593
00594
00595
00596 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, brw_swizzle1(arg0,0), brw_imm_f(0));
00597 if_insn = brw_IF(p, BRW_EXECUTE_8);
00598 {
00599 brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_Y), brw_swizzle1(arg0,0));
00600
00601 brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, brw_swizzle1(arg0,1), brw_imm_f(0));
00602 brw_MOV(p, brw_writemask(tmp, TGSI_WRITEMASK_Z), brw_swizzle1(arg0,1));
00603 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
00604
00605 emit_math2(c,
00606 BRW_MATH_FUNCTION_POW,
00607 brw_writemask(dst, TGSI_WRITEMASK_Z),
00608 brw_swizzle1(tmp, 2),
00609 brw_swizzle1(arg0, 3),
00610 BRW_MATH_PRECISION_PARTIAL);
00611 }
00612
00613 brw_ENDIF(p, if_insn);
00614 }
00615
00616
00617
00618
00619
00620
00621
00622 static struct brw_reg get_reg( struct brw_vs_compile *c,
00623 unsigned file,
00624 unsigned index )
00625 {
00626 switch (file) {
00627 case TGSI_FILE_TEMPORARY:
00628 case TGSI_FILE_INPUT:
00629 case TGSI_FILE_OUTPUT:
00630 assert(c->regs[file][index].nr != 0);
00631 return c->regs[file][index];
00632 case TGSI_FILE_CONSTANT:
00633 assert(c->regs[TGSI_FILE_CONSTANT][index + c->prog_data.num_imm].nr != 0);
00634 return c->regs[TGSI_FILE_CONSTANT][index + c->prog_data.num_imm];
00635 case TGSI_FILE_IMMEDIATE:
00636 assert(c->regs[TGSI_FILE_CONSTANT][index].nr != 0);
00637 return c->regs[TGSI_FILE_CONSTANT][index];
00638 case TGSI_FILE_ADDRESS:
00639 assert(index == 0);
00640 return c->regs[file][index];
00641
00642 case TGSI_FILE_NULL:
00643 return brw_null_reg();
00644
00645 default:
00646 assert(0);
00647 return brw_null_reg();
00648 }
00649 }
00650
00651
00652
00653 static struct brw_reg deref( struct brw_vs_compile *c,
00654 struct brw_reg arg,
00655 int offset)
00656 {
00657 struct brw_compile *p = &c->func;
00658 struct brw_reg tmp = vec4(get_tmp(c));
00659 struct brw_reg vp_address = retype(vec1(get_reg(c, TGSI_FILE_ADDRESS, 0)), BRW_REGISTER_TYPE_UW);
00660 unsigned byte_offset = arg.nr * 32 + arg.subnr + offset * 16;
00661 struct brw_reg indirect = brw_vec4_indirect(0,0);
00662
00663 {
00664 brw_push_insn_state(p);
00665 brw_set_access_mode(p, BRW_ALIGN_1);
00666
00667
00668
00669
00670
00671 brw_ADD(p, brw_address_reg(0), vp_address, brw_imm_d(byte_offset));
00672 brw_MOV(p, tmp, indirect);
00673
00674 brw_ADD(p, brw_address_reg(0), suboffset(vp_address, 8), brw_imm_d(byte_offset));
00675 brw_MOV(p, suboffset(tmp, 4), indirect);
00676
00677 brw_pop_insn_state(p);
00678 }
00679
00680 return vec8(tmp);
00681 }
00682
00683
00684 static void emit_arl( struct brw_vs_compile *c,
00685 struct brw_reg dst,
00686 struct brw_reg arg0 )
00687 {
00688 struct brw_compile *p = &c->func;
00689 struct brw_reg tmp = dst;
00690 boolean need_tmp = (dst.file != BRW_GENERAL_REGISTER_FILE);
00691
00692 if (need_tmp)
00693 tmp = get_tmp(c);
00694
00695 brw_RNDD(p, tmp, arg0);
00696 brw_MUL(p, dst, tmp, brw_imm_d(16));
00697
00698 if (need_tmp)
00699 release_tmp(c, tmp);
00700 }
00701
00702
00703
00704
00705
00706
00707 static struct brw_reg get_arg( struct brw_vs_compile *c,
00708 struct tgsi_src_register *src )
00709 {
00710 struct brw_reg reg;
00711
00712 if (src->File == TGSI_FILE_NULL)
00713 return brw_null_reg();
00714
00715 #if 0
00716 if (src->RelAddr)
00717 reg = deref(c, c->regs[PROGRAM_STATE_VAR][0], src->Index);
00718 else
00719 #endif
00720 reg = get_reg(c, src->File, src->Index);
00721
00722
00723
00724 reg.dw1.bits.swizzle = BRW_SWIZZLE4(src->SwizzleX,
00725 src->SwizzleY,
00726 src->SwizzleZ,
00727 src->SwizzleW);
00728
00729
00730
00731 reg.negate = src->Negate ? 1 : 0;
00732
00733 return reg;
00734 }
00735
00736
00737 static struct brw_reg get_dst( struct brw_vs_compile *c,
00738 const struct tgsi_dst_register *dst )
00739 {
00740 struct brw_reg reg = get_reg(c, dst->File, dst->Index);
00741
00742 reg.dw1.bits.writemask = dst->WriteMask;
00743
00744 return reg;
00745 }
00746
00747
00748
00749
00750 static void emit_swz( struct brw_vs_compile *c,
00751 struct brw_reg dst,
00752 struct tgsi_src_register src )
00753 {
00754 struct brw_compile *p = &c->func;
00755 unsigned zeros_mask = 0;
00756 unsigned ones_mask = 0;
00757 unsigned src_mask = 0;
00758 ubyte src_swz[4];
00759 boolean need_tmp = (src.Negate &&
00760 dst.file != BRW_GENERAL_REGISTER_FILE);
00761 struct brw_reg tmp = dst;
00762 unsigned i;
00763
00764 if (need_tmp)
00765 tmp = get_tmp(c);
00766
00767 for (i = 0; i < 4; i++) {
00768 if (dst.dw1.bits.writemask & (1<<i)) {
00769 ubyte s = 0;
00770 switch(i) {
00771 case 0:
00772 s = src.SwizzleX;
00773 break;
00774 s = src.SwizzleY;
00775 case 1:
00776 break;
00777 s = src.SwizzleZ;
00778 case 2:
00779 break;
00780 s = src.SwizzleW;
00781 case 3:
00782 break;
00783 }
00784 switch (s) {
00785 case TGSI_SWIZZLE_X:
00786 case TGSI_SWIZZLE_Y:
00787 case TGSI_SWIZZLE_Z:
00788 case TGSI_SWIZZLE_W:
00789 src_mask |= 1<<i;
00790 src_swz[i] = s;
00791 break;
00792 case TGSI_EXTSWIZZLE_ZERO:
00793 zeros_mask |= 1<<i;
00794 break;
00795 case TGSI_EXTSWIZZLE_ONE:
00796 ones_mask |= 1<<i;
00797 break;
00798 }
00799 }
00800 }
00801
00802
00803
00804 if (src_mask) {
00805 struct brw_reg arg0;
00806
00807 #if 0
00808 if (src.RelAddr)
00809 arg0 = deref(c, c->regs[PROGRAM_STATE_VAR][0], src.Index);
00810 else
00811 #endif
00812 arg0 = get_reg(c, src.File, src.Index);
00813
00814 arg0 = brw_swizzle(arg0,
00815 src_swz[0], src_swz[1],
00816 src_swz[2], src_swz[3]);
00817
00818 brw_MOV(p, brw_writemask(tmp, src_mask), arg0);
00819 }
00820
00821 if (zeros_mask)
00822 brw_MOV(p, brw_writemask(tmp, zeros_mask), brw_imm_f(0));
00823
00824 if (ones_mask)
00825 brw_MOV(p, brw_writemask(tmp, ones_mask), brw_imm_f(1));
00826
00827 if (src.Negate)
00828 brw_MOV(p, brw_writemask(tmp, src.Negate), negate(tmp));
00829
00830 if (need_tmp) {
00831 brw_MOV(p, dst, tmp);
00832 release_tmp(c, tmp);
00833 }
00834 }
00835
00836
00837
00838
00839
00840 static void emit_vertex_write( struct brw_vs_compile *c, struct brw_prog_info *info)
00841 {
00842 struct brw_compile *p = &c->func;
00843 struct brw_reg m0 = brw_message_reg(0);
00844 struct brw_reg pos = c->regs[TGSI_FILE_OUTPUT][info->pos_idx];
00845 struct brw_reg ndc;
00846
00847 if (c->key.copy_edgeflag) {
00848 brw_MOV(p,
00849 get_reg(c, TGSI_FILE_OUTPUT, info->result_edge_idx),
00850 get_reg(c, TGSI_FILE_INPUT, info->edge_flag_idx));
00851 }
00852
00853
00854
00855
00856 if (!c->key.know_w_is_one) {
00857 ndc = get_tmp(c);
00858 emit_math1(c, BRW_MATH_FUNCTION_INV, ndc, brw_swizzle1(pos, 3), BRW_MATH_PRECISION_FULL);
00859 brw_MUL(p, brw_writemask(ndc, TGSI_WRITEMASK_XYZ), pos, ndc);
00860 }
00861 else {
00862 ndc = pos;
00863 }
00864
00865
00866
00867
00868 if (info->writes_psize ||
00869 c->key.nr_userclip ||
00870 !c->key.know_w_is_one)
00871 {
00872 struct brw_reg header1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
00873 unsigned i;
00874
00875 brw_MOV(p, header1, brw_imm_ud(0));
00876
00877 brw_set_access_mode(p, BRW_ALIGN_16);
00878
00879 if (info->writes_psize) {
00880 struct brw_reg psiz = c->regs[TGSI_FILE_OUTPUT][info->psize_idx];
00881 brw_MUL(p, brw_writemask(header1, TGSI_WRITEMASK_W),
00882 brw_swizzle1(psiz, 0), brw_imm_f(1<<11));
00883 brw_AND(p, brw_writemask(header1, TGSI_WRITEMASK_W), header1,
00884 brw_imm_ud(0x7ff<<8));
00885 }
00886
00887
00888 for (i = 0; i < c->key.nr_userclip; i++) {
00889 brw_set_conditionalmod(p, BRW_CONDITIONAL_L);
00890 brw_DP4(p, brw_null_reg(), pos, c->userplane[i]);
00891 brw_OR(p, brw_writemask(header1, TGSI_WRITEMASK_W), header1, brw_imm_ud(1<<i));
00892 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
00893 }
00894
00895
00896
00897
00898
00899
00900
00901
00902
00903
00904
00905 if (!c->key.know_w_is_one) {
00906 brw_CMP(p,
00907 vec8(brw_null_reg()),
00908 BRW_CONDITIONAL_L,
00909 brw_swizzle1(ndc, 3),
00910 brw_imm_f(0));
00911
00912 brw_OR(p, brw_writemask(header1, TGSI_WRITEMASK_W), header1, brw_imm_ud(1<<6));
00913 brw_MOV(p, ndc, brw_imm_f(0));
00914 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
00915 }
00916
00917 brw_set_access_mode(p, BRW_ALIGN_1);
00918 brw_MOV(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD), header1);
00919 brw_set_access_mode(p, BRW_ALIGN_16);
00920
00921 release_tmp(c, header1);
00922 }
00923 else {
00924 brw_MOV(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD), brw_imm_ud(0));
00925 }
00926
00927
00928
00929
00930
00931 brw_set_access_mode(p, BRW_ALIGN_1);
00932 brw_MOV(p, offset(m0, 2), ndc);
00933 brw_MOV(p, offset(m0, 3), pos);
00934
00935
00936 brw_urb_WRITE(p,
00937 brw_null_reg(),
00938 0,
00939 c->r0,
00940 0,
00941 1,
00942 c->nr_outputs + 3,
00943 0,
00944 1,
00945 1,
00946 0,
00947 BRW_URB_SWIZZLE_INTERLEAVE);
00948
00949 }
00950
00951 static void
00952 post_vs_emit( struct brw_vs_compile *c, struct brw_instruction *end_inst )
00953 {
00954 struct tgsi_parse_context parse;
00955 const struct tgsi_token *tokens = c->vp->program.tokens;
00956 tgsi_parse_init(&parse, tokens);
00957 while (!tgsi_parse_end_of_tokens(&parse)) {
00958 tgsi_parse_token(&parse);
00959 if (parse.FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION) {
00960 #if 0
00961 struct brw_instruction *brw_inst1, *brw_inst2;
00962 const struct tgsi_full_instruction *inst1, *inst2;
00963 int offset;
00964 inst1 = &parse.FullToken.FullInstruction;
00965 brw_inst1 = inst1->Data;
00966 switch (inst1->Opcode) {
00967 case TGSI_OPCODE_CAL:
00968 case TGSI_OPCODE_BRA:
00969 target_insn = inst1->BranchTarget;
00970 inst2 = &c->vp->program.Base.Instructions[target_insn];
00971 brw_inst2 = inst2->Data;
00972 offset = brw_inst2 - brw_inst1;
00973 brw_set_src1(brw_inst1, brw_imm_d(offset*16));
00974 break;
00975 case TGSI_OPCODE_END:
00976 offset = end_inst - brw_inst1;
00977 brw_set_src1(brw_inst1, brw_imm_d(offset*16));
00978 break;
00979 default:
00980 break;
00981 }
00982 #endif
00983 }
00984 }
00985 tgsi_parse_free(&parse);
00986 }
00987
00988 static void process_declaration(const struct tgsi_full_declaration *decl,
00989 struct brw_prog_info *info)
00990 {
00991 int first = decl->DeclarationRange.First;
00992 int last = decl->DeclarationRange.Last;
00993
00994 switch(decl->Declaration.File) {
00995 case TGSI_FILE_CONSTANT:
00996 info->num_consts += last - first + 1;
00997 break;
00998 case TGSI_FILE_INPUT: {
00999 }
01000 break;
01001 case TGSI_FILE_OUTPUT: {
01002 assert(last == first);
01003 if (decl->Declaration.Semantic) {
01004 switch (decl->Semantic.SemanticName) {
01005 case TGSI_SEMANTIC_POSITION: {
01006 info->pos_idx = first;
01007 }
01008 break;
01009 case TGSI_SEMANTIC_COLOR:
01010 break;
01011 case TGSI_SEMANTIC_BCOLOR:
01012 break;
01013 case TGSI_SEMANTIC_FOG:
01014 break;
01015 case TGSI_SEMANTIC_PSIZE: {
01016 info->writes_psize = TRUE;
01017 info->psize_idx = first;
01018 }
01019 break;
01020 case TGSI_SEMANTIC_GENERIC:
01021 break;
01022 }
01023 }
01024 }
01025 break;
01026 case TGSI_FILE_TEMPORARY: {
01027 info->num_temps += (last - first) + 1;
01028 }
01029 break;
01030 case TGSI_FILE_SAMPLER: {
01031 }
01032 break;
01033 case TGSI_FILE_ADDRESS: {
01034 info->num_addrs += (last - first) + 1;
01035 }
01036 break;
01037 case TGSI_FILE_IMMEDIATE: {
01038 }
01039 break;
01040 case TGSI_FILE_NULL: {
01041 }
01042 break;
01043 }
01044 }
01045
01046 static void process_instruction(struct brw_vs_compile *c,
01047 struct tgsi_full_instruction *inst,
01048 struct brw_prog_info *info)
01049 {
01050 struct brw_reg args[3], dst;
01051 struct brw_compile *p = &c->func;
01052
01053 unsigned i;
01054 unsigned index;
01055 unsigned file;
01056
01057 const struct tgsi_dst_register *dst_reg = &inst->FullDstRegisters[0].DstRegister;
01058
01059
01060
01061
01062
01063 for (i = 0; i < 3; i++) {
01064 struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i];
01065 index = src->SrcRegister.Index;
01066 file = src->SrcRegister.File;
01067 if (file == TGSI_FILE_OUTPUT && c->output_regs[index].used_in_src)
01068 args[i] = c->output_regs[index].reg;
01069 else
01070 args[i] = get_arg(c, &src->SrcRegister);
01071 }
01072
01073
01074
01075
01076
01077 index = dst_reg->Index;
01078 file = dst_reg->File;
01079 if (file == TGSI_FILE_OUTPUT && c->output_regs[index].used_in_src)
01080 dst = c->output_regs[index].reg;
01081 else
01082 dst = get_dst(c, dst_reg);
01083
01084 switch (inst->Instruction.Opcode) {
01085 case TGSI_OPCODE_ABS:
01086 brw_MOV(p, dst, brw_abs(args[0]));
01087 break;
01088 case TGSI_OPCODE_ADD:
01089 brw_ADD(p, dst, args[0], args[1]);
01090 break;
01091 case TGSI_OPCODE_DP3:
01092 brw_DP3(p, dst, args[0], args[1]);
01093 break;
01094 case TGSI_OPCODE_DP4:
01095 brw_DP4(p, dst, args[0], args[1]);
01096 break;
01097 case TGSI_OPCODE_DPH:
01098 brw_DPH(p, dst, args[0], args[1]);
01099 break;
01100 case TGSI_OPCODE_DST:
01101 unalias2(c, dst, args[0], args[1], emit_dst_noalias);
01102 break;
01103 case TGSI_OPCODE_EXP:
01104 unalias1(c, dst, args[0], emit_exp_noalias);
01105 break;
01106 case TGSI_OPCODE_EX2:
01107 emit_math1(c, BRW_MATH_FUNCTION_EXP, dst, args[0], BRW_MATH_PRECISION_FULL);
01108 break;
01109 case TGSI_OPCODE_ARL:
01110 emit_arl(c, dst, args[0]);
01111 break;
01112 case TGSI_OPCODE_FLR:
01113 brw_RNDD(p, dst, args[0]);
01114 break;
01115 case TGSI_OPCODE_FRC:
01116 brw_FRC(p, dst, args[0]);
01117 break;
01118 case TGSI_OPCODE_LOG:
01119 unalias1(c, dst, args[0], emit_log_noalias);
01120 break;
01121 case TGSI_OPCODE_LG2:
01122 emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, args[0], BRW_MATH_PRECISION_FULL);
01123 break;
01124 case TGSI_OPCODE_LIT:
01125 unalias1(c, dst, args[0], emit_lit_noalias);
01126 break;
01127 case TGSI_OPCODE_MAD:
01128 brw_MOV(p, brw_acc_reg(), args[2]);
01129 brw_MAC(p, dst, args[0], args[1]);
01130 break;
01131 case TGSI_OPCODE_MAX:
01132 emit_max(p, dst, args[0], args[1]);
01133 break;
01134 case TGSI_OPCODE_MIN:
01135 emit_min(p, dst, args[0], args[1]);
01136 break;
01137 case TGSI_OPCODE_MOV:
01138 case TGSI_OPCODE_SWZ:
01139 #if 0
01140
01141
01142
01143 emit_swz(c, dst, inst->SrcReg[0] );
01144 #endif
01145 brw_MOV(p, dst, args[0]);
01146 break;
01147 case TGSI_OPCODE_MUL:
01148 brw_MUL(p, dst, args[0], args[1]);
01149 break;
01150 case TGSI_OPCODE_POW:
01151 emit_math2(c, BRW_MATH_FUNCTION_POW, dst, args[0], args[1], BRW_MATH_PRECISION_FULL);
01152 break;
01153 case TGSI_OPCODE_RCP:
01154 emit_math1(c, BRW_MATH_FUNCTION_INV, dst, args[0], BRW_MATH_PRECISION_FULL);
01155 break;
01156 case TGSI_OPCODE_RSQ:
01157 emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, args[0], BRW_MATH_PRECISION_FULL);
01158 break;
01159
01160 case TGSI_OPCODE_SEQ:
01161 emit_seq(p, dst, args[0], args[1]);
01162 break;
01163 case TGSI_OPCODE_SNE:
01164 emit_sne(p, dst, args[0], args[1]);
01165 break;
01166 case TGSI_OPCODE_SGE:
01167 emit_sge(p, dst, args[0], args[1]);
01168 break;
01169 case TGSI_OPCODE_SGT:
01170 emit_sgt(p, dst, args[0], args[1]);
01171 break;
01172 case TGSI_OPCODE_SLT:
01173 emit_slt(p, dst, args[0], args[1]);
01174 break;
01175 case TGSI_OPCODE_SLE:
01176 emit_sle(p, dst, args[0], args[1]);
01177 break;
01178 case TGSI_OPCODE_SUB:
01179 brw_ADD(p, dst, args[0], negate(args[1]));
01180 break;
01181 case TGSI_OPCODE_XPD:
01182 emit_xpd(p, dst, args[0], args[1]);
01183 break;
01184 #if 0
01185 case TGSI_OPCODE_IF:
01186 assert(if_insn < MAX_IFSN);
01187 if_inst[if_insn++] = brw_IF(p, BRW_EXECUTE_8);
01188 break;
01189 case TGSI_OPCODE_ELSE:
01190 if_inst[if_insn-1] = brw_ELSE(p, if_inst[if_insn-1]);
01191 break;
01192 case TGSI_OPCODE_ENDIF:
01193 assert(if_insn > 0);
01194 brw_ENDIF(p, if_inst[--if_insn]);
01195 break;
01196 case TGSI_OPCODE_BRA:
01197 brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
01198 brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
01199 brw_set_predicate_control_flag_value(p, 0xff);
01200 break;
01201 case TGSI_OPCODE_CAL:
01202 brw_set_access_mode(p, BRW_ALIGN_1);
01203 brw_ADD(p, deref_1uw(stack_index, 0), brw_ip_reg(), brw_imm_d(3*16));
01204 brw_set_access_mode(p, BRW_ALIGN_16);
01205 brw_ADD(p, get_addr_reg(stack_index),
01206 get_addr_reg(stack_index), brw_imm_d(4));
01207 inst->Data = &p->store[p->nr_insn];
01208 brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
01209 break;
01210 #endif
01211 case TGSI_OPCODE_RET:
01212 #if 0
01213 brw_ADD(p, get_addr_reg(stack_index),
01214 get_addr_reg(stack_index), brw_imm_d(-4));
01215 brw_set_access_mode(p, BRW_ALIGN_1);
01216 brw_MOV(p, brw_ip_reg(), deref_1uw(stack_index, 0));
01217 brw_set_access_mode(p, BRW_ALIGN_16);
01218 #else
01219
01220 #endif
01221 break;
01222 case TGSI_OPCODE_END:
01223 brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
01224 break;
01225 case TGSI_OPCODE_BGNSUB:
01226 case TGSI_OPCODE_ENDSUB:
01227 break;
01228 default:
01229 debug_printf("Unsupport opcode %d in vertex shader\n", inst->Instruction.Opcode);
01230 break;
01231 }
01232
01233 if (dst_reg->File == TGSI_FILE_OUTPUT
01234 && dst_reg->Index != info->pos_idx
01235 && c->output_regs[dst_reg->Index].used_in_src)
01236 brw_MOV(p, get_dst(c, dst_reg), dst);
01237
01238 release_tmps(c);
01239 }
01240
01241
01242
01243 void brw_vs_emit(struct brw_vs_compile *c)
01244 {
01245 #define MAX_IFSN 32
01246 struct brw_compile *p = &c->func;
01247 struct brw_instruction *end_inst;
01248 struct tgsi_parse_context parse;
01249 struct brw_indirect stack_index = brw_indirect(0, 0);
01250 const struct tgsi_token *tokens = c->vp->program.tokens;
01251 struct brw_prog_info prog_info;
01252 unsigned allocated_registers = 0;
01253 memset(&prog_info, 0, sizeof(struct brw_prog_info));
01254
01255 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
01256 brw_set_access_mode(p, BRW_ALIGN_16);
01257
01258 tgsi_parse_init(&parse, tokens);
01259
01260
01261 while (!tgsi_parse_end_of_tokens(&parse)) {
01262 tgsi_parse_token(&parse);
01263 unsigned i;
01264 switch (parse.FullToken.Token.Type) {
01265 case TGSI_TOKEN_TYPE_INSTRUCTION: {
01266 const struct tgsi_full_instruction *inst = &parse.FullToken.FullInstruction;
01267 for (i = 0; i < 3; ++i) {
01268 const struct tgsi_src_register *src = &inst->FullSrcRegisters[i].SrcRegister;
01269 unsigned index = src->Index;
01270 unsigned file = src->File;
01271 if (file == TGSI_FILE_OUTPUT)
01272 c->output_regs[index].used_in_src = TRUE;
01273 }
01274 }
01275 break;
01276 default:
01277
01278 break;
01279 }
01280 }
01281 tgsi_parse_free(&parse);
01282
01283 tgsi_parse_init(&parse, tokens);
01284
01285 while (!tgsi_parse_end_of_tokens(&parse)) {
01286 tgsi_parse_token(&parse);
01287
01288 switch (parse.FullToken.Token.Type) {
01289 case TGSI_TOKEN_TYPE_DECLARATION: {
01290 struct tgsi_full_declaration *decl = &parse.FullToken.FullDeclaration;
01291 process_declaration(decl, &prog_info);
01292 }
01293 break;
01294 case TGSI_TOKEN_TYPE_IMMEDIATE: {
01295 struct tgsi_full_immediate *imm = &parse.FullToken.FullImmediate;
01296
01297 c->prog_data.imm_buf[c->prog_data.num_imm][0] = imm->u.ImmediateFloat32[0].Float;
01298 c->prog_data.imm_buf[c->prog_data.num_imm][1] = imm->u.ImmediateFloat32[1].Float;
01299 c->prog_data.imm_buf[c->prog_data.num_imm][2] = imm->u.ImmediateFloat32[2].Float;
01300 c->prog_data.imm_buf[c->prog_data.num_imm][3] = imm->u.ImmediateFloat32[3].Float;
01301 c->prog_data.num_imm++;
01302 }
01303 break;
01304 case TGSI_TOKEN_TYPE_INSTRUCTION: {
01305 struct tgsi_full_instruction *inst = &parse.FullToken.FullInstruction;
01306 if (!allocated_registers) {
01307
01308
01309
01310 c->prog_data.num_consts = prog_info.num_consts;
01311 c->prog_data.max_const = prog_info.num_consts + c->prog_data.num_imm;
01312 brw_vs_alloc_regs(c, &prog_info);
01313
01314 brw_set_access_mode(p, BRW_ALIGN_1);
01315 brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack));
01316 brw_set_access_mode(p, BRW_ALIGN_16);
01317 allocated_registers = 1;
01318 }
01319 process_instruction(c, inst, &prog_info);
01320 }
01321 break;
01322 }
01323 }
01324
01325 end_inst = &p->store[p->nr_insn];
01326 emit_vertex_write(c, &prog_info);
01327 post_vs_emit(c, end_inst);
01328 tgsi_parse_free(&parse);
01329
01330 }