00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00039 #include "pipe/p_defines.h"
00040 #include "pipe/p_state.h"
00041
00042 #include "cell_context.h"
00043
00044 #include "rtasm/rtasm_ppc_spe.h"
00045
00046
00060 static void
00061 emit_alpha_test(struct pipe_depth_stencil_alpha_state *dsa,
00062 struct spe_function *f, int mask, int alphas)
00063 {
00064
00065
00066
00067
00068 if (dsa->alpha.enabled
00069 && (dsa->alpha.func != PIPE_FUNC_NEVER)
00070 && (dsa->alpha.func != PIPE_FUNC_ALWAYS)) {
00071 int ref = spe_allocate_available_register(f);
00072 int tmp_a = spe_allocate_available_register(f);
00073 int tmp_b = spe_allocate_available_register(f);
00074 union {
00075 float f;
00076 unsigned u;
00077 } ref_val;
00078 boolean complement = FALSE;
00079
00080 ref_val.f = dsa->alpha.ref;
00081
00082 spe_il(f, ref, ref_val.u & 0x0000ffff);
00083 spe_ilh(f, ref, ref_val.u >> 16);
00084
00085 switch (dsa->alpha.func) {
00086 case PIPE_FUNC_NOTEQUAL:
00087 complement = TRUE;
00088
00089
00090 case PIPE_FUNC_EQUAL:
00091 spe_fceq(f, tmp_a, ref, alphas);
00092 break;
00093
00094 case PIPE_FUNC_LEQUAL:
00095 complement = TRUE;
00096
00097
00098 case PIPE_FUNC_GREATER:
00099 spe_fcgt(f, tmp_a, ref, alphas);
00100 break;
00101
00102 case PIPE_FUNC_LESS:
00103 complement = TRUE;
00104
00105
00106 case PIPE_FUNC_GEQUAL:
00107 spe_fcgt(f, tmp_a, ref, alphas);
00108 spe_fceq(f, tmp_b, ref, alphas);
00109 spe_or(f, tmp_a, tmp_b, tmp_a);
00110 break;
00111
00112 case PIPE_FUNC_ALWAYS:
00113 case PIPE_FUNC_NEVER:
00114 default:
00115 assert(0);
00116 break;
00117 }
00118
00119 if (complement) {
00120 spe_andc(f, mask, mask, tmp_a);
00121 } else {
00122 spe_and(f, mask, mask, tmp_a);
00123 }
00124
00125 spe_release_register(f, ref);
00126 spe_release_register(f, tmp_a);
00127 spe_release_register(f, tmp_b);
00128 } else if (dsa->alpha.enabled && (dsa->alpha.func == PIPE_FUNC_NEVER)) {
00129 spe_il(f, mask, 0);
00130 }
00131 }
00132
00133
00149 static boolean
00150 emit_depth_test(struct pipe_depth_stencil_alpha_state *dsa,
00151 struct spe_function *f, int mask, int stored, int calculated)
00152 {
00153 unsigned func = (dsa->depth.enabled)
00154 ? dsa->depth.func : PIPE_FUNC_ALWAYS;
00155 int tmp = spe_allocate_available_register(f);
00156 boolean compliment = FALSE;
00157
00158 switch (func) {
00159 case PIPE_FUNC_NEVER:
00160 spe_il(f, mask, 0);
00161 break;
00162
00163 case PIPE_FUNC_NOTEQUAL:
00164 compliment = TRUE;
00165
00166 case PIPE_FUNC_EQUAL:
00167 spe_ceq(f, mask, calculated, stored);
00168 break;
00169
00170 case PIPE_FUNC_LEQUAL:
00171 compliment = TRUE;
00172
00173 case PIPE_FUNC_GREATER:
00174 spe_clgt(f, mask, calculated, stored);
00175 break;
00176
00177 case PIPE_FUNC_LESS:
00178 compliment = TRUE;
00179
00180 case PIPE_FUNC_GEQUAL:
00181 spe_clgt(f, mask, calculated, stored);
00182 spe_ceq(f, tmp, calculated, stored);
00183 spe_or(f, mask, mask, tmp);
00184 break;
00185
00186 case PIPE_FUNC_ALWAYS:
00187 spe_il(f, mask, ~0);
00188 break;
00189
00190 default:
00191 assert(0);
00192 break;
00193 }
00194
00195 spe_release_register(f, tmp);
00196 return compliment;
00197 }
00198
00199
00208 static void
00209 emit_stencil_op(struct spe_function *f,
00210 int out, int in, int mask, unsigned op, unsigned ref)
00211 {
00212 const int clamp = spe_allocate_available_register(f);
00213 const int clamp_mask = spe_allocate_available_register(f);
00214 const int result = spe_allocate_available_register(f);
00215
00216 switch(op) {
00217 case PIPE_STENCIL_OP_KEEP:
00218 assert(0);
00219 case PIPE_STENCIL_OP_ZERO:
00220 spe_il(f, result, 0);
00221 break;
00222 case PIPE_STENCIL_OP_REPLACE:
00223 spe_il(f, result, ref);
00224 break;
00225 case PIPE_STENCIL_OP_INCR:
00226
00227 spe_il(f, clamp, 0x0ff);
00228
00229 spe_ai(f, result, in, 1);
00230
00231 spe_clgti(f, clamp_mask, result, 0x0ff);
00232
00233 spe_selb(f, result, result, clamp, clamp_mask);
00234 break;
00235 case PIPE_STENCIL_OP_DECR:
00236 spe_il(f, clamp, 0);
00237 spe_ai(f, result, in, -1);
00238
00239
00240
00241
00242 spe_clgti(f, clamp_mask, result, 0x0ff);
00243 spe_selb(f, result, result, clamp, clamp_mask);
00244 break;
00245 case PIPE_STENCIL_OP_INCR_WRAP:
00246 spe_ai(f, result, in, 1);
00247 break;
00248 case PIPE_STENCIL_OP_DECR_WRAP:
00249 spe_ai(f, result, in, -1);
00250 break;
00251 case PIPE_STENCIL_OP_INVERT:
00252 spe_nor(f, result, in, in);
00253 break;
00254 default:
00255 assert(0);
00256 }
00257
00258 spe_selb(f, out, in, result, mask);
00259
00260 spe_release_register(f, result);
00261 spe_release_register(f, clamp_mask);
00262 spe_release_register(f, clamp);
00263 }
00264
00265
00283 static int
00284 emit_stencil_test(struct pipe_depth_stencil_alpha_state *dsa,
00285 unsigned face,
00286 struct spe_function *f,
00287 int mask,
00288 int depth_mask,
00289 boolean depth_complement,
00290 int stencil,
00291 int depth_pass)
00292 {
00293 int stencil_fail = spe_allocate_available_register(f);
00294 int depth_fail = spe_allocate_available_register(f);
00295 int stencil_mask = spe_allocate_available_register(f);
00296 int stencil_pass = spe_allocate_available_register(f);
00297 int face_stencil = spe_allocate_available_register(f);
00298 int stencil_src = stencil;
00299 const unsigned ref = (dsa->stencil[face].ref_value
00300 & dsa->stencil[face].value_mask);
00301 boolean complement = FALSE;
00302 int stored;
00303 int tmp = spe_allocate_available_register(f);
00304
00305
00306 if ((dsa->stencil[face].func != PIPE_FUNC_NEVER)
00307 && (dsa->stencil[face].func != PIPE_FUNC_ALWAYS)
00308 && (dsa->stencil[face].value_mask != 0x0ff)) {
00309 stored = spe_allocate_available_register(f);
00310 spe_andi(f, stored, stencil, dsa->stencil[face].value_mask);
00311 } else {
00312 stored = stencil;
00313 }
00314
00315
00316 switch (dsa->stencil[face].func) {
00317 case PIPE_FUNC_NEVER:
00318 spe_il(f, stencil_mask, 0);
00319 break;
00320
00321 case PIPE_FUNC_NOTEQUAL:
00322 complement = TRUE;
00323
00324 case PIPE_FUNC_EQUAL:
00325
00326 spe_ceqi(f, stencil_mask, stored, ref);
00327 break;
00328
00329 case PIPE_FUNC_LEQUAL:
00330 complement = TRUE;
00331
00332 case PIPE_FUNC_GREATER:
00333 complement = TRUE;
00334
00335 spe_clgti(f, stencil_mask, stored, ref);
00336 break;
00337
00338 case PIPE_FUNC_LESS:
00339 complement = TRUE;
00340
00341 case PIPE_FUNC_GEQUAL:
00342
00343 spe_clgti(f, stencil_mask, stored, ref);
00344
00345 spe_ceqi(f, tmp, stored, ref);
00346
00347 spe_or(f, stencil_mask, stencil_mask, tmp);
00348 break;
00349
00350 case PIPE_FUNC_ALWAYS:
00351
00352 break;
00353
00354 default:
00355 assert(0);
00356 break;
00357 }
00358
00359 if (stored != stencil) {
00360 spe_release_register(f, stored);
00361 }
00362 spe_release_register(f, tmp);
00363
00364
00365
00366
00367
00368
00369
00370 if (dsa->stencil[face].func == PIPE_FUNC_ALWAYS) {
00371 spe_release_register(f, stencil_pass);
00372 stencil_pass = mask;
00373 } else {
00374 if (complement) {
00375 spe_andc(f, stencil_pass, mask, stencil_mask);
00376 } else {
00377 spe_and(f, stencil_pass, mask, stencil_mask);
00378 }
00379 }
00380
00381 if (depth_complement) {
00382 spe_andc(f, depth_pass, stencil_pass, depth_mask);
00383 } else {
00384 spe_and(f, depth_pass, stencil_pass, depth_mask);
00385 }
00386
00387
00388
00389
00390
00391
00392
00393
00394
00395
00396
00397
00398 if (dsa->stencil[face].write_mask != 0) {
00399 if ((dsa->stencil[face].func != PIPE_FUNC_ALWAYS)
00400 && (dsa->stencil[face].fail_op != PIPE_STENCIL_OP_KEEP)) {
00401 if (complement) {
00402 spe_and(f, stencil_fail, mask, stencil_mask);
00403 } else {
00404 spe_andc(f, stencil_fail, mask, stencil_mask);
00405 }
00406
00407 emit_stencil_op(f, face_stencil, stencil_src, stencil_fail,
00408 dsa->stencil[face].fail_op,
00409 dsa->stencil[face].ref_value);
00410
00411 stencil_src = face_stencil;
00412 }
00413
00414 if ((dsa->stencil[face].func != PIPE_FUNC_NEVER)
00415 && (dsa->stencil[face].zfail_op != PIPE_STENCIL_OP_KEEP)) {
00416 if (depth_complement) {
00417 spe_and(f, depth_fail, stencil_pass, depth_mask);
00418 } else {
00419 spe_andc(f, depth_fail, stencil_pass, depth_mask);
00420 }
00421
00422 emit_stencil_op(f, face_stencil, stencil_src, depth_fail,
00423 dsa->stencil[face].zfail_op,
00424 dsa->stencil[face].ref_value);
00425 stencil_src = face_stencil;
00426 }
00427
00428 if ((dsa->stencil[face].func != PIPE_FUNC_NEVER)
00429 && (dsa->stencil[face].zpass_op != PIPE_STENCIL_OP_KEEP)) {
00430 emit_stencil_op(f, face_stencil, stencil_src, depth_pass,
00431 dsa->stencil[face].zpass_op,
00432 dsa->stencil[face].ref_value);
00433 stencil_src = face_stencil;
00434 }
00435 }
00436
00437 spe_release_register(f, stencil_fail);
00438 spe_release_register(f, depth_fail);
00439 spe_release_register(f, stencil_mask);
00440 if (stencil_pass != mask) {
00441 spe_release_register(f, stencil_pass);
00442 }
00443
00444
00445
00446
00447
00448
00449
00450 if (stencil_src == stencil) {
00451 spe_release_register(f, face_stencil);
00452 } else if (dsa->stencil[face].write_mask != 0x0ff) {
00453 int tmp = spe_allocate_available_register(f);
00454
00455 spe_il(f, tmp, dsa->stencil[face].write_mask);
00456 spe_selb(f, stencil_src, stencil, stencil_src, tmp);
00457
00458 spe_release_register(f, tmp);
00459 }
00460
00461 return stencil_src;
00462 }
00463
00464
00465 void
00466 cell_generate_depth_stencil_test(struct cell_depth_stencil_alpha_state *cdsa)
00467 {
00468 struct pipe_depth_stencil_alpha_state *const dsa = &cdsa->base;
00469 struct spe_function *const f = &cdsa->code;
00470
00471
00472
00473
00474
00475 spe_init_func(f, SPE_INST_SIZE * 64);
00476
00477
00478
00479
00480
00481
00482
00483
00484 int mask = spe_allocate_register(f, 3);
00485 int depth = spe_allocate_register(f, 4);
00486 int stencil = spe_allocate_register(f, 5);
00487 int zvals = spe_allocate_register(f, 6);
00488 int frag_a = spe_allocate_register(f, 7);
00489 int facing = spe_allocate_register(f, 8);
00490
00491 int depth_mask = spe_allocate_available_register(f);
00492
00493 boolean depth_complement;
00494
00495
00496 emit_alpha_test(dsa, f, mask, frag_a);
00497
00498 depth_complement = emit_depth_test(dsa, f, depth_mask, depth, zvals);
00499
00500 if (dsa->stencil[0].enabled) {
00501 const int front_depth_pass = spe_allocate_available_register(f);
00502 int front_stencil = emit_stencil_test(dsa, 0, f, mask,
00503 depth_mask, depth_complement,
00504 stencil, front_depth_pass);
00505
00506 if (dsa->stencil[1].enabled) {
00507 const int back_depth_pass = spe_allocate_available_register(f);
00508 int back_stencil = emit_stencil_test(dsa, 1, f, mask,
00509 depth_mask, depth_complement,
00510 stencil, back_depth_pass);
00511
00512
00513
00514
00515
00516
00517
00518 if (front_stencil != back_stencil) {
00519 spe_selb(f, stencil, back_stencil, front_stencil, facing);
00520 }
00521
00522 if (back_stencil != stencil) {
00523 spe_release_register(f, back_stencil);
00524 }
00525
00526 if (front_stencil != stencil) {
00527 spe_release_register(f, front_stencil);
00528 }
00529
00530 spe_selb(f, mask, back_depth_pass, front_depth_pass, facing);
00531
00532 spe_release_register(f, back_depth_pass);
00533 } else {
00534 if (front_stencil != stencil) {
00535 spe_or(f, stencil, front_stencil, front_stencil);
00536 spe_release_register(f, front_stencil);
00537 }
00538 spe_or(f, mask, front_depth_pass, front_depth_pass);
00539 }
00540
00541 spe_release_register(f, front_depth_pass);
00542 } else if (dsa->depth.enabled) {
00543 if (depth_complement) {
00544 spe_andc(f, mask, mask, depth_mask);
00545 } else {
00546 spe_and(f, mask, mask, depth_mask);
00547 }
00548 }
00549
00550 if (dsa->depth.writemask) {
00551 spe_selb(f, depth, depth, zvals, mask);
00552 }
00553
00554 spe_bi(f, 0, 0, 0);
00555
00556
00557 #if 0
00558 {
00559 const uint32_t *p = f->store;
00560 unsigned i;
00561
00562 printf("# alpha (%sabled)\n",
00563 (dsa->alpha.enabled) ? "en" : "dis");
00564 printf("# func: %u\n", dsa->alpha.func);
00565 printf("# ref: %.2f\n", dsa->alpha.ref);
00566
00567 printf("# depth (%sabled)\n",
00568 (dsa->depth.enabled) ? "en" : "dis");
00569 printf("# func: %u\n", dsa->depth.func);
00570
00571 for (i = 0; i < 2; i++) {
00572 printf("# %s stencil (%sabled)\n",
00573 (i == 0) ? "front" : "back",
00574 (dsa->stencil[i].enabled) ? "en" : "dis");
00575
00576 printf("# func: %u\n", dsa->stencil[i].func);
00577 printf("# op (sf, zf, zp): %u %u %u\n",
00578 dsa->stencil[i].fail_op,
00579 dsa->stencil[i].zfail_op,
00580 dsa->stencil[i].zpass_op);
00581 printf("# ref value / value mask / write mask: %02x %02x %02x\n",
00582 dsa->stencil[i].ref_value,
00583 dsa->stencil[i].value_mask,
00584 dsa->stencil[i].write_mask);
00585 }
00586
00587 printf("\t.text\n");
00588 for (; p < f->csr; p++) {
00589 printf("\t.long\t0x%04x\n", *p);
00590 }
00591 fflush(stdout);
00592 }
00593 #endif
00594 }
00595
00596
00600 static int
00601 emit_alpha_factor_calculation(struct spe_function *f,
00602 unsigned factor,
00603 int src_alpha, int dst_alpha, int const_alpha)
00604 {
00605 int factor_reg;
00606 int tmp;
00607
00608
00609 switch (factor) {
00610 case PIPE_BLENDFACTOR_ONE:
00611 factor_reg = -1;
00612 break;
00613
00614 case PIPE_BLENDFACTOR_SRC_ALPHA:
00615 factor_reg = spe_allocate_available_register(f);
00616
00617 spe_or(f, factor_reg, src_alpha, src_alpha);
00618 break;
00619
00620 case PIPE_BLENDFACTOR_DST_ALPHA:
00621 factor_reg = dst_alpha;
00622 break;
00623
00624 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
00625 factor_reg = -1;
00626 break;
00627
00628 case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
00629 factor_reg = spe_allocate_available_register(f);
00630
00631 tmp = spe_allocate_available_register(f);
00632 spe_il(f, tmp, 1);
00633 spe_cuflt(f, tmp, tmp, 0);
00634 spe_fs(f, factor_reg, tmp, const_alpha);
00635 spe_release_register(f, tmp);
00636 break;
00637
00638 case PIPE_BLENDFACTOR_CONST_ALPHA:
00639 factor_reg = const_alpha;
00640 break;
00641
00642 case PIPE_BLENDFACTOR_ZERO:
00643 factor_reg = -1;
00644 break;
00645
00646 case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
00647 tmp = spe_allocate_available_register(f);
00648 factor_reg = spe_allocate_available_register(f);
00649
00650 spe_il(f, tmp, 1);
00651 spe_cuflt(f, tmp, tmp, 0);
00652 spe_fs(f, factor_reg, tmp, src_alpha);
00653
00654 spe_release_register(f, tmp);
00655 break;
00656
00657 case PIPE_BLENDFACTOR_INV_DST_ALPHA:
00658 tmp = spe_allocate_available_register(f);
00659 factor_reg = spe_allocate_available_register(f);
00660
00661 spe_il(f, tmp, 1);
00662 spe_cuflt(f, tmp, tmp, 0);
00663 spe_fs(f, factor_reg, tmp, dst_alpha);
00664
00665 spe_release_register(f, tmp);
00666 break;
00667
00668 case PIPE_BLENDFACTOR_SRC1_ALPHA:
00669 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
00670 default:
00671 assert(0);
00672 factor_reg = -1;
00673 break;
00674 }
00675
00676 return factor_reg;
00677 }
00678
00679
00683 static void
00684 emit_color_factor_calculation(struct spe_function *f,
00685 unsigned sF, unsigned mask,
00686 const int *src,
00687 const int *dst,
00688 const int *const_color,
00689 int *factor)
00690 {
00691 int tmp;
00692 unsigned i;
00693
00694
00695 factor[0] = -1;
00696 factor[1] = -1;
00697 factor[2] = -1;
00698 factor[3] = -1;
00699
00700 switch (sF) {
00701 case PIPE_BLENDFACTOR_ONE:
00702 break;
00703
00704 case PIPE_BLENDFACTOR_SRC_COLOR:
00705 for (i = 0; i < 3; ++i) {
00706 if ((mask & (1U << i)) != 0) {
00707 factor[i] = spe_allocate_available_register(f);
00708 spe_or(f, factor[i], src[i], src[i]);
00709 }
00710 }
00711 break;
00712
00713 case PIPE_BLENDFACTOR_SRC_ALPHA:
00714 factor[0] = spe_allocate_available_register(f);
00715 factor[1] = factor[0];
00716 factor[2] = factor[0];
00717
00718 spe_or(f, factor[0], src[3], src[3]);
00719 break;
00720
00721 case PIPE_BLENDFACTOR_DST_ALPHA:
00722 factor[0] = dst[3];
00723 factor[1] = dst[3];
00724 factor[2] = dst[3];
00725 break;
00726
00727 case PIPE_BLENDFACTOR_DST_COLOR:
00728 factor[0] = dst[0];
00729 factor[1] = dst[1];
00730 factor[2] = dst[2];
00731 break;
00732
00733 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
00734 tmp = spe_allocate_available_register(f);
00735 factor[0] = spe_allocate_available_register(f);
00736 factor[1] = factor[0];
00737 factor[2] = factor[0];
00738
00739
00740
00741 spe_il(f, tmp, 1);
00742 spe_cuflt(f, tmp, tmp, 0);
00743 spe_fs(f, tmp, tmp, dst[3]);
00744 spe_fcgt(f, factor[0], tmp, src[3]);
00745 spe_selb(f, factor[0], src[3], tmp, factor[0]);
00746
00747 spe_release_register(f, tmp);
00748 break;
00749
00750 case PIPE_BLENDFACTOR_INV_CONST_COLOR:
00751 tmp = spe_allocate_available_register(f);
00752 spe_il(f, tmp, 1);
00753 spe_cuflt(f, tmp, tmp, 0);
00754
00755 for (i = 0; i < 3; i++) {
00756 factor[i] = spe_allocate_available_register(f);
00757
00758 spe_fs(f, factor[i], tmp, const_color[i]);
00759 }
00760 spe_release_register(f, tmp);
00761 break;
00762
00763 case PIPE_BLENDFACTOR_CONST_COLOR:
00764 for (i = 0; i < 3; i++) {
00765 factor[i] = const_color[i];
00766 }
00767 break;
00768
00769 case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
00770 factor[0] = spe_allocate_available_register(f);
00771 factor[1] = factor[0];
00772 factor[2] = factor[0];
00773
00774 tmp = spe_allocate_available_register(f);
00775 spe_il(f, tmp, 1);
00776 spe_cuflt(f, tmp, tmp, 0);
00777 spe_fs(f, factor[0], tmp, const_color[3]);
00778 spe_release_register(f, tmp);
00779 break;
00780
00781 case PIPE_BLENDFACTOR_CONST_ALPHA:
00782 factor[0] = const_color[3];
00783 factor[1] = factor[0];
00784 factor[2] = factor[0];
00785 break;
00786
00787 case PIPE_BLENDFACTOR_ZERO:
00788 break;
00789
00790 case PIPE_BLENDFACTOR_INV_SRC_COLOR:
00791 tmp = spe_allocate_available_register(f);
00792
00793 spe_il(f, tmp, 1);
00794 spe_cuflt(f, tmp, tmp, 0);
00795
00796 for (i = 0; i < 3; ++i) {
00797 if ((mask & (1U << i)) != 0) {
00798 factor[i] = spe_allocate_available_register(f);
00799 spe_fs(f, factor[i], tmp, src[i]);
00800 }
00801 }
00802
00803 spe_release_register(f, tmp);
00804 break;
00805
00806 case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
00807 tmp = spe_allocate_available_register(f);
00808 factor[0] = spe_allocate_available_register(f);
00809 factor[1] = factor[0];
00810 factor[2] = factor[0];
00811
00812 spe_il(f, tmp, 1);
00813 spe_cuflt(f, tmp, tmp, 0);
00814 spe_fs(f, factor[0], tmp, src[3]);
00815
00816 spe_release_register(f, tmp);
00817 break;
00818
00819 case PIPE_BLENDFACTOR_INV_DST_ALPHA:
00820 tmp = spe_allocate_available_register(f);
00821 factor[0] = spe_allocate_available_register(f);
00822 factor[1] = factor[0];
00823 factor[2] = factor[0];
00824
00825 spe_il(f, tmp, 1);
00826 spe_cuflt(f, tmp, tmp, 0);
00827 spe_fs(f, factor[0], tmp, dst[3]);
00828
00829 spe_release_register(f, tmp);
00830 break;
00831
00832 case PIPE_BLENDFACTOR_INV_DST_COLOR:
00833 tmp = spe_allocate_available_register(f);
00834
00835 spe_il(f, tmp, 1);
00836 spe_cuflt(f, tmp, tmp, 0);
00837
00838 for (i = 0; i < 3; ++i) {
00839 if ((mask & (1U << i)) != 0) {
00840 factor[i] = spe_allocate_available_register(f);
00841 spe_fs(f, factor[i], tmp, dst[i]);
00842 }
00843 }
00844
00845 spe_release_register(f, tmp);
00846 break;
00847
00848 case PIPE_BLENDFACTOR_SRC1_COLOR:
00849 case PIPE_BLENDFACTOR_SRC1_ALPHA:
00850 case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
00851 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
00852 default:
00853 assert(0);
00854 }
00855 }
00856
00857
00858 static void
00859 emit_blend_calculation(struct spe_function *f,
00860 unsigned func, unsigned sF, unsigned dF,
00861 int src, int src_factor, int dst, int dst_factor)
00862 {
00863 int tmp = spe_allocate_available_register(f);
00864
00865 switch (func) {
00866 case PIPE_BLEND_ADD:
00867 if (sF == PIPE_BLENDFACTOR_ONE) {
00868 if (dF == PIPE_BLENDFACTOR_ZERO) {
00869
00870 } else if (dF == PIPE_BLENDFACTOR_ONE) {
00871 spe_fa(f, src, src, dst);
00872 }
00873 } else if (sF == PIPE_BLENDFACTOR_ZERO) {
00874 if (dF == PIPE_BLENDFACTOR_ZERO) {
00875 spe_il(f, src, 0);
00876 } else if (dF == PIPE_BLENDFACTOR_ONE) {
00877 spe_or(f, src, dst, dst);
00878 } else {
00879 spe_fm(f, src, dst, dst_factor);
00880 }
00881 } else if (dF == PIPE_BLENDFACTOR_ZERO) {
00882 spe_fm(f, src, src, src_factor);
00883 } else {
00884 spe_fm(f, tmp, dst, dst_factor);
00885 spe_fma(f, src, src, src_factor, tmp);
00886 }
00887 break;
00888
00889 case PIPE_BLEND_SUBTRACT:
00890 if (sF == PIPE_BLENDFACTOR_ONE) {
00891 if (dF == PIPE_BLENDFACTOR_ZERO) {
00892
00893 } else if (dF == PIPE_BLENDFACTOR_ONE) {
00894 spe_fs(f, src, src, dst);
00895 }
00896 } else if (sF == PIPE_BLENDFACTOR_ZERO) {
00897 if (dF == PIPE_BLENDFACTOR_ZERO) {
00898 spe_il(f, src, 0);
00899 } else if (dF == PIPE_BLENDFACTOR_ONE) {
00900 spe_il(f, tmp, 0);
00901 spe_fs(f, src, tmp, dst);
00902 } else {
00903 spe_fm(f, src, dst, dst_factor);
00904 }
00905 } else if (dF == PIPE_BLENDFACTOR_ZERO) {
00906 spe_fm(f, src, src, src_factor);
00907 } else {
00908 spe_fm(f, tmp, dst, dst_factor);
00909 spe_fms(f, src, src, src_factor, tmp);
00910 }
00911 break;
00912
00913 case PIPE_BLEND_REVERSE_SUBTRACT:
00914 if (sF == PIPE_BLENDFACTOR_ONE) {
00915 if (dF == PIPE_BLENDFACTOR_ZERO) {
00916 spe_il(f, tmp, 0);
00917 spe_fs(f, src, tmp, src);
00918 } else if (dF == PIPE_BLENDFACTOR_ONE) {
00919 spe_fs(f, src, dst, src);
00920 }
00921 } else if (sF == PIPE_BLENDFACTOR_ZERO) {
00922 if (dF == PIPE_BLENDFACTOR_ZERO) {
00923 spe_il(f, src, 0);
00924 } else if (dF == PIPE_BLENDFACTOR_ONE) {
00925 spe_or(f, src, dst, dst);
00926 } else {
00927 spe_fm(f, src, dst, dst_factor);
00928 }
00929 } else if (dF == PIPE_BLENDFACTOR_ZERO) {
00930 spe_fm(f, src, src, src_factor);
00931 } else {
00932 spe_fm(f, tmp, src, src_factor);
00933 spe_fms(f, src, src, dst_factor, tmp);
00934 }
00935 break;
00936
00937 case PIPE_BLEND_MIN:
00938 spe_cgt(f, tmp, src, dst);
00939 spe_selb(f, src, src, dst, tmp);
00940 break;
00941
00942 case PIPE_BLEND_MAX:
00943 spe_cgt(f, tmp, src, dst);
00944 spe_selb(f, src, dst, src, tmp);
00945 break;
00946
00947 default:
00948 assert(0);
00949 }
00950
00951 spe_release_register(f, tmp);
00952 }
00953
00954
00958 void
00959 cell_generate_alpha_blend(struct cell_blend_state *cb)
00960 {
00961 struct pipe_blend_state *const b = &cb->base;
00962 struct spe_function *const f = &cb->code;
00963
00964
00965
00966
00967
00968
00969
00970 spe_init_func(f, SPE_INST_SIZE * 64);
00971
00972
00973 const int frag[4] = {
00974 spe_allocate_register(f, 3),
00975 spe_allocate_register(f, 4),
00976 spe_allocate_register(f, 5),
00977 spe_allocate_register(f, 6),
00978 };
00979 const int pixel[4] = {
00980 spe_allocate_register(f, 7),
00981 spe_allocate_register(f, 8),
00982 spe_allocate_register(f, 9),
00983 spe_allocate_register(f, 10),
00984 };
00985 const int const_color[4] = {
00986 spe_allocate_register(f, 11),
00987 spe_allocate_register(f, 12),
00988 spe_allocate_register(f, 13),
00989 spe_allocate_register(f, 14),
00990 };
00991 unsigned func[4];
00992 unsigned sF[4];
00993 unsigned dF[4];
00994 unsigned i;
00995 int src_factor[4];
00996 int dst_factor[4];
00997
00998
00999
01000
01001
01002 boolean need_color_factor = b->blend_enable
01003 && (b->rgb_func != PIPE_BLEND_MIN)
01004 && (b->rgb_func != PIPE_BLEND_MAX);
01005
01006
01007
01008
01009 boolean need_alpha_factor = b->blend_enable
01010 && (b->alpha_func != PIPE_BLEND_MIN)
01011 && (b->alpha_func != PIPE_BLEND_MAX);
01012
01013
01014 if (b->blend_enable) {
01015 sF[0] = b->rgb_src_factor;
01016 sF[1] = sF[0];
01017 sF[2] = sF[0];
01018 switch (b->alpha_src_factor & 0x0f) {
01019 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
01020 sF[3] = PIPE_BLENDFACTOR_ONE;
01021 break;
01022 case PIPE_BLENDFACTOR_SRC_COLOR:
01023 case PIPE_BLENDFACTOR_DST_COLOR:
01024 case PIPE_BLENDFACTOR_CONST_COLOR:
01025 case PIPE_BLENDFACTOR_SRC1_COLOR:
01026 sF[3] = b->alpha_src_factor + 1;
01027 break;
01028 default:
01029 sF[3] = b->alpha_src_factor;
01030 }
01031
01032 dF[0] = b->rgb_dst_factor;
01033 dF[1] = dF[0];
01034 dF[2] = dF[0];
01035 switch (b->alpha_dst_factor & 0x0f) {
01036 case PIPE_BLENDFACTOR_SRC_COLOR:
01037 case PIPE_BLENDFACTOR_DST_COLOR:
01038 case PIPE_BLENDFACTOR_CONST_COLOR:
01039 case PIPE_BLENDFACTOR_SRC1_COLOR:
01040 dF[3] = b->alpha_dst_factor + 1;
01041 break;
01042 default:
01043 dF[3] = b->alpha_dst_factor;
01044 }
01045
01046 func[0] = b->rgb_func;
01047 func[1] = func[0];
01048 func[2] = func[0];
01049 func[3] = b->alpha_func;
01050 } else {
01051 sF[0] = PIPE_BLENDFACTOR_ONE;
01052 sF[1] = PIPE_BLENDFACTOR_ONE;
01053 sF[2] = PIPE_BLENDFACTOR_ONE;
01054 sF[3] = PIPE_BLENDFACTOR_ONE;
01055 dF[0] = PIPE_BLENDFACTOR_ZERO;
01056 dF[1] = PIPE_BLENDFACTOR_ZERO;
01057 dF[2] = PIPE_BLENDFACTOR_ZERO;
01058 dF[3] = PIPE_BLENDFACTOR_ZERO;
01059
01060 func[0] = PIPE_BLEND_ADD;
01061 func[1] = PIPE_BLEND_ADD;
01062 func[2] = PIPE_BLEND_ADD;
01063 func[3] = PIPE_BLEND_ADD;
01064 }
01065
01066
01067
01068
01069
01070 if (((b->colormask & 8) != 0) && need_alpha_factor) {
01071 src_factor[3] = emit_alpha_factor_calculation(f, sF[3], const_color[3],
01072 frag[3], pixel[3]);
01073
01074
01075
01076
01077 dst_factor[3] = (dF[3] == sF[3])
01078 ? src_factor[3]
01079 : emit_alpha_factor_calculation(f, dF[3], const_color[3],
01080 frag[3], pixel[3]);
01081 }
01082
01083
01084 if (sF[0] == sF[3]) {
01085 src_factor[0] = src_factor[3];
01086 src_factor[1] = src_factor[3];
01087 src_factor[2] = src_factor[3];
01088 } else if (sF[0] == dF[3]) {
01089 src_factor[0] = dst_factor[3];
01090 src_factor[1] = dst_factor[3];
01091 src_factor[2] = dst_factor[3];
01092 } else if (need_color_factor) {
01093 emit_color_factor_calculation(f,
01094 b->rgb_src_factor,
01095 b->colormask,
01096 frag, pixel, const_color, src_factor);
01097 }
01098
01099
01100 if (dF[0] == sF[3]) {
01101 dst_factor[0] = src_factor[3];
01102 dst_factor[1] = src_factor[3];
01103 dst_factor[2] = src_factor[3];
01104 } else if (dF[0] == dF[3]) {
01105 dst_factor[0] = dst_factor[3];
01106 dst_factor[1] = dst_factor[3];
01107 dst_factor[2] = dst_factor[3];
01108 } else if (dF[0] == sF[0]) {
01109 dst_factor[0] = src_factor[0];
01110 dst_factor[1] = src_factor[1];
01111 dst_factor[2] = src_factor[2];
01112 } else if (need_color_factor) {
01113 emit_color_factor_calculation(f,
01114 b->rgb_dst_factor,
01115 b->colormask,
01116 frag, pixel, const_color, dst_factor);
01117 }
01118
01119
01120
01121 for (i = 0; i < 4; ++i) {
01122 if ((b->colormask & (1U << i)) != 0) {
01123 emit_blend_calculation(f,
01124 func[i], sF[i], dF[i],
01125 frag[i], src_factor[i],
01126 pixel[i], dst_factor[i]);
01127 }
01128 }
01129
01130 spe_bi(f, 0, 0, 0);
01131
01132 #if 0
01133 {
01134 const uint32_t *p = f->store;
01135
01136 printf("# %u instructions\n", f->csr - f->store);
01137 printf("# blend (%sabled)\n",
01138 (cb->base.blend_enable) ? "en" : "dis");
01139 printf("# RGB func / sf / df: %u %u %u\n",
01140 cb->base.rgb_func,
01141 cb->base.rgb_src_factor,
01142 cb->base.rgb_dst_factor);
01143 printf("# ALP func / sf / df: %u %u %u\n",
01144 cb->base.alpha_func,
01145 cb->base.alpha_src_factor,
01146 cb->base.alpha_dst_factor);
01147
01148 printf("\t.text\n");
01149 for (; p < f->csr; p++) {
01150 printf("\t.long\t0x%04x\n", *p);
01151 }
01152 fflush(stdout);
01153 }
01154 #endif
01155 }
01156
01157
01158 static int
01159 PC_OFFSET(const struct spe_function *f, const void *d)
01160 {
01161 const intptr_t pc = (intptr_t) &f->store[f->num_inst];
01162 const intptr_t ea = ~0x0f & (intptr_t) d;
01163
01164 return (ea - pc) >> 2;
01165 }
01166
01167
01181 void
01182 cell_generate_logic_op(struct spe_function *f,
01183 const struct pipe_blend_state *blend,
01184 struct pipe_surface *surf)
01185 {
01186 const unsigned logic_op = (blend->logicop_enable)
01187 ? blend->logicop_func : PIPE_LOGICOP_COPY;
01188
01189
01190
01191
01192
01193 spe_init_func(f, SPE_INST_SIZE * 64);
01194
01195
01196
01197
01198 const int pixel[4] = {
01199 spe_allocate_register(f, 3),
01200 spe_allocate_register(f, 4),
01201 spe_allocate_register(f, 5),
01202 spe_allocate_register(f, 6),
01203 };
01204
01205
01206
01207 const int frag[4] = {
01208 spe_allocate_register(f, 7),
01209 spe_allocate_register(f, 8),
01210 spe_allocate_register(f, 9),
01211 spe_allocate_register(f, 10),
01212 };
01213
01214 const int mask = spe_allocate_register(f, 11);
01215
01216
01217
01218
01219 if ((logic_op == PIPE_LOGICOP_NOOP) || (blend->colormask == 0)) {
01220 spe_bi(f, 0, 0, 0);
01221 return;
01222 } else if (logic_op == PIPE_LOGICOP_INVERT) {
01223 spe_nor(f, pixel[0], pixel[0], pixel[0]);
01224 spe_nor(f, pixel[1], pixel[1], pixel[1]);
01225 spe_nor(f, pixel[2], pixel[2], pixel[2]);
01226 spe_nor(f, pixel[3], pixel[3], pixel[3]);
01227 spe_bi(f, 0, 0, 0);
01228 return;
01229 }
01230
01231
01232 const int tmp[4] = {
01233 spe_allocate_available_register(f),
01234 spe_allocate_available_register(f),
01235 spe_allocate_available_register(f),
01236 spe_allocate_available_register(f),
01237 };
01238
01239 const int shuf_xpose_hi = spe_allocate_available_register(f);
01240 const int shuf_xpose_lo = spe_allocate_available_register(f);
01241 const int shuf_color = spe_allocate_available_register(f);
01242
01243
01244
01245
01246 uint32_t *const data = ((uint32_t *) f->store) + (64 - 8);
01247
01248
01249
01250
01251 switch (surf->format) {
01252 case PIPE_FORMAT_A8R8G8B8_UNORM:
01253 data[0] = 0x00010203;
01254 data[1] = 0x10111213;
01255 data[2] = 0x04050607;
01256 data[3] = 0x14151617;
01257 data[4] = 0x0c000408;
01258 data[5] = 0x80808080;
01259 data[6] = 0x80808080;
01260 data[7] = 0x80808080;
01261 break;
01262 case PIPE_FORMAT_B8G8R8A8_UNORM:
01263 data[0] = 0x03020100;
01264 data[1] = 0x13121110;
01265 data[2] = 0x07060504;
01266 data[3] = 0x17161514;
01267 data[4] = 0x0804000c;
01268 data[5] = 0x80808080;
01269 data[6] = 0x80808080;
01270 data[7] = 0x80808080;
01271 break;
01272 default:
01273 fprintf(stderr, "CELL: Bad pixel format in cell_generate_logic_op()");
01274 ASSERT(0);
01275 }
01276
01277 spe_ilh(f, tmp[0], 0x0808);
01278 spe_lqr(f, shuf_xpose_hi, PC_OFFSET(f, data+0));
01279 spe_lqr(f, shuf_color, PC_OFFSET(f, data+4));
01280 spe_a(f, shuf_xpose_lo, shuf_xpose_hi, tmp[0]);
01281
01282 spe_shufb(f, tmp[0], frag[0], frag[2], shuf_xpose_hi);
01283 spe_shufb(f, tmp[1], frag[0], frag[2], shuf_xpose_lo);
01284 spe_shufb(f, tmp[2], frag[1], frag[3], shuf_xpose_hi);
01285 spe_shufb(f, tmp[3], frag[1], frag[3], shuf_xpose_lo);
01286
01287 spe_shufb(f, frag[0], tmp[0], tmp[2], shuf_xpose_hi);
01288 spe_shufb(f, frag[1], tmp[0], tmp[2], shuf_xpose_lo);
01289 spe_shufb(f, frag[2], tmp[1], tmp[3], shuf_xpose_hi);
01290 spe_shufb(f, frag[3], tmp[1], tmp[3], shuf_xpose_lo);
01291
01292 spe_cfltu(f, frag[0], frag[0], 32);
01293 spe_cfltu(f, frag[1], frag[1], 32);
01294 spe_cfltu(f, frag[2], frag[2], 32);
01295 spe_cfltu(f, frag[3], frag[3], 32);
01296
01297 spe_shufb(f, frag[0], frag[0], pixel[0], shuf_color);
01298 spe_shufb(f, frag[1], frag[1], pixel[1], shuf_color);
01299 spe_shufb(f, frag[2], frag[2], pixel[2], shuf_color);
01300 spe_shufb(f, frag[3], frag[3], pixel[3], shuf_color);
01301
01302
01303
01304
01305
01306 switch (logic_op) {
01307 case PIPE_LOGICOP_CLEAR:
01308 spe_il(f, frag[0], 0);
01309 spe_il(f, frag[1], 0);
01310 spe_il(f, frag[2], 0);
01311 spe_il(f, frag[3], 0);
01312 break;
01313 case PIPE_LOGICOP_NOR:
01314 spe_nor(f, frag[0], frag[0], pixel[0]);
01315 spe_nor(f, frag[1], frag[1], pixel[1]);
01316 spe_nor(f, frag[2], frag[2], pixel[2]);
01317 spe_nor(f, frag[3], frag[3], pixel[3]);
01318 break;
01319 case PIPE_LOGICOP_AND_INVERTED:
01320 spe_andc(f, frag[0], pixel[0], frag[0]);
01321 spe_andc(f, frag[1], pixel[1], frag[1]);
01322 spe_andc(f, frag[2], pixel[2], frag[2]);
01323 spe_andc(f, frag[3], pixel[3], frag[3]);
01324 break;
01325 case PIPE_LOGICOP_COPY_INVERTED:
01326 spe_nor(f, frag[0], frag[0], frag[0]);
01327 spe_nor(f, frag[1], frag[1], frag[1]);
01328 spe_nor(f, frag[2], frag[2], frag[2]);
01329 spe_nor(f, frag[3], frag[3], frag[3]);
01330 break;
01331 case PIPE_LOGICOP_AND_REVERSE:
01332 spe_andc(f, frag[0], frag[0], pixel[0]);
01333 spe_andc(f, frag[1], frag[1], pixel[1]);
01334 spe_andc(f, frag[2], frag[2], pixel[2]);
01335 spe_andc(f, frag[3], frag[3], pixel[3]);
01336 break;
01337 case PIPE_LOGICOP_XOR:
01338 spe_xor(f, frag[0], frag[0], pixel[0]);
01339 spe_xor(f, frag[1], frag[1], pixel[1]);
01340 spe_xor(f, frag[2], frag[2], pixel[2]);
01341 spe_xor(f, frag[3], frag[3], pixel[3]);
01342 break;
01343 case PIPE_LOGICOP_NAND:
01344 spe_nand(f, frag[0], frag[0], pixel[0]);
01345 spe_nand(f, frag[1], frag[1], pixel[1]);
01346 spe_nand(f, frag[2], frag[2], pixel[2]);
01347 spe_nand(f, frag[3], frag[3], pixel[3]);
01348 break;
01349 case PIPE_LOGICOP_AND:
01350 spe_and(f, frag[0], frag[0], pixel[0]);
01351 spe_and(f, frag[1], frag[1], pixel[1]);
01352 spe_and(f, frag[2], frag[2], pixel[2]);
01353 spe_and(f, frag[3], frag[3], pixel[3]);
01354 break;
01355 case PIPE_LOGICOP_EQUIV:
01356 spe_eqv(f, frag[0], frag[0], pixel[0]);
01357 spe_eqv(f, frag[1], frag[1], pixel[1]);
01358 spe_eqv(f, frag[2], frag[2], pixel[2]);
01359 spe_eqv(f, frag[3], frag[3], pixel[3]);
01360 break;
01361 case PIPE_LOGICOP_OR_INVERTED:
01362 spe_orc(f, frag[0], pixel[0], frag[0]);
01363 spe_orc(f, frag[1], pixel[1], frag[1]);
01364 spe_orc(f, frag[2], pixel[2], frag[2]);
01365 spe_orc(f, frag[3], pixel[3], frag[3]);
01366 break;
01367 case PIPE_LOGICOP_COPY:
01368 break;
01369 case PIPE_LOGICOP_OR_REVERSE:
01370 spe_orc(f, frag[0], frag[0], pixel[0]);
01371 spe_orc(f, frag[1], frag[1], pixel[1]);
01372 spe_orc(f, frag[2], frag[2], pixel[2]);
01373 spe_orc(f, frag[3], frag[3], pixel[3]);
01374 break;
01375 case PIPE_LOGICOP_OR:
01376 spe_or(f, frag[0], frag[0], pixel[0]);
01377 spe_or(f, frag[1], frag[1], pixel[1]);
01378 spe_or(f, frag[2], frag[2], pixel[2]);
01379 spe_or(f, frag[3], frag[3], pixel[3]);
01380 break;
01381 case PIPE_LOGICOP_SET:
01382 spe_il(f, frag[0], ~0);
01383 spe_il(f, frag[1], ~0);
01384 spe_il(f, frag[2], ~0);
01385 spe_il(f, frag[3], ~0);
01386 break;
01387
01388
01389
01390 case PIPE_LOGICOP_INVERT:
01391 case PIPE_LOGICOP_NOOP:
01392 default:
01393 assert(0);
01394 }
01395
01396
01397
01398
01399 spe_ilh(f, tmp[0], 0x0000);
01400 spe_ilh(f, tmp[1], 0x0404);
01401 spe_ilh(f, tmp[2], 0x0808);
01402 spe_ilh(f, tmp[3], 0x0c0c);
01403
01404 spe_shufb(f, tmp[0], mask, mask, tmp[0]);
01405 spe_shufb(f, tmp[1], mask, mask, tmp[1]);
01406 spe_shufb(f, tmp[2], mask, mask, tmp[2]);
01407 spe_shufb(f, tmp[3], mask, mask, tmp[3]);
01408
01409 spe_selb(f, pixel[0], pixel[0], frag[0], tmp[0]);
01410 spe_selb(f, pixel[1], pixel[1], frag[1], tmp[1]);
01411 spe_selb(f, pixel[2], pixel[2], frag[2], tmp[2]);
01412 spe_selb(f, pixel[3], pixel[3], frag[3], tmp[3]);
01413
01414 spe_bi(f, 0, 0, 0);
01415
01416 #if 0
01417 {
01418 const uint32_t *p = f->store;
01419 unsigned i;
01420
01421 printf("# %u instructions\n", f->csr - f->store);
01422
01423 printf("\t.text\n");
01424 for (i = 0; i < 64; i++) {
01425 printf("\t.long\t0x%04x\n", p[i]);
01426 }
01427 fflush(stdout);
01428 }
01429 #endif
01430 }