cell_state_per_fragment.c

Go to the documentation of this file.
00001 /*
00002  * (C) Copyright IBM Corporation 2008
00003  * All Rights Reserved.
00004  *
00005  * Permission is hereby granted, free of charge, to any person obtaining a
00006  * copy of this software and associated documentation files (the "Software"),
00007  * to deal in the Software without restriction, including without limitation
00008  * on the rights to use, copy, modify, merge, publish, distribute, sub
00009  * license, and/or sell copies of the Software, and to permit persons to whom
00010  * the Software is furnished to do so, subject to the following conditions:
00011  *
00012  * The above copyright notice and this permission notice (including the next
00013  * paragraph) shall be included in all copies or substantial portions of the
00014  * Software.
00015  *
00016  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
00017  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
00018  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
00019  * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
00020  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
00021  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
00022  * USE OR OTHER DEALINGS IN THE SOFTWARE.
00023  */
00024 
00039 #include "pipe/p_defines.h"
00040 #include "pipe/p_state.h"
00041 
00042 #include "cell_context.h"
00043 
00044 #include "rtasm/rtasm_ppc_spe.h"
00045 
00046 
00060 static void
00061 emit_alpha_test(struct pipe_depth_stencil_alpha_state *dsa,
00062                 struct spe_function *f, int mask, int alphas)
00063 {
00064    /* If the alpha function is either NEVER or ALWAYS, there is no need to
00065     * load the reference value into a register.  ALWAYS is a fairly common
00066     * case, and this optimization saves 2 instructions.
00067     */
00068    if (dsa->alpha.enabled
00069        && (dsa->alpha.func != PIPE_FUNC_NEVER)
00070        && (dsa->alpha.func != PIPE_FUNC_ALWAYS)) {
00071       int ref = spe_allocate_available_register(f);
00072       int tmp_a = spe_allocate_available_register(f);
00073       int tmp_b = spe_allocate_available_register(f);
00074       union {
00075          float f;
00076          unsigned u;
00077       } ref_val;
00078       boolean complement = FALSE;
00079 
00080       ref_val.f = dsa->alpha.ref;
00081 
00082       spe_il(f, ref, ref_val.u & 0x0000ffff);
00083       spe_ilh(f, ref, ref_val.u >> 16);
00084 
00085       switch (dsa->alpha.func) {
00086       case PIPE_FUNC_NOTEQUAL:
00087          complement = TRUE;
00088          /* FALLTHROUGH */
00089 
00090       case PIPE_FUNC_EQUAL:
00091          spe_fceq(f, tmp_a, ref, alphas);
00092          break;
00093 
00094       case PIPE_FUNC_LEQUAL:
00095          complement = TRUE;
00096          /* FALLTHROUGH */
00097 
00098       case PIPE_FUNC_GREATER:
00099          spe_fcgt(f, tmp_a, ref, alphas);
00100          break;
00101 
00102       case PIPE_FUNC_LESS:
00103          complement = TRUE;
00104          /* FALLTHROUGH */
00105 
00106       case PIPE_FUNC_GEQUAL:
00107          spe_fcgt(f, tmp_a, ref, alphas);
00108          spe_fceq(f, tmp_b, ref, alphas);
00109          spe_or(f, tmp_a, tmp_b, tmp_a);
00110          break;
00111 
00112       case PIPE_FUNC_ALWAYS:
00113       case PIPE_FUNC_NEVER:
00114       default:
00115          assert(0);
00116          break;
00117       }
00118 
00119       if (complement) {
00120          spe_andc(f, mask, mask, tmp_a);
00121       } else {
00122          spe_and(f, mask, mask, tmp_a);
00123       }
00124 
00125       spe_release_register(f, ref);
00126       spe_release_register(f, tmp_a);
00127       spe_release_register(f, tmp_b);
00128    } else if (dsa->alpha.enabled && (dsa->alpha.func == PIPE_FUNC_NEVER)) {
00129       spe_il(f, mask, 0);
00130    }
00131 }
00132 
00133 
00149 static boolean
00150 emit_depth_test(struct pipe_depth_stencil_alpha_state *dsa,
00151                 struct spe_function *f, int mask, int stored, int calculated)
00152 {
00153    unsigned func = (dsa->depth.enabled)
00154        ? dsa->depth.func : PIPE_FUNC_ALWAYS;
00155    int tmp = spe_allocate_available_register(f);
00156    boolean compliment = FALSE;
00157 
00158    switch (func) {
00159    case PIPE_FUNC_NEVER:
00160       spe_il(f, mask, 0);
00161       break;
00162 
00163    case PIPE_FUNC_NOTEQUAL:
00164       compliment = TRUE;
00165       /* FALLTHROUGH */
00166    case PIPE_FUNC_EQUAL:
00167       spe_ceq(f, mask, calculated, stored);
00168       break;
00169 
00170    case PIPE_FUNC_LEQUAL:
00171       compliment = TRUE;
00172       /* FALLTHROUGH */
00173    case PIPE_FUNC_GREATER:
00174       spe_clgt(f, mask, calculated, stored);
00175       break;
00176 
00177    case PIPE_FUNC_LESS:
00178       compliment = TRUE;
00179       /* FALLTHROUGH */
00180    case PIPE_FUNC_GEQUAL:
00181       spe_clgt(f, mask, calculated, stored);
00182       spe_ceq(f, tmp, calculated, stored);
00183       spe_or(f, mask, mask, tmp);
00184       break;
00185 
00186    case PIPE_FUNC_ALWAYS:
00187       spe_il(f, mask, ~0);
00188       break;
00189 
00190    default:
00191       assert(0);
00192       break;
00193    }
00194 
00195    spe_release_register(f, tmp);
00196    return compliment;
00197 }
00198 
00199 
00208 static void
00209 emit_stencil_op(struct spe_function *f,
00210                 int out, int in, int mask, unsigned op, unsigned ref)
00211 {
00212    const int clamp = spe_allocate_available_register(f);
00213    const int clamp_mask = spe_allocate_available_register(f);
00214    const int result = spe_allocate_available_register(f);
00215 
00216    switch(op) {
00217    case PIPE_STENCIL_OP_KEEP:
00218       assert(0);
00219    case PIPE_STENCIL_OP_ZERO:
00220       spe_il(f, result, 0);
00221       break;
00222    case PIPE_STENCIL_OP_REPLACE:
00223       spe_il(f, result, ref);
00224       break;
00225    case PIPE_STENCIL_OP_INCR:
00226       /* clamp = [0xff, 0xff, 0xff, 0xff] */
00227       spe_il(f, clamp, 0x0ff);
00228       /* result[i] = in[i] + 1 */
00229       spe_ai(f, result, in, 1);
00230       /* clamp_mask[i] = (result[i] > 0xff) */
00231       spe_clgti(f, clamp_mask, result, 0x0ff);
00232       /* result[i] = clamp_mask[i] ? clamp[i] : result[i] */
00233       spe_selb(f, result, result, clamp, clamp_mask);
00234       break;
00235    case PIPE_STENCIL_OP_DECR:
00236       spe_il(f, clamp, 0);
00237       spe_ai(f, result, in, -1);
00238 
00239       /* If "(s-1) < 0" in signed arithemtic, then "(s-1) > MAX" in unsigned
00240        * arithmetic.
00241        */
00242       spe_clgti(f, clamp_mask, result, 0x0ff);
00243       spe_selb(f, result, result, clamp, clamp_mask);
00244       break;
00245    case PIPE_STENCIL_OP_INCR_WRAP:
00246       spe_ai(f, result, in, 1);
00247       break;
00248    case PIPE_STENCIL_OP_DECR_WRAP:
00249       spe_ai(f, result, in, -1);
00250       break;
00251    case PIPE_STENCIL_OP_INVERT:
00252       spe_nor(f, result, in, in);
00253       break;
00254    default:
00255       assert(0);
00256    }
00257 
00258    spe_selb(f, out, in, result, mask);
00259 
00260    spe_release_register(f, result);
00261    spe_release_register(f, clamp_mask);
00262    spe_release_register(f, clamp);
00263 }
00264 
00265 
00283 static int
00284 emit_stencil_test(struct pipe_depth_stencil_alpha_state *dsa,
00285                   unsigned face,
00286                   struct spe_function *f,
00287                   int mask,
00288                   int depth_mask,
00289                   boolean depth_complement,
00290                   int stencil,
00291                   int depth_pass)
00292 {
00293    int stencil_fail = spe_allocate_available_register(f);
00294    int depth_fail = spe_allocate_available_register(f);
00295    int stencil_mask = spe_allocate_available_register(f);
00296    int stencil_pass = spe_allocate_available_register(f);
00297    int face_stencil = spe_allocate_available_register(f);
00298    int stencil_src = stencil;
00299    const unsigned ref = (dsa->stencil[face].ref_value
00300                          & dsa->stencil[face].value_mask);
00301    boolean complement = FALSE;
00302    int stored;
00303    int tmp = spe_allocate_available_register(f);
00304 
00305 
00306    if ((dsa->stencil[face].func != PIPE_FUNC_NEVER)
00307        && (dsa->stencil[face].func != PIPE_FUNC_ALWAYS)
00308        && (dsa->stencil[face].value_mask != 0x0ff)) {
00309       stored = spe_allocate_available_register(f);
00310       spe_andi(f, stored, stencil, dsa->stencil[face].value_mask);
00311    } else {
00312       stored = stencil;
00313    }
00314 
00315 
00316    switch (dsa->stencil[face].func) {
00317    case PIPE_FUNC_NEVER:
00318       spe_il(f, stencil_mask, 0);   /* stencil_mask[0..3] = [0,0,0,0] */
00319       break;
00320 
00321    case PIPE_FUNC_NOTEQUAL:
00322       complement = TRUE;
00323       /* FALLTHROUGH */
00324    case PIPE_FUNC_EQUAL:
00325       /* stencil_mask[i] = (stored[i] == ref) */
00326       spe_ceqi(f, stencil_mask, stored, ref);
00327       break;
00328 
00329    case PIPE_FUNC_LEQUAL:
00330       complement = TRUE;
00331       /* FALLTHROUGH */
00332    case PIPE_FUNC_GREATER:
00333       complement = TRUE;
00334       /* stencil_mask[i] = (stored[i] > ref) */
00335       spe_clgti(f, stencil_mask, stored, ref);
00336       break;
00337 
00338    case PIPE_FUNC_LESS:
00339       complement = TRUE;
00340       /* FALLTHROUGH */
00341    case PIPE_FUNC_GEQUAL:
00342       /* stencil_mask[i] = (stored[i] > ref) */
00343       spe_clgti(f, stencil_mask, stored, ref);
00344       /* tmp[i] = (stored[i] == ref) */
00345       spe_ceqi(f, tmp, stored, ref);
00346       /* stencil_mask[i] = stencil_mask[i] | tmp[i] */
00347       spe_or(f, stencil_mask, stencil_mask, tmp);
00348       break;
00349 
00350    case PIPE_FUNC_ALWAYS:
00351       /* See comment below. */
00352       break;
00353 
00354    default:
00355       assert(0);
00356       break;
00357    }
00358 
00359    if (stored != stencil) {
00360       spe_release_register(f, stored);
00361    }
00362    spe_release_register(f, tmp);
00363 
00364 
00365    /* ALWAYS is a very common stencil-test, so some effort is applied to
00366     * optimize that case.  The stencil-pass mask is the same as the input
00367     * fragment mask.  This makes the stencil-test (above) a no-op, and the
00368     * input fragment mask can be "renamed" the stencil-pass mask.
00369     */
00370    if (dsa->stencil[face].func == PIPE_FUNC_ALWAYS) {
00371       spe_release_register(f, stencil_pass);
00372       stencil_pass = mask;
00373    } else {
00374       if (complement) {
00375          spe_andc(f, stencil_pass, mask, stencil_mask);
00376       } else {
00377          spe_and(f, stencil_pass, mask, stencil_mask);
00378       }
00379    }
00380 
00381    if (depth_complement) {
00382       spe_andc(f, depth_pass, stencil_pass, depth_mask);
00383    } else {
00384       spe_and(f, depth_pass, stencil_pass, depth_mask);
00385    }
00386 
00387 
00388    /* Conditionally emit code to update the stencil value under various
00389     * condititons.  Note that there is no need to generate code under the
00390     * following circumstances:
00391     *
00392     * - Stencil write mask is zero.
00393     * - For stencil-fail if the stencil test is ALWAYS
00394     * - For depth-fail if the stencil test is NEVER
00395     * - For depth-pass if the stencil test is NEVER
00396     * - Any of the 3 conditions if the operation is KEEP
00397     */
00398    if (dsa->stencil[face].write_mask != 0) {
00399       if ((dsa->stencil[face].func != PIPE_FUNC_ALWAYS)
00400           && (dsa->stencil[face].fail_op != PIPE_STENCIL_OP_KEEP)) {
00401          if (complement) {
00402             spe_and(f, stencil_fail, mask, stencil_mask);
00403          } else {
00404             spe_andc(f, stencil_fail, mask, stencil_mask);
00405          }
00406 
00407          emit_stencil_op(f, face_stencil, stencil_src, stencil_fail,
00408                          dsa->stencil[face].fail_op,
00409                          dsa->stencil[face].ref_value);
00410 
00411          stencil_src = face_stencil;
00412       }
00413 
00414       if ((dsa->stencil[face].func != PIPE_FUNC_NEVER)
00415           && (dsa->stencil[face].zfail_op != PIPE_STENCIL_OP_KEEP)) {
00416          if (depth_complement) {
00417             spe_and(f, depth_fail, stencil_pass, depth_mask);
00418          } else {
00419             spe_andc(f, depth_fail, stencil_pass, depth_mask);
00420          }
00421 
00422          emit_stencil_op(f, face_stencil, stencil_src, depth_fail,
00423                          dsa->stencil[face].zfail_op,
00424                          dsa->stencil[face].ref_value);
00425          stencil_src = face_stencil;
00426       }
00427 
00428       if ((dsa->stencil[face].func != PIPE_FUNC_NEVER)
00429           && (dsa->stencil[face].zpass_op != PIPE_STENCIL_OP_KEEP)) {
00430          emit_stencil_op(f, face_stencil, stencil_src, depth_pass,
00431                          dsa->stencil[face].zpass_op,
00432                          dsa->stencil[face].ref_value);
00433          stencil_src = face_stencil;
00434       }
00435    }
00436 
00437    spe_release_register(f, stencil_fail);
00438    spe_release_register(f, depth_fail);
00439    spe_release_register(f, stencil_mask);
00440    if (stencil_pass != mask) {
00441       spe_release_register(f, stencil_pass);
00442    }
00443 
00444    /* If all of the stencil operations were KEEP or the stencil write mask was
00445     * zero, "stencil_src" will still be set to "stencil".  In this case
00446     * release the "face_stencil" register.  Otherwise apply the stencil write
00447     * mask to select bits from the calculated stencil value and the previous
00448     * stencil value.
00449     */
00450    if (stencil_src == stencil) {
00451       spe_release_register(f, face_stencil);
00452    } else if (dsa->stencil[face].write_mask != 0x0ff) {
00453       int tmp = spe_allocate_available_register(f);
00454 
00455       spe_il(f, tmp, dsa->stencil[face].write_mask);
00456       spe_selb(f, stencil_src, stencil, stencil_src, tmp);
00457 
00458       spe_release_register(f, tmp);
00459    }
00460 
00461    return stencil_src;
00462 }
00463 
00464 
00465 void
00466 cell_generate_depth_stencil_test(struct cell_depth_stencil_alpha_state *cdsa)
00467 {
00468    struct pipe_depth_stencil_alpha_state *const dsa = &cdsa->base;
00469    struct spe_function *const f = &cdsa->code;
00470 
00471    /* This code generates a maximum of 6 (alpha test) + 3 (depth test)
00472     * + 25 (front stencil) + 25 (back stencil) + 4 = 63 instructions.  Round
00473     * up to 64 to make it a happy power-of-two.
00474     */
00475    spe_init_func(f, SPE_INST_SIZE * 64);
00476 
00477 
00478    /* Allocate registers for the function's input parameters.  Cleverly (and
00479     * clever code is usually dangerous, but I couldn't resist) the generated
00480     * function returns a structure.  Returned structures start with register
00481     * 3, and the structure fields are ordered to match up exactly with the
00482     * input parameters.
00483     */
00484    int mask = spe_allocate_register(f, 3);
00485    int depth = spe_allocate_register(f, 4);
00486    int stencil = spe_allocate_register(f, 5);
00487    int zvals = spe_allocate_register(f, 6);
00488    int frag_a = spe_allocate_register(f, 7);
00489    int facing = spe_allocate_register(f, 8);
00490 
00491    int depth_mask = spe_allocate_available_register(f);
00492 
00493    boolean depth_complement;
00494 
00495 
00496    emit_alpha_test(dsa, f, mask, frag_a);
00497 
00498    depth_complement = emit_depth_test(dsa, f, depth_mask, depth, zvals);
00499 
00500    if (dsa->stencil[0].enabled) {
00501       const int front_depth_pass = spe_allocate_available_register(f);
00502       int front_stencil = emit_stencil_test(dsa, 0, f, mask,
00503                                             depth_mask, depth_complement,
00504                                             stencil, front_depth_pass);
00505 
00506       if (dsa->stencil[1].enabled) {
00507          const int back_depth_pass = spe_allocate_available_register(f);
00508          int back_stencil = emit_stencil_test(dsa, 1, f, mask,
00509                                               depth_mask,  depth_complement,
00510                                               stencil, back_depth_pass);
00511 
00512          /* If the front facing stencil value and the back facing stencil
00513           * value are stored in the same register, there is no need to select
00514           * a value based on the facing.  This can happen if the stencil value
00515           * was not modified due to the write masks being zero, the stencil
00516           * operations being KEEP, etc.
00517           */
00518          if (front_stencil != back_stencil) {
00519             spe_selb(f, stencil, back_stencil, front_stencil, facing);
00520          }
00521 
00522          if (back_stencil != stencil) {
00523             spe_release_register(f, back_stencil);
00524          }
00525 
00526          if (front_stencil != stencil) {
00527             spe_release_register(f, front_stencil);
00528          }
00529 
00530          spe_selb(f, mask, back_depth_pass, front_depth_pass, facing);
00531 
00532          spe_release_register(f, back_depth_pass);
00533       } else {
00534          if (front_stencil != stencil) {
00535             spe_or(f, stencil, front_stencil, front_stencil);
00536             spe_release_register(f, front_stencil);
00537          }
00538          spe_or(f, mask, front_depth_pass, front_depth_pass);
00539       }
00540 
00541       spe_release_register(f, front_depth_pass);
00542    } else if (dsa->depth.enabled) {
00543       if (depth_complement) {
00544          spe_andc(f, mask, mask, depth_mask);
00545       } else {
00546          spe_and(f, mask, mask, depth_mask);
00547       }
00548    }
00549 
00550    if (dsa->depth.writemask) {
00551          spe_selb(f, depth, depth, zvals, mask);
00552    }
00553 
00554    spe_bi(f, 0, 0, 0);  /* return from function call */
00555 
00556 
00557 #if 0
00558    {
00559       const uint32_t *p = f->store;
00560       unsigned i;
00561 
00562       printf("# alpha (%sabled)\n",
00563              (dsa->alpha.enabled) ? "en" : "dis");
00564       printf("#    func: %u\n", dsa->alpha.func);
00565       printf("#    ref: %.2f\n", dsa->alpha.ref);
00566 
00567       printf("# depth (%sabled)\n",
00568              (dsa->depth.enabled) ? "en" : "dis");
00569       printf("#    func: %u\n", dsa->depth.func);
00570 
00571       for (i = 0; i < 2; i++) {
00572          printf("# %s stencil (%sabled)\n",
00573                 (i == 0) ? "front" : "back",
00574                 (dsa->stencil[i].enabled) ? "en" : "dis");
00575 
00576          printf("#    func: %u\n", dsa->stencil[i].func);
00577          printf("#    op (sf, zf, zp): %u %u %u\n",
00578                 dsa->stencil[i].fail_op,
00579                 dsa->stencil[i].zfail_op,
00580                 dsa->stencil[i].zpass_op);
00581          printf("#    ref value / value mask / write mask: %02x %02x %02x\n",
00582                 dsa->stencil[i].ref_value,
00583                 dsa->stencil[i].value_mask,
00584                 dsa->stencil[i].write_mask);
00585       }
00586 
00587       printf("\t.text\n");
00588       for (/* empty */; p < f->csr; p++) {
00589          printf("\t.long\t0x%04x\n", *p);
00590       }
00591       fflush(stdout);
00592    }
00593 #endif
00594 }
00595 
00596 
00600 static int
00601 emit_alpha_factor_calculation(struct spe_function *f,
00602                               unsigned factor,
00603                               int src_alpha, int dst_alpha, int const_alpha)
00604 {
00605    int factor_reg;
00606    int tmp;
00607 
00608 
00609    switch (factor) {
00610    case PIPE_BLENDFACTOR_ONE:
00611       factor_reg = -1;
00612       break;
00613 
00614    case PIPE_BLENDFACTOR_SRC_ALPHA:
00615       factor_reg = spe_allocate_available_register(f);
00616 
00617       spe_or(f, factor_reg, src_alpha, src_alpha);
00618       break;
00619 
00620    case PIPE_BLENDFACTOR_DST_ALPHA:
00621       factor_reg = dst_alpha;
00622       break;
00623 
00624    case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
00625       factor_reg = -1;
00626       break;
00627 
00628    case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
00629       factor_reg = spe_allocate_available_register(f);
00630 
00631       tmp = spe_allocate_available_register(f);
00632       spe_il(f, tmp, 1);
00633       spe_cuflt(f, tmp, tmp, 0);
00634       spe_fs(f, factor_reg, tmp, const_alpha);
00635       spe_release_register(f, tmp);
00636       break;
00637 
00638    case PIPE_BLENDFACTOR_CONST_ALPHA:
00639       factor_reg = const_alpha;
00640       break;
00641 
00642    case PIPE_BLENDFACTOR_ZERO:
00643       factor_reg = -1;
00644       break;
00645 
00646    case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
00647       tmp = spe_allocate_available_register(f);
00648       factor_reg = spe_allocate_available_register(f);
00649 
00650       spe_il(f, tmp, 1);
00651       spe_cuflt(f, tmp, tmp, 0);
00652       spe_fs(f, factor_reg, tmp, src_alpha);
00653 
00654       spe_release_register(f, tmp);
00655       break;
00656 
00657    case PIPE_BLENDFACTOR_INV_DST_ALPHA:
00658       tmp = spe_allocate_available_register(f);
00659       factor_reg = spe_allocate_available_register(f);
00660 
00661       spe_il(f, tmp, 1);
00662       spe_cuflt(f, tmp, tmp, 0);
00663       spe_fs(f, factor_reg, tmp, dst_alpha);
00664 
00665       spe_release_register(f, tmp);
00666       break;
00667 
00668    case PIPE_BLENDFACTOR_SRC1_ALPHA:
00669    case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
00670    default:
00671       assert(0);
00672       factor_reg = -1;
00673       break;
00674    }
00675 
00676    return factor_reg;
00677 }
00678 
00679 
00683 static void
00684 emit_color_factor_calculation(struct spe_function *f,
00685                               unsigned sF, unsigned mask,
00686                               const int *src,
00687                               const int *dst,
00688                               const int *const_color,
00689                               int *factor)
00690 {
00691    int tmp;
00692    unsigned i;
00693 
00694 
00695    factor[0] = -1;
00696    factor[1] = -1;
00697    factor[2] = -1;
00698    factor[3] = -1;
00699 
00700    switch (sF) {
00701    case PIPE_BLENDFACTOR_ONE:
00702       break;
00703 
00704    case PIPE_BLENDFACTOR_SRC_COLOR:
00705       for (i = 0; i < 3; ++i) {
00706          if ((mask & (1U << i)) != 0) {
00707             factor[i] = spe_allocate_available_register(f);
00708             spe_or(f, factor[i], src[i], src[i]);
00709          }
00710       }
00711       break;
00712 
00713    case PIPE_BLENDFACTOR_SRC_ALPHA:
00714       factor[0] = spe_allocate_available_register(f);
00715       factor[1] = factor[0];
00716       factor[2] = factor[0];
00717 
00718       spe_or(f, factor[0], src[3], src[3]);
00719       break;
00720 
00721    case PIPE_BLENDFACTOR_DST_ALPHA:
00722       factor[0] = dst[3];
00723       factor[1] = dst[3];
00724       factor[2] = dst[3];
00725       break;
00726 
00727    case PIPE_BLENDFACTOR_DST_COLOR:
00728       factor[0] = dst[0];
00729       factor[1] = dst[1];
00730       factor[2] = dst[2];
00731       break;
00732 
00733    case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
00734       tmp = spe_allocate_available_register(f);
00735       factor[0] = spe_allocate_available_register(f);
00736       factor[1] = factor[0];
00737       factor[2] = factor[0];
00738 
00739       /* Alpha saturate means min(As, 1-Ad).
00740        */
00741       spe_il(f, tmp, 1);
00742       spe_cuflt(f, tmp, tmp, 0);
00743       spe_fs(f, tmp, tmp, dst[3]);
00744       spe_fcgt(f, factor[0], tmp, src[3]);
00745       spe_selb(f, factor[0], src[3], tmp, factor[0]);
00746 
00747       spe_release_register(f, tmp);
00748       break;
00749 
00750    case PIPE_BLENDFACTOR_INV_CONST_COLOR:
00751       tmp = spe_allocate_available_register(f);
00752       spe_il(f, tmp, 1);
00753       spe_cuflt(f, tmp, tmp, 0);
00754 
00755       for (i = 0; i < 3; i++) {
00756          factor[i] = spe_allocate_available_register(f);
00757 
00758          spe_fs(f, factor[i], tmp, const_color[i]);
00759       }
00760       spe_release_register(f, tmp);
00761       break;
00762 
00763    case PIPE_BLENDFACTOR_CONST_COLOR:
00764       for (i = 0; i < 3; i++) {
00765          factor[i] = const_color[i];
00766       }
00767       break;
00768 
00769    case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
00770       factor[0] = spe_allocate_available_register(f);
00771       factor[1] = factor[0];
00772       factor[2] = factor[0];
00773 
00774       tmp = spe_allocate_available_register(f);
00775       spe_il(f, tmp, 1);
00776       spe_cuflt(f, tmp, tmp, 0);
00777       spe_fs(f, factor[0], tmp, const_color[3]);
00778       spe_release_register(f, tmp);
00779       break;
00780 
00781    case PIPE_BLENDFACTOR_CONST_ALPHA:
00782       factor[0] = const_color[3];
00783       factor[1] = factor[0];
00784       factor[2] = factor[0];
00785       break;
00786 
00787    case PIPE_BLENDFACTOR_ZERO:
00788       break;
00789 
00790    case PIPE_BLENDFACTOR_INV_SRC_COLOR:
00791       tmp = spe_allocate_available_register(f);
00792 
00793       spe_il(f, tmp, 1);
00794       spe_cuflt(f, tmp, tmp, 0);
00795 
00796       for (i = 0; i < 3; ++i) {
00797          if ((mask & (1U << i)) != 0) {
00798             factor[i] = spe_allocate_available_register(f);
00799             spe_fs(f, factor[i], tmp, src[i]);
00800          }
00801       }
00802 
00803       spe_release_register(f, tmp);
00804       break;
00805 
00806    case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
00807       tmp = spe_allocate_available_register(f);
00808       factor[0] = spe_allocate_available_register(f);
00809       factor[1] = factor[0];
00810       factor[2] = factor[0];
00811 
00812       spe_il(f, tmp, 1);
00813       spe_cuflt(f, tmp, tmp, 0);
00814       spe_fs(f, factor[0], tmp, src[3]);
00815 
00816       spe_release_register(f, tmp);
00817       break;
00818 
00819    case PIPE_BLENDFACTOR_INV_DST_ALPHA:
00820       tmp = spe_allocate_available_register(f);
00821       factor[0] = spe_allocate_available_register(f);
00822       factor[1] = factor[0];
00823       factor[2] = factor[0];
00824 
00825       spe_il(f, tmp, 1);
00826       spe_cuflt(f, tmp, tmp, 0);
00827       spe_fs(f, factor[0], tmp, dst[3]);
00828 
00829       spe_release_register(f, tmp);
00830       break;
00831 
00832    case PIPE_BLENDFACTOR_INV_DST_COLOR:
00833       tmp = spe_allocate_available_register(f);
00834 
00835       spe_il(f, tmp, 1);
00836       spe_cuflt(f, tmp, tmp, 0);
00837 
00838       for (i = 0; i < 3; ++i) {
00839          if ((mask & (1U << i)) != 0) {
00840             factor[i] = spe_allocate_available_register(f);
00841             spe_fs(f, factor[i], tmp, dst[i]);
00842          }
00843       }
00844 
00845       spe_release_register(f, tmp);
00846       break;
00847 
00848    case PIPE_BLENDFACTOR_SRC1_COLOR:
00849    case PIPE_BLENDFACTOR_SRC1_ALPHA:
00850    case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
00851    case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
00852    default:
00853       assert(0);
00854    }
00855 }
00856 
00857 
00858 static void
00859 emit_blend_calculation(struct spe_function *f,
00860                        unsigned func, unsigned sF, unsigned dF,
00861                        int src, int src_factor, int dst, int dst_factor)
00862 {
00863    int tmp = spe_allocate_available_register(f);
00864 
00865    switch (func) {
00866    case PIPE_BLEND_ADD:
00867       if (sF == PIPE_BLENDFACTOR_ONE) {
00868          if (dF == PIPE_BLENDFACTOR_ZERO) {
00869             /* Do nothing. */
00870          } else if (dF == PIPE_BLENDFACTOR_ONE) {
00871             spe_fa(f, src, src, dst);
00872          }
00873       } else if (sF == PIPE_BLENDFACTOR_ZERO) {
00874          if (dF == PIPE_BLENDFACTOR_ZERO) {
00875             spe_il(f, src, 0);
00876          } else if (dF == PIPE_BLENDFACTOR_ONE) {
00877             spe_or(f, src, dst, dst);
00878          } else {
00879             spe_fm(f, src, dst, dst_factor);
00880          }
00881       } else if (dF == PIPE_BLENDFACTOR_ZERO) {
00882          spe_fm(f, src, src, src_factor);
00883       } else {
00884          spe_fm(f, tmp, dst, dst_factor);
00885          spe_fma(f, src, src, src_factor, tmp);
00886       }
00887       break;
00888 
00889    case PIPE_BLEND_SUBTRACT:
00890       if (sF == PIPE_BLENDFACTOR_ONE) {
00891          if (dF == PIPE_BLENDFACTOR_ZERO) {
00892             /* Do nothing. */
00893          } else if (dF == PIPE_BLENDFACTOR_ONE) {
00894             spe_fs(f, src, src, dst);
00895          }
00896       } else if (sF == PIPE_BLENDFACTOR_ZERO) {
00897          if (dF == PIPE_BLENDFACTOR_ZERO) {
00898             spe_il(f, src, 0);
00899          } else if (dF == PIPE_BLENDFACTOR_ONE) {
00900             spe_il(f, tmp, 0);
00901             spe_fs(f, src, tmp, dst);
00902          } else {
00903             spe_fm(f, src, dst, dst_factor);
00904          }
00905       } else if (dF == PIPE_BLENDFACTOR_ZERO) {
00906          spe_fm(f, src, src, src_factor);
00907       } else {
00908          spe_fm(f, tmp, dst, dst_factor);
00909          spe_fms(f, src, src, src_factor, tmp);
00910       }
00911       break;
00912 
00913    case PIPE_BLEND_REVERSE_SUBTRACT:
00914       if (sF == PIPE_BLENDFACTOR_ONE) {
00915          if (dF == PIPE_BLENDFACTOR_ZERO) {
00916             spe_il(f, tmp, 0);
00917             spe_fs(f, src, tmp, src);
00918          } else if (dF == PIPE_BLENDFACTOR_ONE) {
00919             spe_fs(f, src, dst, src);
00920          }
00921       } else if (sF == PIPE_BLENDFACTOR_ZERO) {
00922          if (dF == PIPE_BLENDFACTOR_ZERO) {
00923             spe_il(f, src, 0);
00924          } else if (dF == PIPE_BLENDFACTOR_ONE) {
00925             spe_or(f, src, dst, dst);
00926          } else {
00927             spe_fm(f, src, dst, dst_factor);
00928          }
00929       } else if (dF == PIPE_BLENDFACTOR_ZERO) {
00930          spe_fm(f, src, src, src_factor);
00931       } else {
00932          spe_fm(f, tmp, src, src_factor);
00933          spe_fms(f, src, src, dst_factor, tmp);
00934       }
00935       break;
00936 
00937    case PIPE_BLEND_MIN:
00938       spe_cgt(f, tmp, src, dst);
00939       spe_selb(f, src, src, dst, tmp);
00940       break;
00941 
00942    case PIPE_BLEND_MAX:
00943       spe_cgt(f, tmp, src, dst);
00944       spe_selb(f, src, dst, src, tmp);
00945       break;
00946 
00947    default:
00948       assert(0);
00949    }
00950 
00951    spe_release_register(f, tmp);
00952 }
00953 
00954 
00958 void
00959 cell_generate_alpha_blend(struct cell_blend_state *cb)
00960 {
00961    struct pipe_blend_state *const b = &cb->base;
00962    struct spe_function *const f = &cb->code;
00963 
00964    /* This code generates a maximum of 3 (source alpha factor)
00965     * + 3 (destination alpha factor) + (3 * 6) (source color factor)
00966     * + (3 * 6) (destination color factor) + (4 * 2) (blend equation)
00967     * + 4 (fragment mask) + 1 (return) = 55 instructions.  Round up to 64 to
00968     * make it a happy power-of-two.
00969     */
00970    spe_init_func(f, SPE_INST_SIZE * 64);
00971 
00972 
00973    const int frag[4] = {
00974       spe_allocate_register(f, 3),
00975       spe_allocate_register(f, 4),
00976       spe_allocate_register(f, 5),
00977       spe_allocate_register(f, 6),
00978    };
00979    const int pixel[4] = {
00980       spe_allocate_register(f, 7),
00981       spe_allocate_register(f, 8),
00982       spe_allocate_register(f, 9),
00983       spe_allocate_register(f, 10),
00984    };
00985    const int const_color[4] = {
00986       spe_allocate_register(f, 11),
00987       spe_allocate_register(f, 12),
00988       spe_allocate_register(f, 13),
00989       spe_allocate_register(f, 14),
00990    };
00991    unsigned func[4];
00992    unsigned sF[4];
00993    unsigned dF[4];
00994    unsigned i;
00995    int src_factor[4];
00996    int dst_factor[4];
00997 
00998 
00999    /* Does the selected blend mode make use of the source / destination
01000     * color (RGB) blend factors?
01001     */
01002    boolean need_color_factor = b->blend_enable
01003        && (b->rgb_func != PIPE_BLEND_MIN)
01004        && (b->rgb_func != PIPE_BLEND_MAX);
01005 
01006    /* Does the selected blend mode make use of the source / destination
01007     * alpha blend factors?
01008     */
01009    boolean need_alpha_factor = b->blend_enable
01010        && (b->alpha_func != PIPE_BLEND_MIN)
01011        && (b->alpha_func != PIPE_BLEND_MAX);
01012 
01013 
01014    if (b->blend_enable) {
01015       sF[0] = b->rgb_src_factor;
01016       sF[1] = sF[0];
01017       sF[2] = sF[0];
01018       switch (b->alpha_src_factor & 0x0f) {
01019       case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
01020          sF[3] = PIPE_BLENDFACTOR_ONE;
01021          break;
01022       case PIPE_BLENDFACTOR_SRC_COLOR:
01023       case PIPE_BLENDFACTOR_DST_COLOR:
01024       case PIPE_BLENDFACTOR_CONST_COLOR:
01025       case PIPE_BLENDFACTOR_SRC1_COLOR:
01026          sF[3] = b->alpha_src_factor + 1;
01027          break;
01028       default:
01029          sF[3] = b->alpha_src_factor;
01030       }
01031 
01032       dF[0] = b->rgb_dst_factor;
01033       dF[1] = dF[0];
01034       dF[2] = dF[0];
01035       switch (b->alpha_dst_factor & 0x0f) {
01036       case PIPE_BLENDFACTOR_SRC_COLOR:
01037       case PIPE_BLENDFACTOR_DST_COLOR:
01038       case PIPE_BLENDFACTOR_CONST_COLOR:
01039       case PIPE_BLENDFACTOR_SRC1_COLOR:
01040          dF[3] = b->alpha_dst_factor + 1;
01041          break;
01042       default:
01043          dF[3] = b->alpha_dst_factor;
01044       }
01045 
01046       func[0] = b->rgb_func;
01047       func[1] = func[0];
01048       func[2] = func[0];
01049       func[3] = b->alpha_func;
01050    } else {
01051       sF[0] = PIPE_BLENDFACTOR_ONE;
01052       sF[1] = PIPE_BLENDFACTOR_ONE;
01053       sF[2] = PIPE_BLENDFACTOR_ONE;
01054       sF[3] = PIPE_BLENDFACTOR_ONE;
01055       dF[0] = PIPE_BLENDFACTOR_ZERO;
01056       dF[1] = PIPE_BLENDFACTOR_ZERO;
01057       dF[2] = PIPE_BLENDFACTOR_ZERO;
01058       dF[3] = PIPE_BLENDFACTOR_ZERO;
01059 
01060       func[0] = PIPE_BLEND_ADD;
01061       func[1] = PIPE_BLEND_ADD;
01062       func[2] = PIPE_BLEND_ADD;
01063       func[3] = PIPE_BLEND_ADD;
01064    }
01065 
01066 
01067    /* If alpha writing is enabled and the alpha blend mode requires use of
01068     * the alpha factor, calculate the alpha factor.
01069     */
01070    if (((b->colormask & 8) != 0) && need_alpha_factor) {
01071       src_factor[3] = emit_alpha_factor_calculation(f, sF[3], const_color[3],
01072                                                     frag[3], pixel[3]);
01073 
01074       /* If the alpha destination blend factor is the same as the alpha source
01075        * blend factor, re-use the previously calculated value.
01076        */
01077       dst_factor[3] = (dF[3] == sF[3])
01078           ? src_factor[3]
01079           : emit_alpha_factor_calculation(f, dF[3], const_color[3],
01080                                           frag[3], pixel[3]);
01081    }
01082 
01083 
01084    if (sF[0] == sF[3]) {
01085       src_factor[0] = src_factor[3];
01086       src_factor[1] = src_factor[3];
01087       src_factor[2] = src_factor[3];
01088    } else if (sF[0] == dF[3]) {
01089       src_factor[0] = dst_factor[3];
01090       src_factor[1] = dst_factor[3];
01091       src_factor[2] = dst_factor[3];
01092    } else if (need_color_factor) {
01093       emit_color_factor_calculation(f,
01094                                     b->rgb_src_factor,
01095                                     b->colormask,
01096                                     frag, pixel, const_color, src_factor);
01097    }
01098 
01099 
01100    if (dF[0] == sF[3]) {
01101       dst_factor[0] = src_factor[3];
01102       dst_factor[1] = src_factor[3];
01103       dst_factor[2] = src_factor[3];
01104    } else if (dF[0] == dF[3]) {
01105       dst_factor[0] = dst_factor[3];
01106       dst_factor[1] = dst_factor[3];
01107       dst_factor[2] = dst_factor[3];
01108    } else if (dF[0] == sF[0]) {
01109       dst_factor[0] = src_factor[0];
01110       dst_factor[1] = src_factor[1];
01111       dst_factor[2] = src_factor[2];
01112    } else if (need_color_factor) {
01113       emit_color_factor_calculation(f,
01114                                     b->rgb_dst_factor,
01115                                     b->colormask,
01116                                     frag, pixel, const_color, dst_factor);
01117    }
01118 
01119 
01120 
01121    for (i = 0; i < 4; ++i) {
01122       if ((b->colormask & (1U << i)) != 0) {
01123          emit_blend_calculation(f,
01124                                 func[i], sF[i], dF[i],
01125                                 frag[i], src_factor[i],
01126                                 pixel[i], dst_factor[i]);
01127       }
01128    }
01129 
01130    spe_bi(f, 0, 0, 0);
01131 
01132 #if 0
01133    {
01134       const uint32_t *p = f->store;
01135 
01136       printf("# %u instructions\n", f->csr - f->store);
01137       printf("# blend (%sabled)\n",
01138              (cb->base.blend_enable) ? "en" : "dis");
01139       printf("#    RGB func / sf / df: %u %u %u\n",
01140              cb->base.rgb_func,
01141              cb->base.rgb_src_factor,
01142              cb->base.rgb_dst_factor);
01143       printf("#    ALP func / sf / df: %u %u %u\n",
01144              cb->base.alpha_func,
01145              cb->base.alpha_src_factor,
01146              cb->base.alpha_dst_factor);
01147 
01148       printf("\t.text\n");
01149       for (/* empty */; p < f->csr; p++) {
01150          printf("\t.long\t0x%04x\n", *p);
01151       }
01152       fflush(stdout);
01153    }
01154 #endif
01155 }
01156 
01157 
01158 static int
01159 PC_OFFSET(const struct spe_function *f, const void *d)
01160 {
01161    const intptr_t pc = (intptr_t) &f->store[f->num_inst];
01162    const intptr_t ea = ~0x0f & (intptr_t) d;
01163 
01164    return (ea - pc) >> 2;
01165 }
01166 
01167 
01181 void
01182 cell_generate_logic_op(struct spe_function *f,
01183                        const struct pipe_blend_state *blend,
01184                        struct pipe_surface *surf)
01185 {
01186    const unsigned logic_op = (blend->logicop_enable)
01187        ? blend->logicop_func : PIPE_LOGICOP_COPY;
01188 
01189    /* This code generates a maximum of 37 instructions.  An additional 32
01190     * bytes (equiv. to 8 instructions) are needed for data storage.  Round up
01191     * to 64 to make it a happy power-of-two.
01192     */
01193    spe_init_func(f, SPE_INST_SIZE * 64);
01194 
01195 
01196    /* Pixel colors in framebuffer format in AoS layout.
01197     */
01198    const int pixel[4] = {
01199       spe_allocate_register(f, 3),
01200       spe_allocate_register(f, 4),
01201       spe_allocate_register(f, 5),
01202       spe_allocate_register(f, 6),
01203    };
01204 
01205    /* Fragment colors stored as floats in SoA layout.
01206     */
01207    const int frag[4] = {
01208       spe_allocate_register(f, 7),
01209       spe_allocate_register(f, 8),
01210       spe_allocate_register(f, 9),
01211       spe_allocate_register(f, 10),
01212    };
01213 
01214    const int mask = spe_allocate_register(f, 11);
01215 
01216 
01217    /* Short-circuit the noop and invert cases.
01218     */
01219    if ((logic_op == PIPE_LOGICOP_NOOP) || (blend->colormask == 0)) {
01220       spe_bi(f, 0, 0, 0);
01221       return;
01222    } else if (logic_op == PIPE_LOGICOP_INVERT) {
01223       spe_nor(f, pixel[0], pixel[0], pixel[0]);
01224       spe_nor(f, pixel[1], pixel[1], pixel[1]);
01225       spe_nor(f, pixel[2], pixel[2], pixel[2]);
01226       spe_nor(f, pixel[3], pixel[3], pixel[3]);
01227       spe_bi(f, 0, 0, 0);
01228       return;
01229    }
01230 
01231 
01232    const int tmp[4] = {
01233       spe_allocate_available_register(f),
01234       spe_allocate_available_register(f),
01235       spe_allocate_available_register(f),
01236       spe_allocate_available_register(f),
01237    };
01238 
01239    const int shuf_xpose_hi = spe_allocate_available_register(f);
01240    const int shuf_xpose_lo = spe_allocate_available_register(f);
01241    const int shuf_color = spe_allocate_available_register(f);
01242 
01243 
01244    /* Pointer to the begining of the function's private data area.
01245     */
01246    uint32_t *const data = ((uint32_t *) f->store) + (64 - 8);
01247 
01248 
01249    /* Convert fragment colors to framebuffer format in AoS layout.
01250     */
01251    switch (surf->format) {
01252    case PIPE_FORMAT_A8R8G8B8_UNORM:
01253       data[0] = 0x00010203;
01254       data[1] = 0x10111213;
01255       data[2] = 0x04050607;
01256       data[3] = 0x14151617;
01257       data[4] = 0x0c000408;
01258       data[5] = 0x80808080;
01259       data[6] = 0x80808080;
01260       data[7] = 0x80808080;
01261       break;
01262    case PIPE_FORMAT_B8G8R8A8_UNORM:
01263       data[0] = 0x03020100;
01264       data[1] = 0x13121110;
01265       data[2] = 0x07060504;
01266       data[3] = 0x17161514;
01267       data[4] = 0x0804000c;
01268       data[5] = 0x80808080;
01269       data[6] = 0x80808080;
01270       data[7] = 0x80808080;
01271       break;
01272    default:
01273       fprintf(stderr, "CELL: Bad pixel format in cell_generate_logic_op()");
01274       ASSERT(0);
01275    }
01276 
01277    spe_ilh(f, tmp[0], 0x0808);
01278    spe_lqr(f, shuf_xpose_hi, PC_OFFSET(f, data+0));
01279    spe_lqr(f, shuf_color, PC_OFFSET(f, data+4));
01280    spe_a(f, shuf_xpose_lo, shuf_xpose_hi, tmp[0]);
01281 
01282    spe_shufb(f, tmp[0], frag[0], frag[2], shuf_xpose_hi);
01283    spe_shufb(f, tmp[1], frag[0], frag[2], shuf_xpose_lo);
01284    spe_shufb(f, tmp[2], frag[1], frag[3], shuf_xpose_hi);
01285    spe_shufb(f, tmp[3], frag[1], frag[3], shuf_xpose_lo);
01286 
01287    spe_shufb(f, frag[0], tmp[0], tmp[2], shuf_xpose_hi);
01288    spe_shufb(f, frag[1], tmp[0], tmp[2], shuf_xpose_lo);
01289    spe_shufb(f, frag[2], tmp[1], tmp[3], shuf_xpose_hi);
01290    spe_shufb(f, frag[3], tmp[1], tmp[3], shuf_xpose_lo);
01291 
01292    spe_cfltu(f, frag[0], frag[0], 32);
01293    spe_cfltu(f, frag[1], frag[1], 32);
01294    spe_cfltu(f, frag[2], frag[2], 32);
01295    spe_cfltu(f, frag[3], frag[3], 32);
01296 
01297    spe_shufb(f, frag[0], frag[0], pixel[0], shuf_color);
01298    spe_shufb(f, frag[1], frag[1], pixel[1], shuf_color);
01299    spe_shufb(f, frag[2], frag[2], pixel[2], shuf_color);
01300    spe_shufb(f, frag[3], frag[3], pixel[3], shuf_color);
01301 
01302 
01303    /* If logic op is enabled, perform the requested logical operation on the
01304     * converted fragment colors and the pixel colors.
01305     */
01306    switch (logic_op) {
01307    case PIPE_LOGICOP_CLEAR:
01308       spe_il(f, frag[0], 0);
01309       spe_il(f, frag[1], 0);
01310       spe_il(f, frag[2], 0);
01311       spe_il(f, frag[3], 0);
01312       break;
01313    case PIPE_LOGICOP_NOR:
01314       spe_nor(f, frag[0], frag[0], pixel[0]);
01315       spe_nor(f, frag[1], frag[1], pixel[1]);
01316       spe_nor(f, frag[2], frag[2], pixel[2]);
01317       spe_nor(f, frag[3], frag[3], pixel[3]);
01318       break;
01319    case PIPE_LOGICOP_AND_INVERTED:
01320       spe_andc(f, frag[0], pixel[0], frag[0]);
01321       spe_andc(f, frag[1], pixel[1], frag[1]);
01322       spe_andc(f, frag[2], pixel[2], frag[2]);
01323       spe_andc(f, frag[3], pixel[3], frag[3]);
01324       break;
01325    case PIPE_LOGICOP_COPY_INVERTED:
01326       spe_nor(f, frag[0], frag[0], frag[0]);
01327       spe_nor(f, frag[1], frag[1], frag[1]);
01328       spe_nor(f, frag[2], frag[2], frag[2]);
01329       spe_nor(f, frag[3], frag[3], frag[3]);
01330       break;
01331    case PIPE_LOGICOP_AND_REVERSE:
01332       spe_andc(f, frag[0], frag[0], pixel[0]);
01333       spe_andc(f, frag[1], frag[1], pixel[1]);
01334       spe_andc(f, frag[2], frag[2], pixel[2]);
01335       spe_andc(f, frag[3], frag[3], pixel[3]);
01336       break;
01337    case PIPE_LOGICOP_XOR:
01338       spe_xor(f, frag[0], frag[0], pixel[0]);
01339       spe_xor(f, frag[1], frag[1], pixel[1]);
01340       spe_xor(f, frag[2], frag[2], pixel[2]);
01341       spe_xor(f, frag[3], frag[3], pixel[3]);
01342       break;
01343    case PIPE_LOGICOP_NAND:
01344       spe_nand(f, frag[0], frag[0], pixel[0]);
01345       spe_nand(f, frag[1], frag[1], pixel[1]);
01346       spe_nand(f, frag[2], frag[2], pixel[2]);
01347       spe_nand(f, frag[3], frag[3], pixel[3]);
01348       break;
01349    case PIPE_LOGICOP_AND:
01350       spe_and(f, frag[0], frag[0], pixel[0]);
01351       spe_and(f, frag[1], frag[1], pixel[1]);
01352       spe_and(f, frag[2], frag[2], pixel[2]);
01353       spe_and(f, frag[3], frag[3], pixel[3]);
01354       break;
01355    case PIPE_LOGICOP_EQUIV:
01356       spe_eqv(f, frag[0], frag[0], pixel[0]);
01357       spe_eqv(f, frag[1], frag[1], pixel[1]);
01358       spe_eqv(f, frag[2], frag[2], pixel[2]);
01359       spe_eqv(f, frag[3], frag[3], pixel[3]);
01360       break;
01361    case PIPE_LOGICOP_OR_INVERTED:
01362       spe_orc(f, frag[0], pixel[0], frag[0]);
01363       spe_orc(f, frag[1], pixel[1], frag[1]);
01364       spe_orc(f, frag[2], pixel[2], frag[2]);
01365       spe_orc(f, frag[3], pixel[3], frag[3]);
01366       break;
01367    case PIPE_LOGICOP_COPY:
01368       break;
01369    case PIPE_LOGICOP_OR_REVERSE:
01370       spe_orc(f, frag[0], frag[0], pixel[0]);
01371       spe_orc(f, frag[1], frag[1], pixel[1]);
01372       spe_orc(f, frag[2], frag[2], pixel[2]);
01373       spe_orc(f, frag[3], frag[3], pixel[3]);
01374       break;
01375    case PIPE_LOGICOP_OR:
01376       spe_or(f, frag[0], frag[0], pixel[0]);
01377       spe_or(f, frag[1], frag[1], pixel[1]);
01378       spe_or(f, frag[2], frag[2], pixel[2]);
01379       spe_or(f, frag[3], frag[3], pixel[3]);
01380       break;
01381    case PIPE_LOGICOP_SET:
01382       spe_il(f, frag[0], ~0);
01383       spe_il(f, frag[1], ~0);
01384       spe_il(f, frag[2], ~0);
01385       spe_il(f, frag[3], ~0);
01386       break;
01387 
01388    /* These two cases are short-circuited above.
01389     */
01390    case PIPE_LOGICOP_INVERT:
01391    case PIPE_LOGICOP_NOOP:
01392    default:
01393       assert(0);
01394    }
01395 
01396 
01397    /* Apply fragment mask.
01398     */
01399    spe_ilh(f, tmp[0], 0x0000);
01400    spe_ilh(f, tmp[1], 0x0404);
01401    spe_ilh(f, tmp[2], 0x0808);
01402    spe_ilh(f, tmp[3], 0x0c0c);
01403 
01404    spe_shufb(f, tmp[0], mask, mask, tmp[0]);
01405    spe_shufb(f, tmp[1], mask, mask, tmp[1]);
01406    spe_shufb(f, tmp[2], mask, mask, tmp[2]);
01407    spe_shufb(f, tmp[3], mask, mask, tmp[3]);
01408 
01409    spe_selb(f, pixel[0], pixel[0], frag[0], tmp[0]);
01410    spe_selb(f, pixel[1], pixel[1], frag[1], tmp[1]);
01411    spe_selb(f, pixel[2], pixel[2], frag[2], tmp[2]);
01412    spe_selb(f, pixel[3], pixel[3], frag[3], tmp[3]);
01413 
01414    spe_bi(f, 0, 0, 0);
01415 
01416 #if 0
01417    {
01418       const uint32_t *p = f->store;
01419       unsigned i;
01420 
01421       printf("# %u instructions\n", f->csr - f->store);
01422 
01423       printf("\t.text\n");
01424       for (i = 0; i < 64; i++) {
01425          printf("\t.long\t0x%04x\n", p[i]);
01426       }
01427       fflush(stdout);
01428    }
01429 #endif
01430 }

Generated on Tue Sep 29 06:25:16 2009 for Gallium3D by  doxygen 1.5.4