cell_state_per_fragment.c File Reference

Generate code to perform all per-fragment operations. More...

Include dependency graph for cell_state_per_fragment.c:

Go to the source code of this file.

Functions

static void emit_alpha_test (struct pipe_depth_stencil_alpha_state *dsa, struct spe_function *f, int mask, int alphas)
 Generate code to perform alpha testing.
static boolean emit_depth_test (struct pipe_depth_stencil_alpha_state *dsa, struct spe_function *f, int mask, int stored, int calculated)
 Generate code to perform Z testing.
static void emit_stencil_op (struct spe_function *f, int out, int in, int mask, unsigned op, unsigned ref)
 Generate code to apply the stencil operation (after testing).
static int emit_stencil_test (struct pipe_depth_stencil_alpha_state *dsa, unsigned face, struct spe_function *f, int mask, int depth_mask, boolean depth_complement, int stencil, int depth_pass)
 Generate code to do stencil test.
void cell_generate_depth_stencil_test (struct cell_depth_stencil_alpha_state *cdsa)
static int emit_alpha_factor_calculation (struct spe_function *f, unsigned factor, int src_alpha, int dst_alpha, int const_alpha)
 
Note:
Emits a maximum of 3 instructions

static void emit_color_factor_calculation (struct spe_function *f, unsigned sF, unsigned mask, const int *src, const int *dst, const int *const_color, int *factor)
 
Note:
Emits a maximum of 6 instructions

static void emit_blend_calculation (struct spe_function *f, unsigned func, unsigned sF, unsigned dF, int src, int src_factor, int dst, int dst_factor)
void cell_generate_alpha_blend (struct cell_blend_state *cb)
 Generate code to perform alpha blending on the SPE.
static int PC_OFFSET (const struct spe_function *f, const void *d)
void cell_generate_logic_op (struct spe_function *f, const struct pipe_blend_state *blend, struct pipe_surface *surf)
 Generate code to perform color conversion and logic op.


Detailed Description

Generate code to perform all per-fragment operations.

Code generated by these functions performs alpha, depth, and stencil testing as well as alpha blending.

Note:
Occlusion query is not supported, but this is the right place to add that support.
Author:
Ian Romanick <idr@us.ibm.com>

Definition in file cell_state_per_fragment.c.


Function Documentation

void cell_generate_alpha_blend ( struct cell_blend_state * cb ) 

Generate code to perform alpha blending on the SPE.

Definition at line 959 of file cell_state_per_fragment.c.

References pipe_blend_state::alpha_dst_factor, pipe_blend_state::alpha_func, pipe_blend_state::alpha_src_factor, cell_blend_state::base, pipe_blend_state::blend_enable, cell_blend_state::code, pipe_blend_state::colormask, emit_alpha_factor_calculation(), emit_blend_calculation(), emit_color_factor_calculation(), PIPE_BLEND_ADD, PIPE_BLEND_MAX, PIPE_BLEND_MIN, PIPE_BLENDFACTOR_CONST_COLOR, PIPE_BLENDFACTOR_DST_COLOR, PIPE_BLENDFACTOR_ONE, PIPE_BLENDFACTOR_SRC1_COLOR, PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE, PIPE_BLENDFACTOR_SRC_COLOR, PIPE_BLENDFACTOR_ZERO, pipe_blend_state::rgb_dst_factor, pipe_blend_state::rgb_func, pipe_blend_state::rgb_src_factor, spe_allocate_register(), spe_bi(), spe_init_func(), SPE_INST_SIZE, and spe_function::store.

00960 {
00961    struct pipe_blend_state *const b = &cb->base;
00962    struct spe_function *const f = &cb->code;
00963 
00964    /* This code generates a maximum of 3 (source alpha factor)
00965     * + 3 (destination alpha factor) + (3 * 6) (source color factor)
00966     * + (3 * 6) (destination color factor) + (4 * 2) (blend equation)
00967     * + 4 (fragment mask) + 1 (return) = 55 instructions.  Round up to 64 to
00968     * make it a happy power-of-two.
00969     */
00970    spe_init_func(f, SPE_INST_SIZE * 64);
00971 
00972 
00973    const int frag[4] = {
00974       spe_allocate_register(f, 3),
00975       spe_allocate_register(f, 4),
00976       spe_allocate_register(f, 5),
00977       spe_allocate_register(f, 6),
00978    };
00979    const int pixel[4] = {
00980       spe_allocate_register(f, 7),
00981       spe_allocate_register(f, 8),
00982       spe_allocate_register(f, 9),
00983       spe_allocate_register(f, 10),
00984    };
00985    const int const_color[4] = {
00986       spe_allocate_register(f, 11),
00987       spe_allocate_register(f, 12),
00988       spe_allocate_register(f, 13),
00989       spe_allocate_register(f, 14),
00990    };
00991    unsigned func[4];
00992    unsigned sF[4];
00993    unsigned dF[4];
00994    unsigned i;
00995    int src_factor[4];
00996    int dst_factor[4];
00997 
00998 
00999    /* Does the selected blend mode make use of the source / destination
01000     * color (RGB) blend factors?
01001     */
01002    boolean need_color_factor = b->blend_enable
01003        && (b->rgb_func != PIPE_BLEND_MIN)
01004        && (b->rgb_func != PIPE_BLEND_MAX);
01005 
01006    /* Does the selected blend mode make use of the source / destination
01007     * alpha blend factors?
01008     */
01009    boolean need_alpha_factor = b->blend_enable
01010        && (b->alpha_func != PIPE_BLEND_MIN)
01011        && (b->alpha_func != PIPE_BLEND_MAX);
01012 
01013 
01014    if (b->blend_enable) {
01015       sF[0] = b->rgb_src_factor;
01016       sF[1] = sF[0];
01017       sF[2] = sF[0];
01018       switch (b->alpha_src_factor & 0x0f) {
01019       case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
01020          sF[3] = PIPE_BLENDFACTOR_ONE;
01021          break;
01022       case PIPE_BLENDFACTOR_SRC_COLOR:
01023       case PIPE_BLENDFACTOR_DST_COLOR:
01024       case PIPE_BLENDFACTOR_CONST_COLOR:
01025       case PIPE_BLENDFACTOR_SRC1_COLOR:
01026          sF[3] = b->alpha_src_factor + 1;
01027          break;
01028       default:
01029          sF[3] = b->alpha_src_factor;
01030       }
01031 
01032       dF[0] = b->rgb_dst_factor;
01033       dF[1] = dF[0];
01034       dF[2] = dF[0];
01035       switch (b->alpha_dst_factor & 0x0f) {
01036       case PIPE_BLENDFACTOR_SRC_COLOR:
01037       case PIPE_BLENDFACTOR_DST_COLOR:
01038       case PIPE_BLENDFACTOR_CONST_COLOR:
01039       case PIPE_BLENDFACTOR_SRC1_COLOR:
01040          dF[3] = b->alpha_dst_factor + 1;
01041          break;
01042       default:
01043          dF[3] = b->alpha_dst_factor;
01044       }
01045 
01046       func[0] = b->rgb_func;
01047       func[1] = func[0];
01048       func[2] = func[0];
01049       func[3] = b->alpha_func;
01050    } else {
01051       sF[0] = PIPE_BLENDFACTOR_ONE;
01052       sF[1] = PIPE_BLENDFACTOR_ONE;
01053       sF[2] = PIPE_BLENDFACTOR_ONE;
01054       sF[3] = PIPE_BLENDFACTOR_ONE;
01055       dF[0] = PIPE_BLENDFACTOR_ZERO;
01056       dF[1] = PIPE_BLENDFACTOR_ZERO;
01057       dF[2] = PIPE_BLENDFACTOR_ZERO;
01058       dF[3] = PIPE_BLENDFACTOR_ZERO;
01059 
01060       func[0] = PIPE_BLEND_ADD;
01061       func[1] = PIPE_BLEND_ADD;
01062       func[2] = PIPE_BLEND_ADD;
01063       func[3] = PIPE_BLEND_ADD;
01064    }
01065 
01066 
01067    /* If alpha writing is enabled and the alpha blend mode requires use of
01068     * the alpha factor, calculate the alpha factor.
01069     */
01070    if (((b->colormask & 8) != 0) && need_alpha_factor) {
01071       src_factor[3] = emit_alpha_factor_calculation(f, sF[3], const_color[3],
01072                                                     frag[3], pixel[3]);
01073 
01074       /* If the alpha destination blend factor is the same as the alpha source
01075        * blend factor, re-use the previously calculated value.
01076        */
01077       dst_factor[3] = (dF[3] == sF[3])
01078           ? src_factor[3]
01079           : emit_alpha_factor_calculation(f, dF[3], const_color[3],
01080                                           frag[3], pixel[3]);
01081    }
01082 
01083 
01084    if (sF[0] == sF[3]) {
01085       src_factor[0] = src_factor[3];
01086       src_factor[1] = src_factor[3];
01087       src_factor[2] = src_factor[3];
01088    } else if (sF[0] == dF[3]) {
01089       src_factor[0] = dst_factor[3];
01090       src_factor[1] = dst_factor[3];
01091       src_factor[2] = dst_factor[3];
01092    } else if (need_color_factor) {
01093       emit_color_factor_calculation(f,
01094                                     b->rgb_src_factor,
01095                                     b->colormask,
01096                                     frag, pixel, const_color, src_factor);
01097    }
01098 
01099 
01100    if (dF[0] == sF[3]) {
01101       dst_factor[0] = src_factor[3];
01102       dst_factor[1] = src_factor[3];
01103       dst_factor[2] = src_factor[3];
01104    } else if (dF[0] == dF[3]) {
01105       dst_factor[0] = dst_factor[3];
01106       dst_factor[1] = dst_factor[3];
01107       dst_factor[2] = dst_factor[3];
01108    } else if (dF[0] == sF[0]) {
01109       dst_factor[0] = src_factor[0];
01110       dst_factor[1] = src_factor[1];
01111       dst_factor[2] = src_factor[2];
01112    } else if (need_color_factor) {
01113       emit_color_factor_calculation(f,
01114                                     b->rgb_dst_factor,
01115                                     b->colormask,
01116                                     frag, pixel, const_color, dst_factor);
01117    }
01118 
01119 
01120 
01121    for (i = 0; i < 4; ++i) {
01122       if ((b->colormask & (1U << i)) != 0) {
01123          emit_blend_calculation(f,
01124                                 func[i], sF[i], dF[i],
01125                                 frag[i], src_factor[i],
01126                                 pixel[i], dst_factor[i]);
01127       }
01128    }
01129 
01130    spe_bi(f, 0, 0, 0);
01131 
01132 #if 0
01133    {
01134       const uint32_t *p = f->store;
01135 
01136       printf("# %u instructions\n", f->csr - f->store);
01137       printf("# blend (%sabled)\n",
01138              (cb->base.blend_enable) ? "en" : "dis");
01139       printf("#    RGB func / sf / df: %u %u %u\n",
01140              cb->base.rgb_func,
01141              cb->base.rgb_src_factor,
01142              cb->base.rgb_dst_factor);
01143       printf("#    ALP func / sf / df: %u %u %u\n",
01144              cb->base.alpha_func,
01145              cb->base.alpha_src_factor,
01146              cb->base.alpha_dst_factor);
01147 
01148       printf("\t.text\n");
01149       for (/* empty */; p < f->csr; p++) {
01150          printf("\t.long\t0x%04x\n", *p);
01151       }
01152       fflush(stdout);
01153    }
01154 #endif
01155 }

void cell_generate_depth_stencil_test ( struct cell_depth_stencil_alpha_state * cdsa ) 

Definition at line 466 of file cell_state_per_fragment.c.

References pipe_depth_stencil_alpha_state::alpha, cell_depth_stencil_alpha_state::base, cell_depth_stencil_alpha_state::code, pipe_depth_stencil_alpha_state::depth, emit_alpha_test(), emit_depth_test(), emit_stencil_test(), pipe_alpha_state::enabled, pipe_depth_state::enabled, pipe_stencil_state::enabled, pipe_stencil_state::fail_op, pipe_stencil_state::func, pipe_depth_state::func, pipe_alpha_state::func, pipe_alpha_state::ref, pipe_stencil_state::ref_value, spe_allocate_available_register(), spe_allocate_register(), spe_and(), spe_andc(), spe_bi(), spe_init_func(), SPE_INST_SIZE, spe_or(), spe_release_register(), spe_selb(), pipe_depth_stencil_alpha_state::stencil, spe_function::store, pipe_stencil_state::value_mask, pipe_stencil_state::write_mask, pipe_depth_state::writemask, pipe_stencil_state::zfail_op, and pipe_stencil_state::zpass_op.

00467 {
00468    struct pipe_depth_stencil_alpha_state *const dsa = &cdsa->base;
00469    struct spe_function *const f = &cdsa->code;
00470 
00471    /* This code generates a maximum of 6 (alpha test) + 3 (depth test)
00472     * + 25 (front stencil) + 25 (back stencil) + 4 = 63 instructions.  Round
00473     * up to 64 to make it a happy power-of-two.
00474     */
00475    spe_init_func(f, SPE_INST_SIZE * 64);
00476 
00477 
00478    /* Allocate registers for the function's input parameters.  Cleverly (and
00479     * clever code is usually dangerous, but I couldn't resist) the generated
00480     * function returns a structure.  Returned structures start with register
00481     * 3, and the structure fields are ordered to match up exactly with the
00482     * input parameters.
00483     */
00484    int mask = spe_allocate_register(f, 3);
00485    int depth = spe_allocate_register(f, 4);
00486    int stencil = spe_allocate_register(f, 5);
00487    int zvals = spe_allocate_register(f, 6);
00488    int frag_a = spe_allocate_register(f, 7);
00489    int facing = spe_allocate_register(f, 8);
00490 
00491    int depth_mask = spe_allocate_available_register(f);
00492 
00493    boolean depth_complement;
00494 
00495 
00496    emit_alpha_test(dsa, f, mask, frag_a);
00497 
00498    depth_complement = emit_depth_test(dsa, f, depth_mask, depth, zvals);
00499 
00500    if (dsa->stencil[0].enabled) {
00501       const int front_depth_pass = spe_allocate_available_register(f);
00502       int front_stencil = emit_stencil_test(dsa, 0, f, mask,
00503                                             depth_mask, depth_complement,
00504                                             stencil, front_depth_pass);
00505 
00506       if (dsa->stencil[1].enabled) {
00507          const int back_depth_pass = spe_allocate_available_register(f);
00508          int back_stencil = emit_stencil_test(dsa, 1, f, mask,
00509                                               depth_mask,  depth_complement,
00510                                               stencil, back_depth_pass);
00511 
00512          /* If the front facing stencil value and the back facing stencil
00513           * value are stored in the same register, there is no need to select
00514           * a value based on the facing.  This can happen if the stencil value
00515           * was not modified due to the write masks being zero, the stencil
00516           * operations being KEEP, etc.
00517           */
00518          if (front_stencil != back_stencil) {
00519             spe_selb(f, stencil, back_stencil, front_stencil, facing);
00520          }
00521 
00522          if (back_stencil != stencil) {
00523             spe_release_register(f, back_stencil);
00524          }
00525 
00526          if (front_stencil != stencil) {
00527             spe_release_register(f, front_stencil);
00528          }
00529 
00530          spe_selb(f, mask, back_depth_pass, front_depth_pass, facing);
00531 
00532          spe_release_register(f, back_depth_pass);
00533       } else {
00534          if (front_stencil != stencil) {
00535             spe_or(f, stencil, front_stencil, front_stencil);
00536             spe_release_register(f, front_stencil);
00537          }
00538          spe_or(f, mask, front_depth_pass, front_depth_pass);
00539       }
00540 
00541       spe_release_register(f, front_depth_pass);
00542    } else if (dsa->depth.enabled) {
00543       if (depth_complement) {
00544          spe_andc(f, mask, mask, depth_mask);
00545       } else {
00546          spe_and(f, mask, mask, depth_mask);
00547       }
00548    }
00549 
00550    if (dsa->depth.writemask) {
00551          spe_selb(f, depth, depth, zvals, mask);
00552    }
00553 
00554    spe_bi(f, 0, 0, 0);  /* return from function call */
00555 
00556 
00557 #if 0
00558    {
00559       const uint32_t *p = f->store;
00560       unsigned i;
00561 
00562       printf("# alpha (%sabled)\n",
00563              (dsa->alpha.enabled) ? "en" : "dis");
00564       printf("#    func: %u\n", dsa->alpha.func);
00565       printf("#    ref: %.2f\n", dsa->alpha.ref);
00566 
00567       printf("# depth (%sabled)\n",
00568              (dsa->depth.enabled) ? "en" : "dis");
00569       printf("#    func: %u\n", dsa->depth.func);
00570 
00571       for (i = 0; i < 2; i++) {
00572          printf("# %s stencil (%sabled)\n",
00573                 (i == 0) ? "front" : "back",
00574                 (dsa->stencil[i].enabled) ? "en" : "dis");
00575 
00576          printf("#    func: %u\n", dsa->stencil[i].func);
00577          printf("#    op (sf, zf, zp): %u %u %u\n",
00578                 dsa->stencil[i].fail_op,
00579                 dsa->stencil[i].zfail_op,
00580                 dsa->stencil[i].zpass_op);
00581          printf("#    ref value / value mask / write mask: %02x %02x %02x\n",
00582                 dsa->stencil[i].ref_value,
00583                 dsa->stencil[i].value_mask,
00584                 dsa->stencil[i].write_mask);
00585       }
00586 
00587       printf("\t.text\n");
00588       for (/* empty */; p < f->csr; p++) {
00589          printf("\t.long\t0x%04x\n", *p);
00590       }
00591       fflush(stdout);
00592    }
00593 #endif
00594 }

void cell_generate_logic_op ( struct spe_function * f,
const struct pipe_blend_state * blend,
struct pipe_surface * surf 
)

Generate code to perform color conversion and logic op.

Definition at line 1182 of file cell_state_per_fragment.c.

References assert, ASSERT, pipe_blend_state::colormask, pipe_surface::format, pipe_blend_state::logicop_enable, pipe_blend_state::logicop_func, PC_OFFSET(), PIPE_FORMAT_A8R8G8B8_UNORM, PIPE_FORMAT_B8G8R8A8_UNORM, PIPE_LOGICOP_AND, PIPE_LOGICOP_AND_INVERTED, PIPE_LOGICOP_AND_REVERSE, PIPE_LOGICOP_CLEAR, PIPE_LOGICOP_COPY, PIPE_LOGICOP_COPY_INVERTED, PIPE_LOGICOP_EQUIV, PIPE_LOGICOP_INVERT, PIPE_LOGICOP_NAND, PIPE_LOGICOP_NOOP, PIPE_LOGICOP_NOR, PIPE_LOGICOP_OR, PIPE_LOGICOP_OR_INVERTED, PIPE_LOGICOP_OR_REVERSE, PIPE_LOGICOP_SET, PIPE_LOGICOP_XOR, spe_a(), spe_allocate_available_register(), spe_allocate_register(), spe_and(), spe_andc(), spe_bi(), spe_cfltu(), spe_eqv(), spe_il(), spe_ilh(), spe_init_func(), SPE_INST_SIZE, spe_lqr(), spe_nand(), spe_nor(), spe_or(), spe_orc(), spe_selb(), spe_shufb(), spe_xor(), and spe_function::store.

01185 {
01186    const unsigned logic_op = (blend->logicop_enable)
01187        ? blend->logicop_func : PIPE_LOGICOP_COPY;
01188 
01189    /* This code generates a maximum of 37 instructions.  An additional 32
01190     * bytes (equiv. to 8 instructions) are needed for data storage.  Round up
01191     * to 64 to make it a happy power-of-two.
01192     */
01193    spe_init_func(f, SPE_INST_SIZE * 64);
01194 
01195 
01196    /* Pixel colors in framebuffer format in AoS layout.
01197     */
01198    const int pixel[4] = {
01199       spe_allocate_register(f, 3),
01200       spe_allocate_register(f, 4),
01201       spe_allocate_register(f, 5),
01202       spe_allocate_register(f, 6),
01203    };
01204 
01205    /* Fragment colors stored as floats in SoA layout.
01206     */
01207    const int frag[4] = {
01208       spe_allocate_register(f, 7),
01209       spe_allocate_register(f, 8),
01210       spe_allocate_register(f, 9),
01211       spe_allocate_register(f, 10),
01212    };
01213 
01214    const int mask = spe_allocate_register(f, 11);
01215 
01216 
01217    /* Short-circuit the noop and invert cases.
01218     */
01219    if ((logic_op == PIPE_LOGICOP_NOOP) || (blend->colormask == 0)) {
01220       spe_bi(f, 0, 0, 0);
01221       return;
01222    } else if (logic_op == PIPE_LOGICOP_INVERT) {
01223       spe_nor(f, pixel[0], pixel[0], pixel[0]);
01224       spe_nor(f, pixel[1], pixel[1], pixel[1]);
01225       spe_nor(f, pixel[2], pixel[2], pixel[2]);
01226       spe_nor(f, pixel[3], pixel[3], pixel[3]);
01227       spe_bi(f, 0, 0, 0);
01228       return;
01229    }
01230 
01231 
01232    const int tmp[4] = {
01233       spe_allocate_available_register(f),
01234       spe_allocate_available_register(f),
01235       spe_allocate_available_register(f),
01236       spe_allocate_available_register(f),
01237    };
01238 
01239    const int shuf_xpose_hi = spe_allocate_available_register(f);
01240    const int shuf_xpose_lo = spe_allocate_available_register(f);
01241    const int shuf_color = spe_allocate_available_register(f);
01242 
01243 
01244    /* Pointer to the beginning of the function's private data area.
01245     */
01246    uint32_t *const data = ((uint32_t *) f->store) + (64 - 8);
01247 
01248 
01249    /* Convert fragment colors to framebuffer format in AoS layout.
01250     */
01251    switch (surf->format) {
01252    case PIPE_FORMAT_A8R8G8B8_UNORM:
01253       data[0] = 0x00010203;
01254       data[1] = 0x10111213;
01255       data[2] = 0x04050607;
01256       data[3] = 0x14151617;
01257       data[4] = 0x0c000408;
01258       data[5] = 0x80808080;
01259       data[6] = 0x80808080;
01260       data[7] = 0x80808080;
01261       break;
01262    case PIPE_FORMAT_B8G8R8A8_UNORM:
01263       data[0] = 0x03020100;
01264       data[1] = 0x13121110;
01265       data[2] = 0x07060504;
01266       data[3] = 0x17161514;
01267       data[4] = 0x0804000c;
01268       data[5] = 0x80808080;
01269       data[6] = 0x80808080;
01270       data[7] = 0x80808080;
01271       break;
01272    default:
01273       fprintf(stderr, "CELL: Bad pixel format in cell_generate_logic_op()");
01274       ASSERT(0);
01275    }
01276 
01277    spe_ilh(f, tmp[0], 0x0808);
01278    spe_lqr(f, shuf_xpose_hi, PC_OFFSET(f, data+0));
01279    spe_lqr(f, shuf_color, PC_OFFSET(f, data+4));
01280    spe_a(f, shuf_xpose_lo, shuf_xpose_hi, tmp[0]);
01281 
01282    spe_shufb(f, tmp[0], frag[0], frag[2], shuf_xpose_hi);
01283    spe_shufb(f, tmp[1], frag[0], frag[2], shuf_xpose_lo);
01284    spe_shufb(f, tmp[2], frag[1], frag[3], shuf_xpose_hi);
01285    spe_shufb(f, tmp[3], frag[1], frag[3], shuf_xpose_lo);
01286 
01287    spe_shufb(f, frag[0], tmp[0], tmp[2], shuf_xpose_hi);
01288    spe_shufb(f, frag[1], tmp[0], tmp[2], shuf_xpose_lo);
01289    spe_shufb(f, frag[2], tmp[1], tmp[3], shuf_xpose_hi);
01290    spe_shufb(f, frag[3], tmp[1], tmp[3], shuf_xpose_lo);
01291 
01292    spe_cfltu(f, frag[0], frag[0], 32);
01293    spe_cfltu(f, frag[1], frag[1], 32);
01294    spe_cfltu(f, frag[2], frag[2], 32);
01295    spe_cfltu(f, frag[3], frag[3], 32);
01296 
01297    spe_shufb(f, frag[0], frag[0], pixel[0], shuf_color);
01298    spe_shufb(f, frag[1], frag[1], pixel[1], shuf_color);
01299    spe_shufb(f, frag[2], frag[2], pixel[2], shuf_color);
01300    spe_shufb(f, frag[3], frag[3], pixel[3], shuf_color);
01301 
01302 
01303    /* If logic op is enabled, perform the requested logical operation on the
01304     * converted fragment colors and the pixel colors.
01305     */
01306    switch (logic_op) {
01307    case PIPE_LOGICOP_CLEAR:
01308       spe_il(f, frag[0], 0);
01309       spe_il(f, frag[1], 0);
01310       spe_il(f, frag[2], 0);
01311       spe_il(f, frag[3], 0);
01312       break;
01313    case PIPE_LOGICOP_NOR:
01314       spe_nor(f, frag[0], frag[0], pixel[0]);
01315       spe_nor(f, frag[1], frag[1], pixel[1]);
01316       spe_nor(f, frag[2], frag[2], pixel[2]);
01317       spe_nor(f, frag[3], frag[3], pixel[3]);
01318       break;
01319    case PIPE_LOGICOP_AND_INVERTED:
01320       spe_andc(f, frag[0], pixel[0], frag[0]);
01321       spe_andc(f, frag[1], pixel[1], frag[1]);
01322       spe_andc(f, frag[2], pixel[2], frag[2]);
01323       spe_andc(f, frag[3], pixel[3], frag[3]);
01324       break;
01325    case PIPE_LOGICOP_COPY_INVERTED:
01326       spe_nor(f, frag[0], frag[0], frag[0]);
01327       spe_nor(f, frag[1], frag[1], frag[1]);
01328       spe_nor(f, frag[2], frag[2], frag[2]);
01329       spe_nor(f, frag[3], frag[3], frag[3]);
01330       break;
01331    case PIPE_LOGICOP_AND_REVERSE:
01332       spe_andc(f, frag[0], frag[0], pixel[0]);
01333       spe_andc(f, frag[1], frag[1], pixel[1]);
01334       spe_andc(f, frag[2], frag[2], pixel[2]);
01335       spe_andc(f, frag[3], frag[3], pixel[3]);
01336       break;
01337    case PIPE_LOGICOP_XOR:
01338       spe_xor(f, frag[0], frag[0], pixel[0]);
01339       spe_xor(f, frag[1], frag[1], pixel[1]);
01340       spe_xor(f, frag[2], frag[2], pixel[2]);
01341       spe_xor(f, frag[3], frag[3], pixel[3]);
01342       break;
01343    case PIPE_LOGICOP_NAND:
01344       spe_nand(f, frag[0], frag[0], pixel[0]);
01345       spe_nand(f, frag[1], frag[1], pixel[1]);
01346       spe_nand(f, frag[2], frag[2], pixel[2]);
01347       spe_nand(f, frag[3], frag[3], pixel[3]);
01348       break;
01349    case PIPE_LOGICOP_AND:
01350       spe_and(f, frag[0], frag[0], pixel[0]);
01351       spe_and(f, frag[1], frag[1], pixel[1]);
01352       spe_and(f, frag[2], frag[2], pixel[2]);
01353       spe_and(f, frag[3], frag[3], pixel[3]);
01354       break;
01355    case PIPE_LOGICOP_EQUIV:
01356       spe_eqv(f, frag[0], frag[0], pixel[0]);
01357       spe_eqv(f, frag[1], frag[1], pixel[1]);
01358       spe_eqv(f, frag[2], frag[2], pixel[2]);
01359       spe_eqv(f, frag[3], frag[3], pixel[3]);
01360       break;
01361    case PIPE_LOGICOP_OR_INVERTED:
01362       spe_orc(f, frag[0], pixel[0], frag[0]);
01363       spe_orc(f, frag[1], pixel[1], frag[1]);
01364       spe_orc(f, frag[2], pixel[2], frag[2]);
01365       spe_orc(f, frag[3], pixel[3], frag[3]);
01366       break;
01367    case PIPE_LOGICOP_COPY:
01368       break;
01369    case PIPE_LOGICOP_OR_REVERSE:
01370       spe_orc(f, frag[0], frag[0], pixel[0]);
01371       spe_orc(f, frag[1], frag[1], pixel[1]);
01372       spe_orc(f, frag[2], frag[2], pixel[2]);
01373       spe_orc(f, frag[3], frag[3], pixel[3]);
01374       break;
01375    case PIPE_LOGICOP_OR:
01376       spe_or(f, frag[0], frag[0], pixel[0]);
01377       spe_or(f, frag[1], frag[1], pixel[1]);
01378       spe_or(f, frag[2], frag[2], pixel[2]);
01379       spe_or(f, frag[3], frag[3], pixel[3]);
01380       break;
01381    case PIPE_LOGICOP_SET:
01382       spe_il(f, frag[0], ~0);
01383       spe_il(f, frag[1], ~0);
01384       spe_il(f, frag[2], ~0);
01385       spe_il(f, frag[3], ~0);
01386       break;
01387 
01388    /* These two cases are short-circuited above.
01389     */
01390    case PIPE_LOGICOP_INVERT:
01391    case PIPE_LOGICOP_NOOP:
01392    default:
01393       assert(0);
01394    }
01395 
01396 
01397    /* Apply fragment mask.
01398     */
01399    spe_ilh(f, tmp[0], 0x0000);
01400    spe_ilh(f, tmp[1], 0x0404);
01401    spe_ilh(f, tmp[2], 0x0808);
01402    spe_ilh(f, tmp[3], 0x0c0c);
01403 
01404    spe_shufb(f, tmp[0], mask, mask, tmp[0]);
01405    spe_shufb(f, tmp[1], mask, mask, tmp[1]);
01406    spe_shufb(f, tmp[2], mask, mask, tmp[2]);
01407    spe_shufb(f, tmp[3], mask, mask, tmp[3]);
01408 
01409    spe_selb(f, pixel[0], pixel[0], frag[0], tmp[0]);
01410    spe_selb(f, pixel[1], pixel[1], frag[1], tmp[1]);
01411    spe_selb(f, pixel[2], pixel[2], frag[2], tmp[2]);
01412    spe_selb(f, pixel[3], pixel[3], frag[3], tmp[3]);
01413 
01414    spe_bi(f, 0, 0, 0);
01415 
01416 #if 0
01417    {
01418       const uint32_t *p = f->store;
01419       unsigned i;
01420 
01421       printf("# %u instructions\n", f->csr - f->store);
01422 
01423       printf("\t.text\n");
01424       for (i = 0; i < 64; i++) {
01425          printf("\t.long\t0x%04x\n", p[i]);
01426       }
01427       fflush(stdout);
01428    }
01429 #endif
01430 }

static int emit_alpha_factor_calculation ( struct spe_function * f,
unsigned  factor,
int  src_alpha,
int  dst_alpha,
int  const_alpha 
) [static]

Note:
Emits a maximum of 3 instructions

Definition at line 601 of file cell_state_per_fragment.c.

References assert, PIPE_BLENDFACTOR_CONST_ALPHA, PIPE_BLENDFACTOR_DST_ALPHA, PIPE_BLENDFACTOR_INV_CONST_ALPHA, PIPE_BLENDFACTOR_INV_DST_ALPHA, PIPE_BLENDFACTOR_INV_SRC1_ALPHA, PIPE_BLENDFACTOR_INV_SRC_ALPHA, PIPE_BLENDFACTOR_ONE, PIPE_BLENDFACTOR_SRC1_ALPHA, PIPE_BLENDFACTOR_SRC_ALPHA, PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE, PIPE_BLENDFACTOR_ZERO, spe_allocate_available_register(), spe_cuflt(), spe_fs(), spe_il(), spe_or(), and spe_release_register().

00604 {
00605    int factor_reg;
00606    int tmp;
00607 
00608 
00609    switch (factor) {
00610    case PIPE_BLENDFACTOR_ONE:
00611       factor_reg = -1;
00612       break;
00613 
00614    case PIPE_BLENDFACTOR_SRC_ALPHA:
00615       factor_reg = spe_allocate_available_register(f);
00616 
00617       spe_or(f, factor_reg, src_alpha, src_alpha);
00618       break;
00619 
00620    case PIPE_BLENDFACTOR_DST_ALPHA:
00621       factor_reg = dst_alpha;
00622       break;
00623 
00624    case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
00625       factor_reg = -1;
00626       break;
00627 
00628    case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
00629       factor_reg = spe_allocate_available_register(f);
00630 
00631       tmp = spe_allocate_available_register(f);
00632       spe_il(f, tmp, 1);
00633       spe_cuflt(f, tmp, tmp, 0);
00634       spe_fs(f, factor_reg, tmp, const_alpha);
00635       spe_release_register(f, tmp);
00636       break;
00637 
00638    case PIPE_BLENDFACTOR_CONST_ALPHA:
00639       factor_reg = const_alpha;
00640       break;
00641 
00642    case PIPE_BLENDFACTOR_ZERO:
00643       factor_reg = -1;
00644       break;
00645 
00646    case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
00647       tmp = spe_allocate_available_register(f);
00648       factor_reg = spe_allocate_available_register(f);
00649 
00650       spe_il(f, tmp, 1);
00651       spe_cuflt(f, tmp, tmp, 0);
00652       spe_fs(f, factor_reg, tmp, src_alpha);
00653 
00654       spe_release_register(f, tmp);
00655       break;
00656 
00657    case PIPE_BLENDFACTOR_INV_DST_ALPHA:
00658       tmp = spe_allocate_available_register(f);
00659       factor_reg = spe_allocate_available_register(f);
00660 
00661       spe_il(f, tmp, 1);
00662       spe_cuflt(f, tmp, tmp, 0);
00663       spe_fs(f, factor_reg, tmp, dst_alpha);
00664 
00665       spe_release_register(f, tmp);
00666       break;
00667 
00668    case PIPE_BLENDFACTOR_SRC1_ALPHA:
00669    case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
00670    default:
00671       assert(0);
00672       factor_reg = -1;
00673       break;
00674    }
00675 
00676    return factor_reg;
00677 }

static void emit_alpha_test ( struct pipe_depth_stencil_alpha_state * dsa,
struct spe_function * f,
int  mask,
int  alphas 
) [static]

Generate code to perform alpha testing.

The code generated by this function uses the register specified by mask as both an input and an output.

Parameters:
dsa Current alpha-test state
f Function to which code should be appended
mask Index of register containing active fragment mask
alphas Index of register containing per-fragment alpha values
Note:
Emits a maximum of 6 instructions.

Definition at line 61 of file cell_state_per_fragment.c.

References pipe_depth_stencil_alpha_state::alpha, assert, pipe_alpha_state::enabled, FALSE, pipe_alpha_state::func, PIPE_FUNC_ALWAYS, PIPE_FUNC_EQUAL, PIPE_FUNC_GEQUAL, PIPE_FUNC_GREATER, PIPE_FUNC_LEQUAL, PIPE_FUNC_LESS, PIPE_FUNC_NEVER, PIPE_FUNC_NOTEQUAL, pipe_alpha_state::ref, spe_allocate_available_register(), spe_and(), spe_andc(), spe_fceq(), spe_fcgt(), spe_il(), spe_ilh(), spe_or(), spe_release_register(), and TRUE.

00063 {
00064    /* If the alpha function is either NEVER or ALWAYS, there is no need to
00065     * load the reference value into a register.  ALWAYS is a fairly common
00066     * case, and this optimization saves 2 instructions.
00067     */
00068    if (dsa->alpha.enabled
00069        && (dsa->alpha.func != PIPE_FUNC_NEVER)
00070        && (dsa->alpha.func != PIPE_FUNC_ALWAYS)) {
00071       int ref = spe_allocate_available_register(f);
00072       int tmp_a = spe_allocate_available_register(f);
00073       int tmp_b = spe_allocate_available_register(f);
00074       union {
00075          float f;
00076          unsigned u;
00077       } ref_val;
00078       boolean complement = FALSE;
00079 
00080       ref_val.f = dsa->alpha.ref;
00081 
00082       spe_il(f, ref, ref_val.u & 0x0000ffff);
00083       spe_ilh(f, ref, ref_val.u >> 16);
00084 
00085       switch (dsa->alpha.func) {
00086       case PIPE_FUNC_NOTEQUAL:
00087          complement = TRUE;
00088          /* FALLTHROUGH */
00089 
00090       case PIPE_FUNC_EQUAL:
00091          spe_fceq(f, tmp_a, ref, alphas);
00092          break;
00093 
00094       case PIPE_FUNC_LEQUAL:
00095          complement = TRUE;
00096          /* FALLTHROUGH */
00097 
00098       case PIPE_FUNC_GREATER:
00099          spe_fcgt(f, tmp_a, ref, alphas);
00100          break;
00101 
00102       case PIPE_FUNC_LESS:
00103          complement = TRUE;
00104          /* FALLTHROUGH */
00105 
00106       case PIPE_FUNC_GEQUAL:
00107          spe_fcgt(f, tmp_a, ref, alphas);
00108          spe_fceq(f, tmp_b, ref, alphas);
00109          spe_or(f, tmp_a, tmp_b, tmp_a);
00110          break;
00111 
00112       case PIPE_FUNC_ALWAYS:
00113       case PIPE_FUNC_NEVER:
00114       default:
00115          assert(0);
00116          break;
00117       }
00118 
00119       if (complement) {
00120          spe_andc(f, mask, mask, tmp_a);
00121       } else {
00122          spe_and(f, mask, mask, tmp_a);
00123       }
00124 
00125       spe_release_register(f, ref);
00126       spe_release_register(f, tmp_a);
00127       spe_release_register(f, tmp_b);
00128    } else if (dsa->alpha.enabled && (dsa->alpha.func == PIPE_FUNC_NEVER)) {
00129       spe_il(f, mask, 0);
00130    }
00131 }

static void emit_blend_calculation ( struct spe_function *  f,
unsigned  func,
unsigned  sF,
unsigned  dF,
int  src,
int  src_factor,
int  dst,
int  dst_factor 
) [static]

Definition at line 859 of file cell_state_per_fragment.c.

References assert, PIPE_BLEND_ADD, PIPE_BLEND_MAX, PIPE_BLEND_MIN, PIPE_BLEND_REVERSE_SUBTRACT, PIPE_BLEND_SUBTRACT, PIPE_BLENDFACTOR_ONE, PIPE_BLENDFACTOR_ZERO, spe_allocate_available_register(), spe_cgt(), spe_fa(), spe_fm(), spe_fma(), spe_fms(), spe_fs(), spe_il(), spe_or(), spe_release_register(), and spe_selb().

00862 {   /* Appends to f the SPE code that combines registers "src" and "dst" per blend equation "func"; every emitted path writes its result to "src". */
00863    int tmp = spe_allocate_available_register(f);   /* scratch register, released at the end */
00864    /* sF / dF values of ONE and ZERO are special-cased so that the matching factor register is not read. */
00865    switch (func) {
00866    case PIPE_BLEND_ADD:   /* src * src_factor + dst * dst_factor */
00867       if (sF == PIPE_BLENDFACTOR_ONE) {
00868          if (dF == PIPE_BLENDFACTOR_ZERO) {
00869             /* Do nothing. */
00870          } else if (dF == PIPE_BLENDFACTOR_ONE) {
00871             spe_fa(f, src, src, dst);
00872          }   /* NOTE(review): nothing is emitted for sF == ONE with any other dF -- confirm this is intended */
00873       } else if (sF == PIPE_BLENDFACTOR_ZERO) {
00874          if (dF == PIPE_BLENDFACTOR_ZERO) {
00875             spe_il(f, src, 0);
00876          } else if (dF == PIPE_BLENDFACTOR_ONE) {
00877             spe_or(f, src, dst, dst);   /* register copy: src = dst */
00878          } else {
00879             spe_fm(f, src, dst, dst_factor);
00880          }
00881       } else if (dF == PIPE_BLENDFACTOR_ZERO) {
00882          spe_fm(f, src, src, src_factor);
00883       } else {
00884          spe_fm(f, tmp, dst, dst_factor);
00885          spe_fma(f, src, src, src_factor, tmp);   /* src * src_factor + tmp */
00886       }
00887       break;
00888 
00889    case PIPE_BLEND_SUBTRACT:   /* src * src_factor - dst * dst_factor */
00890       if (sF == PIPE_BLENDFACTOR_ONE) {
00891          if (dF == PIPE_BLENDFACTOR_ZERO) {
00892             /* Do nothing. */
00893          } else if (dF == PIPE_BLENDFACTOR_ONE) {
00894             spe_fs(f, src, src, dst);
00895          }   /* NOTE(review): nothing is emitted for sF == ONE with any other dF -- confirm this is intended */
00896       } else if (sF == PIPE_BLENDFACTOR_ZERO) {
00897          if (dF == PIPE_BLENDFACTOR_ZERO) {
00898             spe_il(f, src, 0);
00899          } else if (dF == PIPE_BLENDFACTOR_ONE) {
00900             spe_il(f, tmp, 0);
00901             spe_fs(f, src, tmp, dst);   /* 0 - dst */
00902          } else {
00903             spe_fm(f, src, dst, dst_factor);   /* NOTE(review): product is not negated here, unlike the dF == ONE path -- confirm */
00904          }
00905       } else if (dF == PIPE_BLENDFACTOR_ZERO) {
00906          spe_fm(f, src, src, src_factor);
00907       } else {
00908          spe_fm(f, tmp, dst, dst_factor);
00909          spe_fms(f, src, src, src_factor, tmp);   /* src * src_factor - tmp */
00910       }
00911       break;
00912 
00913    case PIPE_BLEND_REVERSE_SUBTRACT:   /* dst * dst_factor - src * src_factor */
00914       if (sF == PIPE_BLENDFACTOR_ONE) {
00915          if (dF == PIPE_BLENDFACTOR_ZERO) {
00916             spe_il(f, tmp, 0);
00917             spe_fs(f, src, tmp, src);   /* 0 - src */
00918          } else if (dF == PIPE_BLENDFACTOR_ONE) {
00919             spe_fs(f, src, dst, src);
00920          }   /* NOTE(review): nothing is emitted for sF == ONE with any other dF -- confirm this is intended */
00921       } else if (sF == PIPE_BLENDFACTOR_ZERO) {
00922          if (dF == PIPE_BLENDFACTOR_ZERO) {
00923             spe_il(f, src, 0);
00924          } else if (dF == PIPE_BLENDFACTOR_ONE) {
00925             spe_or(f, src, dst, dst);   /* register copy: src = dst */
00926          } else {
00927             spe_fm(f, src, dst, dst_factor);
00928          }
00929       } else if (dF == PIPE_BLENDFACTOR_ZERO) {
00930          spe_fm(f, src, src, src_factor);   /* NOTE(review): product is not negated here, unlike the sF == ONE path -- confirm */
00931       } else {
00932          spe_fm(f, tmp, src, src_factor);
00933          spe_fms(f, src, dst, dst_factor, tmp);   /* dst * dst_factor - tmp; the multiplicand must be dst, not src */
00934       }
00935       break;
00936 
00937    case PIPE_BLEND_MIN:   /* blend factors are ignored */
00938       spe_cgt(f, tmp, src, dst);   /* tmp = (src > dst) */
00939       spe_selb(f, src, src, dst, tmp);   /* src = tmp ? dst : src */
00940       break;
00941 
00942    case PIPE_BLEND_MAX:   /* blend factors are ignored */
00943       spe_cgt(f, tmp, src, dst);   /* tmp = (src > dst) */
00944       spe_selb(f, src, dst, src, tmp);   /* src = tmp ? src : dst */
00945       break;
00946 
00947    default:
00948       assert(0);
00949    }
00950 
00951    spe_release_register(f, tmp);
00952 }

static void emit_color_factor_calculation ( struct spe_function *  f,
unsigned  sF,
unsigned  mask,
const int *  src,
const int *  dst,
const int *  const_color,
int *  factor 
) [static]

Note:
Emits a maximum of 6 instructions

Definition at line 684 of file cell_state_per_fragment.c.

References assert, PIPE_BLENDFACTOR_CONST_ALPHA, PIPE_BLENDFACTOR_CONST_COLOR, PIPE_BLENDFACTOR_DST_ALPHA, PIPE_BLENDFACTOR_DST_COLOR, PIPE_BLENDFACTOR_INV_CONST_ALPHA, PIPE_BLENDFACTOR_INV_CONST_COLOR, PIPE_BLENDFACTOR_INV_DST_ALPHA, PIPE_BLENDFACTOR_INV_DST_COLOR, PIPE_BLENDFACTOR_INV_SRC1_ALPHA, PIPE_BLENDFACTOR_INV_SRC1_COLOR, PIPE_BLENDFACTOR_INV_SRC_ALPHA, PIPE_BLENDFACTOR_INV_SRC_COLOR, PIPE_BLENDFACTOR_ONE, PIPE_BLENDFACTOR_SRC1_ALPHA, PIPE_BLENDFACTOR_SRC1_COLOR, PIPE_BLENDFACTOR_SRC_ALPHA, PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE, PIPE_BLENDFACTOR_SRC_COLOR, PIPE_BLENDFACTOR_ZERO, spe_allocate_available_register(), spe_cuflt(), spe_fcgt(), spe_fs(), spe_il(), spe_or(), spe_release_register(), and spe_selb().

00690 {   /* Fills factor[0..2] with the indices of registers holding the RGB blend factor selected by sF; appends any needed code to f. */
00691    int tmp;
00692    unsigned i;
00693 
00694    /* -1 means "no register": it is left in place for ONE, ZERO, mask-disabled channels, and always for factor[3]. */
00695    factor[0] = -1;
00696    factor[1] = -1;
00697    factor[2] = -1;
00698    factor[3] = -1;
00699 
00700    switch (sF) {
00701    case PIPE_BLENDFACTOR_ONE:
00702       break;
00703 
00704    case PIPE_BLENDFACTOR_SRC_COLOR:
00705       for (i = 0; i < 3; ++i) {
00706          if ((mask & (1U << i)) != 0) {   /* only channels enabled in mask get a register */
00707             factor[i] = spe_allocate_available_register(f);
00708             spe_or(f, factor[i], src[i], src[i]);   /* register copy of src[i] */
00709          }
00710       }
00711       break;
00712 
00713    case PIPE_BLENDFACTOR_SRC_ALPHA:
00714       factor[0] = spe_allocate_available_register(f);
00715       factor[1] = factor[0];   /* all three channels share one register */
00716       factor[2] = factor[0];
00717 
00718       spe_or(f, factor[0], src[3], src[3]);
00719       break;
00720 
00721    case PIPE_BLENDFACTOR_DST_ALPHA:   /* no code emitted: the factor entries alias the caller's dst alpha register */
00722       factor[0] = dst[3];
00723       factor[1] = dst[3];
00724       factor[2] = dst[3];
00725       break;
00726 
00727    case PIPE_BLENDFACTOR_DST_COLOR:   /* no code emitted: the factor entries alias the caller's dst registers */
00728       factor[0] = dst[0];
00729       factor[1] = dst[1];
00730       factor[2] = dst[2];
00731       break;
00732 
00733    case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
00734       tmp = spe_allocate_available_register(f);
00735       factor[0] = spe_allocate_available_register(f);
00736       factor[1] = factor[0];
00737       factor[2] = factor[0];
00738 
00739       /* Alpha saturate means min(As, 1-Ad).
00740        */
00741       spe_il(f, tmp, 1);
00742       spe_cuflt(f, tmp, tmp, 0);   /* tmp = 1.0 */
00743       spe_fs(f, tmp, tmp, dst[3]);   /* tmp = 1 - Ad */
00744       spe_fcgt(f, factor[0], tmp, src[3]);   /* factor[0] = ((1 - Ad) > As) */
00745       spe_selb(f, factor[0], tmp, src[3], factor[0]);   /* pick As where (1 - Ad) > As, else (1 - Ad): the minimum */
00746 
00747       spe_release_register(f, tmp);
00748       break;
00749 
00750    case PIPE_BLENDFACTOR_INV_CONST_COLOR:
00751       tmp = spe_allocate_available_register(f);
00752       spe_il(f, tmp, 1);
00753       spe_cuflt(f, tmp, tmp, 0);   /* tmp = 1.0 */
00754 
00755       for (i = 0; i < 3; i++) {   /* NOTE(review): unlike the SRC/DST colour cases, mask is not consulted here */
00756          factor[i] = spe_allocate_available_register(f);
00757 
00758          spe_fs(f, factor[i], tmp, const_color[i]);   /* 1 - Cc */
00759       }
00760       spe_release_register(f, tmp);
00761       break;
00762 
00763    case PIPE_BLENDFACTOR_CONST_COLOR:   /* no code emitted: the factor entries alias const_color registers */
00764       for (i = 0; i < 3; i++) {
00765          factor[i] = const_color[i];
00766       }
00767       break;
00768 
00769    case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
00770       factor[0] = spe_allocate_available_register(f);
00771       factor[1] = factor[0];
00772       factor[2] = factor[0];
00773 
00774       tmp = spe_allocate_available_register(f);
00775       spe_il(f, tmp, 1);
00776       spe_cuflt(f, tmp, tmp, 0);   /* tmp = 1.0 */
00777       spe_fs(f, factor[0], tmp, const_color[3]);   /* 1 - Ac */
00778       spe_release_register(f, tmp);
00779       break;
00780 
00781    case PIPE_BLENDFACTOR_CONST_ALPHA:   /* no code emitted: the factor entries alias the constant alpha register */
00782       factor[0] = const_color[3];
00783       factor[1] = factor[0];
00784       factor[2] = factor[0];
00785       break;
00786 
00787    case PIPE_BLENDFACTOR_ZERO:
00788       break;
00789 
00790    case PIPE_BLENDFACTOR_INV_SRC_COLOR:
00791       tmp = spe_allocate_available_register(f);
00792 
00793       spe_il(f, tmp, 1);
00794       spe_cuflt(f, tmp, tmp, 0);   /* tmp = 1.0 */
00795 
00796       for (i = 0; i < 3; ++i) {
00797          if ((mask & (1U << i)) != 0) {
00798             factor[i] = spe_allocate_available_register(f);
00799             spe_fs(f, factor[i], tmp, src[i]);   /* 1 - Cs */
00800          }
00801       }
00802 
00803       spe_release_register(f, tmp);
00804       break;
00805 
00806    case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
00807       tmp = spe_allocate_available_register(f);
00808       factor[0] = spe_allocate_available_register(f);
00809       factor[1] = factor[0];
00810       factor[2] = factor[0];
00811 
00812       spe_il(f, tmp, 1);
00813       spe_cuflt(f, tmp, tmp, 0);   /* tmp = 1.0 */
00814       spe_fs(f, factor[0], tmp, src[3]);   /* 1 - As */
00815 
00816       spe_release_register(f, tmp);
00817       break;
00818 
00819    case PIPE_BLENDFACTOR_INV_DST_ALPHA:
00820       tmp = spe_allocate_available_register(f);
00821       factor[0] = spe_allocate_available_register(f);
00822       factor[1] = factor[0];
00823       factor[2] = factor[0];
00824 
00825       spe_il(f, tmp, 1);
00826       spe_cuflt(f, tmp, tmp, 0);   /* tmp = 1.0 */
00827       spe_fs(f, factor[0], tmp, dst[3]);   /* 1 - Ad */
00828 
00829       spe_release_register(f, tmp);
00830       break;
00831 
00832    case PIPE_BLENDFACTOR_INV_DST_COLOR:
00833       tmp = spe_allocate_available_register(f);
00834 
00835       spe_il(f, tmp, 1);
00836       spe_cuflt(f, tmp, tmp, 0);   /* tmp = 1.0 */
00837 
00838       for (i = 0; i < 3; ++i) {
00839          if ((mask & (1U << i)) != 0) {
00840             factor[i] = spe_allocate_available_register(f);
00841             spe_fs(f, factor[i], tmp, dst[i]);   /* 1 - Cd */
00842          }
00843       }
00844 
00845       spe_release_register(f, tmp);
00846       break;
00847 
00848    case PIPE_BLENDFACTOR_SRC1_COLOR:   /* the SRC1 factors are not handled by this function */
00849    case PIPE_BLENDFACTOR_SRC1_ALPHA:
00850    case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
00851    case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
00852    default:
00853       assert(0);
00854    }
00855 }

static boolean emit_depth_test ( struct pipe_depth_stencil_alpha_state *  dsa,
struct spe_function *  f,
int  mask,
int  stored,
int  calculated 
) [static]

Generate code to perform Z testing.

Four Z values are tested at once.

Parameters:
dsa Current depth-test state
f Function to which code should be appended
mask Index of register to contain depth-pass mask
stored Index of register containing values from depth buffer
calculated Index of register containing per-fragment depth values
Returns:
If the calculated depth comparison mask is the actual mask, FALSE is returned. If the calculated depth comparison mask is the complement of the actual mask, TRUE is returned.
Note:
Emits a maximum of 3 instructions.

Definition at line 150 of file cell_state_per_fragment.c.

References assert, pipe_depth_stencil_alpha_state::depth, pipe_depth_state::enabled, FALSE, pipe_depth_state::func, PIPE_FUNC_ALWAYS, PIPE_FUNC_EQUAL, PIPE_FUNC_GEQUAL, PIPE_FUNC_GREATER, PIPE_FUNC_LEQUAL, PIPE_FUNC_LESS, PIPE_FUNC_NEVER, PIPE_FUNC_NOTEQUAL, spe_allocate_available_register(), spe_ceq(), spe_clgt(), spe_il(), spe_or(), spe_release_register(), and TRUE.

00152 {   /* Emits code that writes a depth-comparison mask into register "mask"; returns TRUE when that mask is the inverse of the real result. */
00153    unsigned func = (dsa->depth.enabled)
00154        ? dsa->depth.func : PIPE_FUNC_ALWAYS;   /* a disabled depth test is handled as ALWAYS */
00155    int tmp = spe_allocate_available_register(f);   /* scratch; only the LESS / GEQUAL path emits code that uses it */
00156    boolean compliment = FALSE;   /* TRUE when the emitted mask must be inverted by the caller */
00157 
00158    switch (func) {
00159    case PIPE_FUNC_NEVER:
00160       spe_il(f, mask, 0);   /* no fragment passes */
00161       break;
00162 
00163    case PIPE_FUNC_NOTEQUAL:   /* inverse of EQUAL */
00164       compliment = TRUE;
00165       /* FALLTHROUGH */
00166    case PIPE_FUNC_EQUAL:
00167       spe_ceq(f, mask, calculated, stored);   /* mask = (calculated == stored) */
00168       break;
00169 
00170    case PIPE_FUNC_LEQUAL:   /* inverse of GREATER */
00171       compliment = TRUE;
00172       /* FALLTHROUGH */
00173    case PIPE_FUNC_GREATER:
00174       spe_clgt(f, mask, calculated, stored);   /* mask = (calculated > stored) */
00175       break;
00176 
00177    case PIPE_FUNC_LESS:   /* inverse of GEQUAL */
00178       compliment = TRUE;
00179       /* FALLTHROUGH */
00180    case PIPE_FUNC_GEQUAL:
00181       spe_clgt(f, mask, calculated, stored);   /* mask = (calculated > stored) */
00182       spe_ceq(f, tmp, calculated, stored);   /* tmp = (calculated == stored) */
00183       spe_or(f, mask, mask, tmp);   /* mask = greater-than OR equal */
00184       break;
00185 
00186    case PIPE_FUNC_ALWAYS:
00187       spe_il(f, mask, ~0);   /* every fragment passes */
00188       break;
00189 
00190    default:
00191       assert(0);
00192       break;
00193    }
00194 
00195    spe_release_register(f, tmp);
00196    return compliment;
00197 }

static void emit_stencil_op ( struct spe_function *  f,
int  out,
int  in,
int  mask,
unsigned  op,
unsigned  ref 
) [static]

Generate code to apply the stencil operation (after testing).

Note:
Emits a maximum of 5 instructions.
Warning:
Since out and in might be the same register, this routine cannot generate code that uses out as a temporary.

Definition at line 209 of file cell_state_per_fragment.c.

References assert, PIPE_STENCIL_OP_DECR, PIPE_STENCIL_OP_DECR_WRAP, PIPE_STENCIL_OP_INCR, PIPE_STENCIL_OP_INCR_WRAP, PIPE_STENCIL_OP_INVERT, PIPE_STENCIL_OP_KEEP, PIPE_STENCIL_OP_REPLACE, PIPE_STENCIL_OP_ZERO, spe_ai(), spe_allocate_available_register(), spe_clgti(), spe_il(), spe_nor(), spe_release_register(), and spe_selb().

00211 {   /* Emits code computing the stencil value for "op" from register "in", then writes to "out" the new value where "mask" is set and "in" elsewhere. */
00212    const int clamp = spe_allocate_available_register(f);   /* saturation limit for INCR / DECR */
00213    const int clamp_mask = spe_allocate_available_register(f);   /* marks values that went out of range */
00214    const int result = spe_allocate_available_register(f);   /* new stencil value before the final select */
00215 
00216    switch(op) {
00217    case PIPE_STENCIL_OP_KEEP:
00218       assert(0);   /* NOTE(review): no break follows, so when assert is compiled out KEEP falls into ZERO -- confirm callers never pass KEEP */
00219    case PIPE_STENCIL_OP_ZERO:
00220       spe_il(f, result, 0);
00221       break;
00222    case PIPE_STENCIL_OP_REPLACE:
00223       spe_il(f, result, ref);
00224       break;
00225    case PIPE_STENCIL_OP_INCR:
00226       /* clamp = [0xff, 0xff, 0xff, 0xff] */
00227       spe_il(f, clamp, 0x0ff);
00228       /* result[i] = in[i] + 1 */
00229       spe_ai(f, result, in, 1);
00230       /* clamp_mask[i] = (result[i] > 0xff) */
00231       spe_clgti(f, clamp_mask, result, 0x0ff);
00232       /* result[i] = clamp_mask[i] ? clamp[i] : result[i] */
00233       spe_selb(f, result, result, clamp, clamp_mask);
00234       break;
00235    case PIPE_STENCIL_OP_DECR:
00236       spe_il(f, clamp, 0);   /* decrement saturates at zero */
00237       spe_ai(f, result, in, -1);
00238 
00239       /* If "(s-1) < 0" in signed arithmetic, then "(s-1) > MAX" in unsigned
00240        * arithmetic.
00241        */
00242       spe_clgti(f, clamp_mask, result, 0x0ff);
00243       spe_selb(f, result, result, clamp, clamp_mask);
00244       break;
00245    case PIPE_STENCIL_OP_INCR_WRAP:   /* no clamping code is emitted for the wrapping variants */
00246       spe_ai(f, result, in, 1);
00247       break;
00248    case PIPE_STENCIL_OP_DECR_WRAP:
00249       spe_ai(f, result, in, -1);
00250       break;
00251    case PIPE_STENCIL_OP_INVERT:
00252       spe_nor(f, result, in, in);   /* NOR of a value with itself is its bitwise NOT */
00253       break;
00254    default:
00255       assert(0);
00256    }
00257 
00258    spe_selb(f, out, in, result, mask);   /* out = mask ? result : in; "out" is written only here because it may alias "in" */
00259 
00260    spe_release_register(f, result);
00261    spe_release_register(f, clamp_mask);
00262    spe_release_register(f, clamp);
00263 }

static int emit_stencil_test ( struct pipe_depth_stencil_alpha_state *  dsa,
unsigned  face,
struct spe_function *  f,
int  mask,
int  depth_mask,
boolean  depth_complement,
int  stencil,
int  depth_pass 
) [static]

Generate code to do stencil test.

Four pixels are tested at once.

Parameters:
dsa Depth / stencil test state
face 0 for front face, 1 for back face
f Function to append instructions to
mask Register containing mask of fragments passing the alpha test
depth_mask Register containing mask of fragments passing the depth test
depth_complement Is depth_mask the complement of the actual mask?
stencil Register containing values from stencil buffer
depth_pass Register to store mask of fragments passing stencil test and depth test
Note:
Emits a maximum of 10 + (3 * 5) = 25 instructions.

Definition at line 284 of file cell_state_per_fragment.c.

References assert, emit_stencil_op(), pipe_stencil_state::fail_op, FALSE, pipe_stencil_state::func, PIPE_FUNC_ALWAYS, PIPE_FUNC_EQUAL, PIPE_FUNC_GEQUAL, PIPE_FUNC_GREATER, PIPE_FUNC_LEQUAL, PIPE_FUNC_LESS, PIPE_FUNC_NEVER, PIPE_FUNC_NOTEQUAL, PIPE_STENCIL_OP_KEEP, pipe_stencil_state::ref_value, spe_allocate_available_register(), spe_and(), spe_andc(), spe_andi(), spe_ceqi(), spe_clgti(), spe_il(), spe_or(), spe_release_register(), spe_selb(), pipe_depth_stencil_alpha_state::stencil, TRUE, pipe_stencil_state::value_mask, pipe_stencil_state::write_mask, pipe_stencil_state::zfail_op, and pipe_stencil_state::zpass_op.

00292 {   /* Emits the stencil test and stencil-update code for one face; returns the register holding the resulting stencil values ("stencil" itself if nothing was updated). */
00293    int stencil_fail = spe_allocate_available_register(f);   /* fragments that fail the stencil test */
00294    int depth_fail = spe_allocate_available_register(f);   /* fragments that pass stencil but fail depth */
00295    int stencil_mask = spe_allocate_available_register(f);   /* raw comparison result, possibly inverted (see complement) */
00296    int stencil_pass = spe_allocate_available_register(f);   /* fragments that pass the stencil test */
00297    int face_stencil = spe_allocate_available_register(f);   /* updated stencil values for this face */
00298    int stencil_src = stencil;   /* register currently holding the latest stencil values */
00299    const unsigned ref = (dsa->stencil[face].ref_value
00300                          & dsa->stencil[face].value_mask);
00301    boolean complement = FALSE;   /* TRUE when stencil_mask is the inverse of the real test result */
00302    int stored;
00303    int tmp = spe_allocate_available_register(f);
00304 
00305    /* Apply value_mask to the stored stencil only when a real comparison follows and the mask is not all ones. */
00306    if ((dsa->stencil[face].func != PIPE_FUNC_NEVER)
00307        && (dsa->stencil[face].func != PIPE_FUNC_ALWAYS)
00308        && (dsa->stencil[face].value_mask != 0x0ff)) {
00309       stored = spe_allocate_available_register(f);
00310       spe_andi(f, stored, stencil, dsa->stencil[face].value_mask);
00311    } else {
00312       stored = stencil;
00313    }
00314 
00315 
00316    switch (dsa->stencil[face].func) {
00317    case PIPE_FUNC_NEVER:
00318       spe_il(f, stencil_mask, 0);   /* stencil_mask[0..3] = [0,0,0,0] */
00319       break;
00320 
00321    case PIPE_FUNC_NOTEQUAL:
00322       complement = TRUE;
00323       /* FALLTHROUGH */
00324    case PIPE_FUNC_EQUAL:
00325       /* stencil_mask[i] = (stored[i] == ref) */
00326       spe_ceqi(f, stencil_mask, stored, ref);
00327       break;
00328 
00329    case PIPE_FUNC_LEQUAL:
00330       complement = TRUE;
00331       /* FALLTHROUGH */
00332    case PIPE_FUNC_GREATER:
00333       /* complement is left as set above: FALSE for GREATER, TRUE for LEQUAL */
00334       /* stencil_mask[i] = (stored[i] > ref) */
00335       spe_clgti(f, stencil_mask, stored, ref);
00336       break;
00337 
00338    case PIPE_FUNC_LESS:
00339       complement = TRUE;
00340       /* FALLTHROUGH */
00341    case PIPE_FUNC_GEQUAL:
00342       /* stencil_mask[i] = (stored[i] > ref) */
00343       spe_clgti(f, stencil_mask, stored, ref);
00344       /* tmp[i] = (stored[i] == ref) */
00345       spe_ceqi(f, tmp, stored, ref);
00346       /* stencil_mask[i] = stencil_mask[i] | tmp[i] */
00347       spe_or(f, stencil_mask, stencil_mask, tmp);
00348       break;
00349 
00350    case PIPE_FUNC_ALWAYS:
00351       /* See comment below. */
00352       break;
00353 
00354    default:
00355       assert(0);
00356       break;
00357    }
00358 
00359    if (stored != stencil) {
00360       spe_release_register(f, stored);
00361    }
00362    spe_release_register(f, tmp);
00363 
00364 
00365    /* ALWAYS is a very common stencil-test, so some effort is applied to
00366     * optimize that case.  The stencil-pass mask is the same as the input
00367     * fragment mask.  This makes the stencil-test (above) a no-op, and the
00368     * input fragment mask can be "renamed" the stencil-pass mask.
00369     */
00370    if (dsa->stencil[face].func == PIPE_FUNC_ALWAYS) {
00371       spe_release_register(f, stencil_pass);
00372       stencil_pass = mask;
00373    } else {
00374       if (complement) {
00375          spe_andc(f, stencil_pass, mask, stencil_mask);   /* mask AND NOT stencil_mask */
00376       } else {
00377          spe_and(f, stencil_pass, mask, stencil_mask);
00378       }
00379    }
00380 
00381    if (depth_complement) {
00382       spe_andc(f, depth_pass, stencil_pass, depth_mask);
00383    } else {
00384       spe_and(f, depth_pass, stencil_pass, depth_mask);
00385    }
00386 
00387 
00388    /* Conditionally emit code to update the stencil value under various
00389     * conditions.  Note that there is no need to generate code under the
00390     * following circumstances:
00391     *
00392     * - Stencil write mask is zero.
00393     * - For stencil-fail if the stencil test is ALWAYS
00394     * - For depth-fail if the stencil test is NEVER
00395     * - For depth-pass if the stencil test is NEVER
00396     * - Any of the 3 conditions if the operation is KEEP
00397     */
00398    if (dsa->stencil[face].write_mask != 0) {
00399       if ((dsa->stencil[face].func != PIPE_FUNC_ALWAYS)
00400           && (dsa->stencil[face].fail_op != PIPE_STENCIL_OP_KEEP)) {
00401          if (complement) {
00402             spe_and(f, stencil_fail, mask, stencil_mask);   /* inverse selection of stencil_pass above */
00403          } else {
00404             spe_andc(f, stencil_fail, mask, stencil_mask);
00405          }
00406 
00407          emit_stencil_op(f, face_stencil, stencil_src, stencil_fail,
00408                          dsa->stencil[face].fail_op,
00409                          dsa->stencil[face].ref_value);
00410 
00411          stencil_src = face_stencil;   /* later ops chain from the updated values */
00412       }
00413 
00414       if ((dsa->stencil[face].func != PIPE_FUNC_NEVER)
00415           && (dsa->stencil[face].zfail_op != PIPE_STENCIL_OP_KEEP)) {
00416          if (depth_complement) {
00417             spe_and(f, depth_fail, stencil_pass, depth_mask);
00418          } else {
00419             spe_andc(f, depth_fail, stencil_pass, depth_mask);
00420          }
00421 
00422          emit_stencil_op(f, face_stencil, stencil_src, depth_fail,
00423                          dsa->stencil[face].zfail_op,
00424                          dsa->stencil[face].ref_value);
00425          stencil_src = face_stencil;
00426       }
00427 
00428       if ((dsa->stencil[face].func != PIPE_FUNC_NEVER)
00429           && (dsa->stencil[face].zpass_op != PIPE_STENCIL_OP_KEEP)) {
00430          emit_stencil_op(f, face_stencil, stencil_src, depth_pass,
00431                          dsa->stencil[face].zpass_op,
00432                          dsa->stencil[face].ref_value);
00433          stencil_src = face_stencil;
00434       }
00435    }
00436 
00437    spe_release_register(f, stencil_fail);
00438    spe_release_register(f, depth_fail);
00439    spe_release_register(f, stencil_mask);
00440    if (stencil_pass != mask) {
00441       spe_release_register(f, stencil_pass);
00442    }
00443 
00444    /* If all of the stencil operations were KEEP or the stencil write mask was
00445     * zero, "stencil_src" will still be set to "stencil".  In this case
00446     * release the "face_stencil" register.  Otherwise apply the stencil write
00447     * mask to select bits from the calculated stencil value and the previous
00448     * stencil value.
00449     */
00450    if (stencil_src == stencil) {
00451       spe_release_register(f, face_stencil);
00452    } else if (dsa->stencil[face].write_mask != 0x0ff) {
00453       int wmask = spe_allocate_available_register(f);   /* renamed so it no longer shadows the outer tmp */
00454 
00455       spe_il(f, wmask, dsa->stencil[face].write_mask);
00456       spe_selb(f, stencil_src, stencil, stencil_src, wmask);   /* write-mask bits come from the new value, the rest from "stencil" */
00457 
00458       spe_release_register(f, wmask);
00459    }
00460 
00461    return stencil_src;
00462 }

static int PC_OFFSET ( const struct spe_function *  f,
const void *  d 
) [static]

Definition at line 1159 of file cell_state_per_fragment.c.

References spe_function::num_inst, and spe_function::store.

01160 {   /* Returns the offset, in 4-byte units, from f->store[f->num_inst] to the address of d rounded down to a 16-byte boundary. */
01161    const intptr_t pc = (intptr_t) &f->store[f->num_inst];   /* presumably the slot where the next instruction will be emitted -- confirm */
01162    const intptr_t ea = ~0x0f & (intptr_t) d;   /* clear the low four address bits of d */
01163 
01164    return (ea - pc) >> 2;   /* byte distance shifted right by two, i.e. counted in 4-byte words */
01165 }


Generated on Tue Sep 29 06:25:31 2009 for Gallium3D by  doxygen 1.5.4