cell_gen_fragment.c

Go to the documentation of this file.
00001 /**************************************************************************
00002  * 
00003  * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
00004  * All Rights Reserved.
00005  * 
00006  * Permission is hereby granted, free of charge, to any person obtaining a
00007  * copy of this software and associated documentation files (the
00008  * "Software"), to deal in the Software without restriction, including
00009  * without limitation the rights to use, copy, modify, merge, publish,
00010  * distribute, sub license, and/or sell copies of the Software, and to
00011  * permit persons to whom the Software is furnished to do so, subject to
00012  * the following conditions:
00013  * 
00014  * The above copyright notice and this permission notice (including the
00015  * next paragraph) shall be included in all copies or substantial portions
00016  * of the Software.
00017  * 
00018  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
00019  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
00020  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
00021  * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
00022  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
00023  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
00024  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
00025  * 
00026  **************************************************************************/
00027 
00028 
00029 
00036 #include "pipe/p_defines.h"
00037 #include "pipe/p_state.h"
00038 #include "rtasm/rtasm_ppc_spe.h"
00039 #include "cell_context.h"
00040 #include "cell_gen_fragment.h"
00041 
00042 
00043 
00045 #define OPTIMIZATIONS 1
00046 
00047 
00058 static void
00059 gen_depth_test(const struct pipe_depth_stencil_alpha_state *dsa,
00060                struct spe_function *f,
00061                int mask_reg, int ifragZ_reg, int ifbZ_reg, int zmask_reg)
00062 {
00063    ASSERT(dsa->depth.enabled);
00064 
00065    switch (dsa->depth.func) {
00066    case PIPE_FUNC_EQUAL:
00067       /* zmask = (ifragZ == ref) */
00068       spe_ceq(f, zmask_reg, ifragZ_reg, ifbZ_reg);
00069       /* mask = (mask & zmask) */
00070       spe_and(f, mask_reg, mask_reg, zmask_reg);
00071       break;
00072 
00073    case PIPE_FUNC_NOTEQUAL:
00074       /* zmask = (ifragZ == ref) */
00075       spe_ceq(f, zmask_reg, ifragZ_reg, ifbZ_reg);
00076       /* mask = (mask & ~zmask) */
00077       spe_andc(f, mask_reg, mask_reg, zmask_reg);
00078       break;
00079 
00080    case PIPE_FUNC_GREATER:
00081       /* zmask = (ifragZ > ref) */
00082       spe_cgt(f, zmask_reg, ifragZ_reg, ifbZ_reg);
00083       /* mask = (mask & zmask) */
00084       spe_and(f, mask_reg, mask_reg, zmask_reg);
00085       break;
00086 
00087    case PIPE_FUNC_LESS:
00088       /* zmask = (ref > ifragZ) */
00089       spe_cgt(f, zmask_reg, ifbZ_reg, ifragZ_reg);
00090       /* mask = (mask & zmask) */
00091       spe_and(f, mask_reg, mask_reg, zmask_reg);
00092       break;
00093 
00094    case PIPE_FUNC_LEQUAL:
00095       /* zmask = (ifragZ > ref) */
00096       spe_cgt(f, zmask_reg, ifragZ_reg, ifbZ_reg);
00097       /* mask = (mask & ~zmask) */
00098       spe_andc(f, mask_reg, mask_reg, zmask_reg);
00099       break;
00100 
00101    case PIPE_FUNC_GEQUAL:
00102       /* zmask = (ref > ifragZ) */
00103       spe_cgt(f, zmask_reg, ifbZ_reg, ifragZ_reg);
00104       /* mask = (mask & ~zmask) */
00105       spe_andc(f, mask_reg, mask_reg, zmask_reg);
00106       break;
00107 
00108    case PIPE_FUNC_NEVER:
00109       spe_il(f, mask_reg, 0);  /* mask = {0,0,0,0} */
00110       spe_move(f, zmask_reg, mask_reg);  /* zmask = mask */
00111       break;
00112 
00113    case PIPE_FUNC_ALWAYS:
00114       /* mask unchanged */
00115       spe_il(f, zmask_reg, ~0);  /* zmask = {~0,~0,~0,~0} */
00116       break;
00117 
00118    default:
00119       ASSERT(0);
00120       break;
00121    }
00122 
00123    if (dsa->depth.writemask) {
00124       /*
00125        * If (ztest passed) {
00126        *    framebufferZ = fragmentZ;
00127        * }
00128        * OR,
00129        * framebufferZ = (ztest_passed ? fragmentZ : framebufferZ;
00130        */
00131       spe_selb(f, ifbZ_reg, ifbZ_reg, ifragZ_reg, mask_reg);
00132    }
00133 }
00134 
00135 
00144 static void
00145 gen_alpha_test(const struct pipe_depth_stencil_alpha_state *dsa,
00146                struct spe_function *f, int mask_reg, int fragA_reg)
00147 {
00148    int ref_reg = spe_allocate_available_register(f);
00149    int amask_reg = spe_allocate_available_register(f);
00150 
00151    ASSERT(dsa->alpha.enabled);
00152 
00153    if ((dsa->alpha.func != PIPE_FUNC_NEVER) &&
00154        (dsa->alpha.func != PIPE_FUNC_ALWAYS)) {
00155       /* load/splat the alpha reference float value */
00156       spe_load_float(f, ref_reg, dsa->alpha.ref);
00157    }
00158 
00159    /* emit code to do the alpha comparison, updating 'mask' */
00160    switch (dsa->alpha.func) {
00161    case PIPE_FUNC_EQUAL:
00162       /* amask = (fragA == ref) */
00163       spe_fceq(f, amask_reg, fragA_reg, ref_reg);
00164       /* mask = (mask & amask) */
00165       spe_and(f, mask_reg, mask_reg, amask_reg);
00166       break;
00167 
00168    case PIPE_FUNC_NOTEQUAL:
00169       /* amask = (fragA == ref) */
00170       spe_fceq(f, amask_reg, fragA_reg, ref_reg);
00171       /* mask = (mask & ~amask) */
00172       spe_andc(f, mask_reg, mask_reg, amask_reg);
00173       break;
00174 
00175    case PIPE_FUNC_GREATER:
00176       /* amask = (fragA > ref) */
00177       spe_fcgt(f, amask_reg, fragA_reg, ref_reg);
00178       /* mask = (mask & amask) */
00179       spe_and(f, mask_reg, mask_reg, amask_reg);
00180       break;
00181 
00182    case PIPE_FUNC_LESS:
00183       /* amask = (ref > fragA) */
00184       spe_fcgt(f, amask_reg, ref_reg, fragA_reg);
00185       /* mask = (mask & amask) */
00186       spe_and(f, mask_reg, mask_reg, amask_reg);
00187       break;
00188 
00189    case PIPE_FUNC_LEQUAL:
00190       /* amask = (fragA > ref) */
00191       spe_fcgt(f, amask_reg, fragA_reg, ref_reg);
00192       /* mask = (mask & ~amask) */
00193       spe_andc(f, mask_reg, mask_reg, amask_reg);
00194       break;
00195 
00196    case PIPE_FUNC_GEQUAL:
00197       /* amask = (ref > fragA) */
00198       spe_fcgt(f, amask_reg, ref_reg, fragA_reg);
00199       /* mask = (mask & ~amask) */
00200       spe_andc(f, mask_reg, mask_reg, amask_reg);
00201       break;
00202 
00203    case PIPE_FUNC_NEVER:
00204       spe_il(f, mask_reg, 0);  /* mask = [0,0,0,0] */
00205       break;
00206 
00207    case PIPE_FUNC_ALWAYS:
00208       /* no-op, mask unchanged */
00209       break;
00210 
00211    default:
00212       ASSERT(0);
00213       break;
00214    }
00215 
00216 #if OPTIMIZATIONS
00217    /* if mask == {0,0,0,0} we're all done, return */
00218    {
00219       /* re-use amask reg here */
00220       int tmp_reg = amask_reg;
00221       /* tmp[0] = (mask[0] | mask[1] | mask[2] | mask[3]) */
00222       spe_orx(f, tmp_reg, mask_reg);
00223       /* if tmp[0] == 0 then return from function call */
00224       spe_biz(f, tmp_reg, SPE_REG_RA, 0, 0);
00225    }
00226 #endif
00227 
00228    spe_release_register(f, ref_reg);
00229    spe_release_register(f, amask_reg);
00230 }
00231 
00232 
00233 
00243 static void
00244 gen_blend(const struct pipe_blend_state *blend,
00245           struct spe_function *f,
00246           enum pipe_format color_format,
00247           int fragR_reg, int fragG_reg, int fragB_reg, int fragA_reg,
00248           int fbRGBA_reg)
00249 {
00250    int term1R_reg = spe_allocate_available_register(f);
00251    int term1G_reg = spe_allocate_available_register(f);
00252    int term1B_reg = spe_allocate_available_register(f);
00253    int term1A_reg = spe_allocate_available_register(f);
00254 
00255    int term2R_reg = spe_allocate_available_register(f);
00256    int term2G_reg = spe_allocate_available_register(f);
00257    int term2B_reg = spe_allocate_available_register(f);
00258    int term2A_reg = spe_allocate_available_register(f);
00259 
00260    int fbR_reg = spe_allocate_available_register(f);
00261    int fbG_reg = spe_allocate_available_register(f);
00262    int fbB_reg = spe_allocate_available_register(f);
00263    int fbA_reg = spe_allocate_available_register(f);
00264 
00265    int one_reg = spe_allocate_available_register(f);
00266    int tmp_reg = spe_allocate_available_register(f);
00267 
00268    boolean one_reg_set = false; /* avoid setting one_reg more than once */
00269 
00270    ASSERT(blend->blend_enable);
00271 
00272    /* Unpack/convert framebuffer colors from four 32-bit packed colors
00273     * (fbRGBA) to four float RGBA vectors (fbR, fbG, fbB, fbA).
00274     * Each 8-bit color component is expanded into a float in [0.0, 1.0].
00275     */
00276    {
00277       int mask_reg = spe_allocate_available_register(f);
00278 
00279       /* mask = {0x000000ff, 0x000000ff, 0x000000ff, 0x000000ff} */
00280       spe_load_int(f, mask_reg, 0xff);
00281 
00282       /* XXX there may be more clever ways to implement the following code */
00283       switch (color_format) {
00284       case PIPE_FORMAT_A8R8G8B8_UNORM:
00285          /* fbB = fbB & mask */
00286          spe_and(f, fbB_reg, fbRGBA_reg, mask_reg);
00287          /* mask = mask << 8 */
00288          spe_roti(f, mask_reg, mask_reg, 8);
00289 
00290          /* fbG = fbRGBA & mask */
00291          spe_and(f, fbG_reg, fbRGBA_reg, mask_reg);
00292          /* fbG = fbG >> 8 */
00293          spe_roti(f, fbG_reg, fbG_reg, -8);
00294          /* mask = mask << 8 */
00295          spe_roti(f, mask_reg, mask_reg, 8);
00296 
00297          /* fbR = fbRGBA & mask */
00298          spe_and(f, fbR_reg, fbRGBA_reg, mask_reg);
00299          /* fbR = fbR >> 16 */
00300          spe_roti(f, fbR_reg, fbR_reg, -16);
00301          /* mask = mask << 8 */
00302          spe_roti(f, mask_reg, mask_reg, 8);
00303 
00304          /* fbA = fbRGBA & mask */
00305          spe_and(f, fbA_reg, fbRGBA_reg, mask_reg);
00306          /* fbA = fbA >> 24 */
00307          spe_roti(f, fbA_reg, fbA_reg, -24);
00308          break;
00309 
00310       case PIPE_FORMAT_B8G8R8A8_UNORM:
00311          /* fbA = fbA & mask */
00312          spe_and(f, fbA_reg, fbRGBA_reg, mask_reg);
00313          /* mask = mask << 8 */
00314          spe_roti(f, mask_reg, mask_reg, 8);
00315 
00316          /* fbR = fbRGBA & mask */
00317          spe_and(f, fbR_reg, fbRGBA_reg, mask_reg);
00318          /* fbR = fbR >> 8 */
00319          spe_roti(f, fbR_reg, fbR_reg, -8);
00320          /* mask = mask << 8 */
00321          spe_roti(f, mask_reg, mask_reg, 8);
00322 
00323          /* fbG = fbRGBA & mask */
00324          spe_and(f, fbG_reg, fbRGBA_reg, mask_reg);
00325          /* fbG = fbG >> 16 */
00326          spe_roti(f, fbG_reg, fbG_reg, -16);
00327          /* mask = mask << 8 */
00328          spe_roti(f, mask_reg, mask_reg, 8);
00329 
00330          /* fbB = fbRGBA & mask */
00331          spe_and(f, fbB_reg, fbRGBA_reg, mask_reg);
00332          /* fbB = fbB >> 24 */
00333          spe_roti(f, fbB_reg, fbB_reg, -24);
00334          break;
00335 
00336       default:
00337          ASSERT(0);
00338       }
00339 
00340       /* convert int[4] in [0,255] to float[4] in [0.0, 1.0] */
00341       spe_cuflt(f, fbR_reg, fbR_reg, 8);
00342       spe_cuflt(f, fbG_reg, fbG_reg, 8);
00343       spe_cuflt(f, fbB_reg, fbB_reg, 8);
00344       spe_cuflt(f, fbA_reg, fbA_reg, 8);
00345 
00346       spe_release_register(f, mask_reg);
00347    }
00348 
00349 
00350    /*
00351     * Compute Src RGB terms
00352     */
00353    switch (blend->rgb_src_factor) {
00354    case PIPE_BLENDFACTOR_ONE:
00355       spe_move(f, term1R_reg, fragR_reg);
00356       spe_move(f, term1G_reg, fragG_reg);
00357       spe_move(f, term1B_reg, fragB_reg);
00358       break;
00359    case PIPE_BLENDFACTOR_ZERO:
00360       spe_zero(f, term1R_reg);
00361       spe_zero(f, term1G_reg);
00362       spe_zero(f, term1B_reg);
00363       break;
00364    case PIPE_BLENDFACTOR_SRC_COLOR:
00365       spe_fm(f, term1R_reg, fragR_reg, fragR_reg);
00366       spe_fm(f, term1G_reg, fragG_reg, fragG_reg);
00367       spe_fm(f, term1B_reg, fragB_reg, fragB_reg);
00368       break;
00369    case PIPE_BLENDFACTOR_SRC_ALPHA:
00370       spe_fm(f, term1R_reg, fragR_reg, fragA_reg);
00371       spe_fm(f, term1G_reg, fragG_reg, fragA_reg);
00372       spe_fm(f, term1B_reg, fragB_reg, fragA_reg);
00373       break;
00374       /* XXX more cases */
00375    default:
00376       ASSERT(0);
00377    }
00378 
00379    /*
00380     * Compute Src Alpha term
00381     */
00382    switch (blend->alpha_src_factor) {
00383    case PIPE_BLENDFACTOR_ONE:
00384       spe_move(f, term1A_reg, fragA_reg);
00385       break;
00386    case PIPE_BLENDFACTOR_SRC_COLOR:
00387       spe_fm(f, term1A_reg, fragA_reg, fragA_reg);
00388       break;
00389    case PIPE_BLENDFACTOR_SRC_ALPHA:
00390       spe_fm(f, term1A_reg, fragA_reg, fragA_reg);
00391       break;
00392       /* XXX more cases */
00393    default:
00394       ASSERT(0);
00395    }
00396 
00397    /*
00398     * Compute Dest RGB terms
00399     */
00400    switch (blend->rgb_dst_factor) {
00401    case PIPE_BLENDFACTOR_ONE:
00402       spe_move(f, term2R_reg, fbR_reg);
00403       spe_move(f, term2G_reg, fbG_reg);
00404       spe_move(f, term2B_reg, fbB_reg);
00405       break;
00406    case PIPE_BLENDFACTOR_ZERO:
00407       spe_zero(f, term2R_reg);
00408       spe_zero(f, term2G_reg);
00409       spe_zero(f, term2B_reg);
00410       break;
00411    case PIPE_BLENDFACTOR_SRC_COLOR:
00412       spe_fm(f, term2R_reg, fbR_reg, fragR_reg);
00413       spe_fm(f, term2G_reg, fbG_reg, fragG_reg);
00414       spe_fm(f, term2B_reg, fbB_reg, fragB_reg);
00415       break;
00416    case PIPE_BLENDFACTOR_SRC_ALPHA:
00417       spe_fm(f, term2R_reg, fbR_reg, fragA_reg);
00418       spe_fm(f, term2G_reg, fbG_reg, fragA_reg);
00419       spe_fm(f, term2B_reg, fbB_reg, fragA_reg);
00420       break;
00421    case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
00422       /* one = {1.0, 1.0, 1.0, 1.0} */
00423       if (!one_reg_set) {
00424          spe_load_float(f, one_reg, 1.0f);
00425          one_reg_set = true;
00426       }
00427       /* tmp = one - fragA */
00428       spe_fs(f, tmp_reg, one_reg, fragA_reg);
00429       /* term = fb * tmp */
00430       spe_fm(f, term2R_reg, fbR_reg, tmp_reg);
00431       spe_fm(f, term2G_reg, fbG_reg, tmp_reg);
00432       spe_fm(f, term2B_reg, fbB_reg, tmp_reg);
00433       break;
00434       /* XXX more cases */
00435    default:
00436       ASSERT(0);
00437    }
00438 
00439    /*
00440     * Compute Dest Alpha term
00441     */
00442    switch (blend->alpha_dst_factor) {
00443    case PIPE_BLENDFACTOR_ONE:
00444       spe_move(f, term2A_reg, fbA_reg);
00445       break;
00446    case PIPE_BLENDFACTOR_ZERO:
00447       spe_zero(f, term2A_reg);
00448       break;
00449    case PIPE_BLENDFACTOR_SRC_ALPHA:
00450       spe_fm(f, term2A_reg, fbA_reg, fragA_reg);
00451       break;
00452    case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
00453       /* one = {1.0, 1.0, 1.0, 1.0} */
00454       if (!one_reg_set) {
00455          spe_load_float(f, one_reg, 1.0f);
00456          one_reg_set = true;
00457       }
00458       /* tmp = one - fragA */
00459       spe_fs(f, tmp_reg, one_reg, fragA_reg);
00460       /* termA = fbA * tmp */
00461       spe_fm(f, term2A_reg, fbA_reg, tmp_reg);
00462       break;
00463       /* XXX more cases */
00464    default:
00465       ASSERT(0);
00466    }
00467 
00468    /*
00469     * Combine Src/Dest RGB terms
00470     */
00471    switch (blend->rgb_func) {
00472    case PIPE_BLEND_ADD:
00473       spe_fa(f, fragR_reg, term1R_reg, term2R_reg);
00474       spe_fa(f, fragG_reg, term1G_reg, term2G_reg);
00475       spe_fa(f, fragB_reg, term1B_reg, term2B_reg);
00476       break;
00477    case PIPE_BLEND_SUBTRACT:
00478       spe_fs(f, fragR_reg, term1R_reg, term2R_reg);
00479       spe_fs(f, fragG_reg, term1G_reg, term2G_reg);
00480       spe_fs(f, fragB_reg, term1B_reg, term2B_reg);
00481       break;
00482       /* XXX more cases */
00483    default:
00484       ASSERT(0);
00485    }
00486 
00487    /*
00488     * Combine Src/Dest A term
00489     */
00490    switch (blend->alpha_func) {
00491    case PIPE_BLEND_ADD:
00492       spe_fa(f, fragA_reg, term1A_reg, term2A_reg);
00493       break;
00494    case PIPE_BLEND_SUBTRACT:
00495       spe_fs(f, fragA_reg, term1A_reg, term2A_reg);
00496       break;
00497       /* XXX more cases */
00498    default:
00499       ASSERT(0);
00500    }
00501 
00502    spe_release_register(f, term1R_reg);
00503    spe_release_register(f, term1G_reg);
00504    spe_release_register(f, term1B_reg);
00505    spe_release_register(f, term1A_reg);
00506 
00507    spe_release_register(f, term2R_reg);
00508    spe_release_register(f, term2G_reg);
00509    spe_release_register(f, term2B_reg);
00510    spe_release_register(f, term2A_reg);
00511 
00512    spe_release_register(f, fbR_reg);
00513    spe_release_register(f, fbG_reg);
00514    spe_release_register(f, fbB_reg);
00515    spe_release_register(f, fbA_reg);
00516 
00517    spe_release_register(f, one_reg);
00518    spe_release_register(f, tmp_reg);
00519 }
00520 
00521 
/**
 * Placeholder for emitting SPE code for the logic op stage.
 *
 * Not implemented yet: no instructions are emitted and no argument is
 * used.  The eventual implementation is meant to work on 32-bit packed
 * pixels rather than on float colors.
 *
 * \param blend         blend state (currently unused)
 * \param f             the SPE function being generated (currently unused)
 * \param fragRGBA_reg  register with packed fragment colors (currently unused)
 * \param fbRGBA_reg    register with packed framebuffer colors (currently
 *                      unused)
 */
static void
gen_logicop(const struct pipe_blend_state *blend,
            struct spe_function *f,
            int fragRGBA_reg, int fbRGBA_reg)
{
   /* XXX to-do: nothing is generated for logic ops yet */
   (void) blend;
   (void) f;
   (void) fragRGBA_reg;
   (void) fbRGBA_reg;
}
00530 
00531 
00532 static void
00533 gen_colormask(uint colormask,
00534               struct spe_function *f,
00535               int fragRGBA_reg, int fbRGBA_reg)
00536 {
00537    /* XXX to-do */
00538    /* operate on 32-bit packed pixels, not float colors */
00539 }
00540 
00541 
00542 
00554 static void
00555 gen_pack_colors(struct spe_function *f,
00556                 enum pipe_format color_format,
00557                 int r_reg, int g_reg, int b_reg, int a_reg,
00558                 int rgba_reg)
00559 {
00560    /* Convert float[4] in [0.0,1.0] to int[4] in [0,~0], with clamping */
00561    spe_cfltu(f, r_reg, r_reg, 32);
00562    spe_cfltu(f, g_reg, g_reg, 32);
00563    spe_cfltu(f, b_reg, b_reg, 32);
00564    spe_cfltu(f, a_reg, a_reg, 32);
00565 
00566    /* Shift the most significant bytes to least the significant positions.
00567     * I.e.: reg = reg >> 24
00568     */
00569    spe_rotmi(f, r_reg, r_reg, -24);
00570    spe_rotmi(f, g_reg, g_reg, -24);
00571    spe_rotmi(f, b_reg, b_reg, -24);
00572    spe_rotmi(f, a_reg, a_reg, -24);
00573 
00574    /* Shift the color bytes according to the surface format */
00575    if (color_format == PIPE_FORMAT_A8R8G8B8_UNORM) {
00576       spe_roti(f, g_reg, g_reg, 8);   /* green <<= 8 */
00577       spe_roti(f, r_reg, r_reg, 16);  /* red <<= 16 */
00578       spe_roti(f, a_reg, a_reg, 24);  /* alpha <<= 24 */
00579    }
00580    else if (color_format == PIPE_FORMAT_B8G8R8A8_UNORM) {
00581       spe_roti(f, r_reg, r_reg, 8);   /* red <<= 8 */
00582       spe_roti(f, g_reg, g_reg, 16);  /* green <<= 16 */
00583       spe_roti(f, b_reg, b_reg, 24);  /* blue <<= 24 */
00584    }
00585    else {
00586       ASSERT(0);
00587    }
00588 
00589    /* Merge red, green, blue, alpha registers to make packed RGBA colors.
00590     * Eg: after shifting according to color_format we might have:
00591     *     R = {0x00ff0000, 0x00110000, 0x00220000, 0x00330000}
00592     *     G = {0x0000ff00, 0x00004400, 0x00005500, 0x00006600}
00593     *     B = {0x000000ff, 0x00000077, 0x00000088, 0x00000099}
00594     *     A = {0xff000000, 0xaa000000, 0xbb000000, 0xcc000000}
00595     * OR-ing all those together gives us four packed colors:
00596     *  RGBA = {0xffffffff, 0xaa114477, 0xbb225588, 0xcc336699}
00597     */
00598    spe_or(f, rgba_reg, r_reg, g_reg);
00599    spe_or(f, rgba_reg, rgba_reg, b_reg);
00600    spe_or(f, rgba_reg, rgba_reg, a_reg);
00601 }
00602 
00603 
00604 
00605 
00626 void
00627 cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f)
00628 {
00629    const struct pipe_depth_stencil_alpha_state *dsa =
00630       &cell->depth_stencil->base;
00631    const struct pipe_blend_state *blend = &cell->blend->base;
00632    const enum pipe_format color_format = cell->framebuffer.cbufs[0]->format;
00633 
00634    /* For SPE function calls: reg $3 = first param, $4 = second param, etc. */
00635    const int x_reg = 3;  /* uint */
00636    const int y_reg = 4;  /* uint */
00637    const int color_tile_reg = 5;  /* tile_t * */
00638    const int depth_tile_reg = 6;  /* tile_t * */
00639    const int fragZ_reg = 7;   /* vector float */
00640    const int fragR_reg = 8;   /* vector float */
00641    const int fragG_reg = 9;   /* vector float */
00642    const int fragB_reg = 10;  /* vector float */
00643    const int fragA_reg = 11;  /* vector float */
00644    const int mask_reg = 12;   /* vector uint */
00645 
00646    /* offset of quad from start of tile
00647     * XXX assuming 4-byte pixels for color AND Z/stencil!!!!
00648     */
00649    int quad_offset_reg;
00650 
00651    int fbRGBA_reg;  
00652    int fbZS_reg;    
00654    spe_init_func(f, SPU_MAX_FRAGMENT_OPS_INSTS * SPE_INST_SIZE);
00655    spe_allocate_register(f, x_reg);
00656    spe_allocate_register(f, y_reg);
00657    spe_allocate_register(f, color_tile_reg);
00658    spe_allocate_register(f, depth_tile_reg);
00659    spe_allocate_register(f, fragZ_reg);
00660    spe_allocate_register(f, fragR_reg);
00661    spe_allocate_register(f, fragG_reg);
00662    spe_allocate_register(f, fragB_reg);
00663    spe_allocate_register(f, fragA_reg);
00664    spe_allocate_register(f, mask_reg);
00665 
00666    quad_offset_reg = spe_allocate_available_register(f);
00667    fbRGBA_reg = spe_allocate_available_register(f);
00668    fbZS_reg = spe_allocate_available_register(f);
00669 
00670    /* compute offset of quad from start of tile, in bytes */
00671    {
00672       int x2_reg = spe_allocate_available_register(f);
00673       int y2_reg = spe_allocate_available_register(f);
00674 
00675       ASSERT(TILE_SIZE == 32);
00676 
00677       spe_rotmi(f, x2_reg, x_reg, -1);  /* x2 = x / 2 */
00678       spe_rotmi(f, y2_reg, y_reg, -1);  /* y2 = y / 2 */
00679       spe_shli(f, y2_reg, y2_reg, 4);   /* y2 *= 16 */
00680       spe_a(f, quad_offset_reg, y2_reg, x2_reg);  /* offset = y2 + x2 */
00681       spe_shli(f, quad_offset_reg, quad_offset_reg, 4);   /* offset *= 16 */
00682 
00683       spe_release_register(f, x2_reg);
00684       spe_release_register(f, y2_reg);
00685    }
00686 
00687 
00688    if (dsa->alpha.enabled) {
00689       gen_alpha_test(dsa, f, mask_reg, fragA_reg);
00690    }
00691 
00692    if (dsa->depth.enabled || dsa->stencil[0].enabled) {
00693       const enum pipe_format zs_format = cell->framebuffer.zsbuf->format;
00694       boolean write_depth_stencil;
00695 
00696       int fbZ_reg = spe_allocate_available_register(f); /* Z values */
00697       int fbS_reg = spe_allocate_available_register(f); /* Stencil values */
00698 
00699       /* fetch quad of depth/stencil values from tile at (x,y) */
00700       /* Load: fbZS_reg = memory[depth_tile_reg + offset_reg] */
00701       spe_lqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg);
00702 
00703       if (dsa->depth.enabled) {
00704          /* Extract Z bits from fbZS_reg into fbZ_reg */
00705          if (zs_format == PIPE_FORMAT_S8Z24_UNORM ||
00706              zs_format == PIPE_FORMAT_X8Z24_UNORM) {
00707             int mask_reg = spe_allocate_available_register(f);
00708             spe_fsmbi(f, mask_reg, 0x7777);  /* mask[0,1,2,3] = 0x00ffffff */
00709             spe_and(f, fbZ_reg, fbZS_reg, mask_reg);  /* fbZ = fbZS & mask */
00710             spe_release_register(f, mask_reg);
00711             /* OK, fbZ_reg has four 24-bit Z values now */
00712          }
00713          else {
00714             /* XXX handle other z/stencil formats */
00715             ASSERT(0);
00716          }
00717 
00718          /* Convert fragZ values from float[4] to uint[4] */
00719          if (zs_format == PIPE_FORMAT_S8Z24_UNORM ||
00720              zs_format == PIPE_FORMAT_X8Z24_UNORM ||
00721              zs_format == PIPE_FORMAT_Z24S8_UNORM ||
00722              zs_format == PIPE_FORMAT_Z24X8_UNORM) {
00723             /* 24-bit Z values */
00724             int scale_reg = spe_allocate_available_register(f);
00725 
00726             /* scale_reg[0,1,2,3] = float(2^24-1) */
00727             spe_load_float(f, scale_reg, (float) 0xffffff);
00728 
00729             /* XXX these two instructions might be combined */
00730             spe_fm(f, fragZ_reg, fragZ_reg, scale_reg); /* fragZ *= scale */
00731             spe_cfltu(f, fragZ_reg, fragZ_reg, 0);  /* fragZ = (int) fragZ */
00732 
00733             spe_release_register(f, scale_reg);
00734          }
00735          else {
00736             /* XXX handle 16-bit Z format */
00737             ASSERT(0);
00738          }
00739       }
00740 
00741       if (dsa->stencil[0].enabled) {
00742          /* Extract Stencil bit sfrom fbZS_reg into fbS_reg */
00743          if (zs_format == PIPE_FORMAT_S8Z24_UNORM ||
00744              zs_format == PIPE_FORMAT_X8Z24_UNORM) {
00745             /* XXX extract with a shift */
00746             ASSERT(0);
00747          }
00748          else if (zs_format == PIPE_FORMAT_Z24S8_UNORM ||
00749                   zs_format == PIPE_FORMAT_Z24X8_UNORM) {
00750             /* XXX extract with a mask */
00751             ASSERT(0);
00752          }
00753       }
00754 
00755 
00756       if (dsa->stencil[0].enabled) {
00757          /* XXX this may involve depth testing too */
00758          // gen_stencil_test(dsa, f, ... );
00759          ASSERT(0);
00760       }
00761       else if (dsa->depth.enabled) {
00762          int zmask_reg = spe_allocate_available_register(f);
00763          gen_depth_test(dsa, f, mask_reg, fragZ_reg, fbZ_reg, zmask_reg);
00764          spe_release_register(f, zmask_reg);
00765       }
00766 
00767       /* do we need to write Z and/or Stencil back into framebuffer? */
00768       write_depth_stencil = (dsa->depth.writemask |
00769                              dsa->stencil[0].write_mask |
00770                              dsa->stencil[1].write_mask);
00771 
00772       if (write_depth_stencil) {
00773          /* Merge latest Z and Stencil values into fbZS_reg.
00774           * fbZ_reg has four Z vals in bits [23..0] or bits [15..0].
00775           * fbS_reg has four 8-bit Z values in bits [7..0].
00776           */
00777          if (zs_format == PIPE_FORMAT_S8Z24_UNORM ||
00778              zs_format == PIPE_FORMAT_X8Z24_UNORM) {
00779             spe_shli(f, fbS_reg, fbS_reg, 24); /* fbS = fbS << 24 */
00780             spe_or(f, fbZS_reg, fbS_reg, fbZ_reg); /* fbZS = fbS | fbZ */
00781          }
00782          else if (zs_format == PIPE_FORMAT_S8Z24_UNORM ||
00783                   zs_format == PIPE_FORMAT_X8Z24_UNORM) {
00784             /* XXX to do */
00785             ASSERT(0);
00786          }
00787          else if (zs_format == PIPE_FORMAT_Z16_UNORM) {
00788             /* XXX to do */
00789             ASSERT(0);
00790          }
00791          else if (zs_format == PIPE_FORMAT_S8_UNORM) {
00792             /* XXX to do */
00793             ASSERT(0);
00794          }
00795          else {
00796             /* bad zs_format */
00797             ASSERT(0);
00798          }
00799 
00800          /* Store: memory[depth_tile_reg + quad_offset_reg] = fbZS */
00801          spe_stqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg);
00802       }
00803 
00804       spe_release_register(f, fbZ_reg);
00805       spe_release_register(f, fbS_reg);
00806    }
00807 
00808 
00809    /* Get framebuffer quad/colors.  We'll need these for blending,
00810     * color masking, and to obey the quad/pixel mask.
00811     * Load: fbRGBA_reg = memory[color_tile + quad_offset]
00812     * Note: if mask={~0,~0,~0,~0} and we're not blending or colormasking
00813     * we could skip this load.
00814     */
00815    spe_lqx(f, fbRGBA_reg, color_tile_reg, quad_offset_reg);
00816 
00817 
00818    if (blend->blend_enable) {
00819       gen_blend(blend, f, color_format,
00820                 fragR_reg, fragG_reg, fragB_reg, fragA_reg, fbRGBA_reg);
00821    }
00822 
00823    /*
00824     * Write fragment colors to framebuffer/tile.
00825     * This involves converting the fragment colors from float[4] to the
00826     * tile's specific format and obeying the quad/pixel mask.
00827     */
00828    {
00829       int rgba_reg = spe_allocate_available_register(f);
00830 
00831       /* Pack four float colors as four 32-bit int colors */
00832       gen_pack_colors(f, color_format,
00833                       fragR_reg, fragG_reg, fragB_reg, fragA_reg,
00834                       rgba_reg);
00835 
00836       if (blend->logicop_enable) {
00837          gen_logicop(blend, f, rgba_reg, fbRGBA_reg);
00838       }
00839 
00840       if (blend->colormask != 0xf) {
00841          gen_colormask(blend->colormask, f, rgba_reg, fbRGBA_reg);
00842       }
00843 
00844 
00845       /* Mix fragment colors with framebuffer colors using the quad/pixel mask:
00846        * if (mask[i])
00847        *    rgba[i] = rgba[i];
00848        * else
00849        *    rgba[i] = framebuffer[i];
00850        */
00851       spe_selb(f, rgba_reg, fbRGBA_reg, rgba_reg, mask_reg);
00852 
00853       /* Store updated quad in tile:
00854        * memory[color_tile + quad_offset] = rgba_reg;
00855        */
00856       spe_stqx(f, rgba_reg, color_tile_reg, quad_offset_reg);
00857 
00858       spe_release_register(f, rgba_reg);
00859    }
00860 
00861    //printf("gen_fragment_ops nr instructions: %u\n", f->num_inst);
00862 
00863    spe_bi(f, SPE_REG_RA, 0, 0);  /* return from function call */
00864 
00865 
00866    spe_release_register(f, fbRGBA_reg);
00867    spe_release_register(f, fbZS_reg);
00868    spe_release_register(f, quad_offset_reg);
00869 }
00870 

Generated on Tue Sep 29 06:25:15 2009 for Gallium3D by  doxygen 1.5.4