spu_per_fragment_op.c

Go to the documentation of this file.
00001 /**************************************************************************
00002  * 
00003  * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
00004  * All Rights Reserved.
00005  *
00006  * Permission is hereby granted, free of charge, to any person obtaining a
00007  * copy of this software and associated documentation files (the
00008  * "Software"), to deal in the Software without restriction, including
00009  * without limitation the rights to use, copy, modify, merge, publish,
00010  * distribute, sub license, and/or sell copies of the Software, and to
00011  * permit persons to whom the Software is furnished to do so, subject to
00012  * the following conditions:
00013  * 
00014  * The above copyright notice and this permission notice (including the
00015  * next paragraph) shall be included in all copies or substantial portions
00016  * of the Software.
00017  * 
00018  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
00019  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
00020  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
00021  * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
00022  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
00023  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
00024  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
00025  * 
00026  **************************************************************************/
00027 
00033 #include <transpose_matrix4x4.h>
00034 #include "pipe/p_format.h"
00035 #include "spu_main.h"
00036 #include "spu_colorpack.h"
00037 #include "spu_per_fragment_op.h"
00038 
00039 
00040 #define LINEAR_QUAD_LAYOUT 1
00041 
00042 
00051 void
00052 spu_fallback_fragment_ops(uint x, uint y,
00053                           tile_t *colorTile,
00054                           tile_t *depthStencilTile,
00055                           vector float fragZ,
00056                           vector float fragR,
00057                           vector float fragG,
00058                           vector float fragB,
00059                           vector float fragA,
00060                           vector unsigned int mask)
00061 {
00062    vector float frag_aos[4];
00063    unsigned int c0, c1, c2, c3;
00064 
00065    /* do alpha test */
00066    if (spu.depth_stencil_alpha.alpha.enabled) {
00067       vector float ref = spu_splats(spu.depth_stencil_alpha.alpha.ref);
00068       vector unsigned int amask;
00069 
00070       switch (spu.depth_stencil_alpha.alpha.func) {
00071       case PIPE_FUNC_LESS:
00072          amask = spu_cmpgt(ref, fragA);  /* mask = (fragA < ref) */
00073          break;
00074       case PIPE_FUNC_GREATER:
00075          amask = spu_cmpgt(fragA, ref);  /* mask = (fragA > ref) */
00076          break;
00077       case PIPE_FUNC_GEQUAL:
00078          amask = spu_cmpgt(ref, fragA);
00079          amask = spu_nor(amask, amask);
00080          break;
00081       case PIPE_FUNC_LEQUAL:
00082          amask = spu_cmpgt(fragA, ref);
00083          amask = spu_nor(amask, amask);
00084          break;
00085       case PIPE_FUNC_EQUAL:
00086          amask = spu_cmpeq(ref, fragA);
00087          break;
00088       case PIPE_FUNC_NOTEQUAL:
00089          amask = spu_cmpeq(ref, fragA);
00090          amask = spu_nor(amask, amask);
00091          break;
00092       case PIPE_FUNC_ALWAYS:
00093          amask = spu_splats(0xffffffffU);
00094          break;
00095       case PIPE_FUNC_NEVER:
00096          amask = spu_splats( 0x0U);
00097          break;
00098       default:
00099          ;
00100       }
00101 
00102       mask = spu_and(mask, amask);
00103    }
00104 
00105    /* Z and/or stencil testing... */
00106    if (spu.depth_stencil_alpha.depth.enabled ||
00107        spu.depth_stencil_alpha.stencil[0].enabled) {
00108 
00109       /* get four Z/Stencil values from tile */
00110       vector unsigned int mask24 = spu_splats((unsigned int)0x00ffffffU);
00111       vector unsigned int ifbZS = depthStencilTile->ui4[y/2][x/2];
00112       vector unsigned int ifbZ = spu_and(ifbZS, mask24);
00113       vector unsigned int ifbS = spu_andc(ifbZS, mask24);
00114 
00115       if (spu.depth_stencil_alpha.stencil[0].enabled) {
00116          /* do stencil test */
00117          ASSERT(spu.fb.depth_format == PIPE_FORMAT_S8Z24_UNORM);
00118 
00119       }
00120       else if (spu.depth_stencil_alpha.depth.enabled) {
00121          /* do depth test */
00122 
00123          ASSERT(spu.fb.depth_format == PIPE_FORMAT_S8Z24_UNORM ||
00124                 spu.fb.depth_format == PIPE_FORMAT_X8Z24_UNORM);
00125 
00126          vector unsigned int ifragZ;
00127          vector unsigned int zmask;
00128 
00129          /* convert four fragZ from float to uint */
00130          fragZ = spu_mul(fragZ, spu_splats((float) 0xffffff));
00131          ifragZ = spu_convtu(fragZ, 0);
00132 
00133          /* do depth comparison, setting zmask with results */
00134          switch (spu.depth_stencil_alpha.depth.func) {
00135          case PIPE_FUNC_LESS:
00136             zmask = spu_cmpgt(ifbZ, ifragZ);  /* mask = (ifragZ < ifbZ) */
00137             break;
00138          case PIPE_FUNC_GREATER:
00139             zmask = spu_cmpgt(ifragZ, ifbZ);  /* mask = (ifbZ > ifragZ) */
00140             break;
00141          case PIPE_FUNC_GEQUAL:
00142             zmask = spu_cmpgt(ifbZ, ifragZ);
00143             zmask = spu_nor(zmask, zmask);
00144             break;
00145          case PIPE_FUNC_LEQUAL:
00146             zmask = spu_cmpgt(ifragZ, ifbZ);
00147             zmask = spu_nor(zmask, zmask);
00148             break;
00149          case PIPE_FUNC_EQUAL:
00150             zmask = spu_cmpeq(ifbZ, ifragZ);
00151             break;
00152          case PIPE_FUNC_NOTEQUAL:
00153             zmask = spu_cmpeq(ifbZ, ifragZ);
00154             zmask = spu_nor(zmask, zmask);
00155             break;
00156          case PIPE_FUNC_ALWAYS:
00157             zmask = spu_splats(0xffffffffU);
00158             break;
00159          case PIPE_FUNC_NEVER:
00160             zmask = spu_splats( 0x0U);
00161             break;
00162          default:
00163             ;
00164          }
00165 
00166          mask = spu_and(mask, zmask);
00167 
00168          /* merge framebuffer Z and fragment Z according to the mask */
00169          ifbZ = spu_or(spu_and(ifragZ, mask),
00170                        spu_andc(ifbZ, mask));
00171       }
00172 
00173       if (spu_extract(spu_orx(mask), 0)) {
00174          /* put new fragment Z/Stencil values back into Z/Stencil tile */
00175          depthStencilTile->ui4[y/2][x/2] = spu_or(ifbZ, ifbS);
00176 
00177          spu.cur_ztile_status = TILE_STATUS_DIRTY;
00178       }
00179    }
00180 
00181    if (spu.blend.blend_enable) {
00182       /* blending terms, misc regs */
00183       vector float term1r, term1g, term1b, term1a;
00184       vector float term2r, term2g, term2b, term2a;
00185       vector float one, tmp;
00186 
00187       vector float fbRGBA[4];  /* current framebuffer colors */
00188 
00189       /* get colors from framebuffer/tile */
00190       {
00191          vector float fc[4];
00192          uint c0, c1, c2, c3;
00193 
00194 #if LINEAR_QUAD_LAYOUT /* See comments/diagram below */
00195          c0 = colorTile->ui[y][x*2+0];
00196          c1 = colorTile->ui[y][x*2+1];
00197          c2 = colorTile->ui[y][x*2+2];
00198          c3 = colorTile->ui[y][x*2+3];
00199 #else
00200          c0 = colorTile->ui[y+0][x+0];
00201          c1 = colorTile->ui[y+0][x+1];
00202          c2 = colorTile->ui[y+1][x+0];
00203          c3 = colorTile->ui[y+1][x+1];
00204 #endif
00205          switch (spu.fb.color_format) {
00206          case PIPE_FORMAT_B8G8R8A8_UNORM:
00207             fc[0] = spu_unpack_B8G8R8A8(c0);
00208             fc[1] = spu_unpack_B8G8R8A8(c1);
00209             fc[2] = spu_unpack_B8G8R8A8(c2);
00210             fc[3] = spu_unpack_B8G8R8A8(c3);
00211             break;
00212          case PIPE_FORMAT_A8R8G8B8_UNORM:
00213             fc[0] = spu_unpack_A8R8G8B8(c0);
00214             fc[1] = spu_unpack_A8R8G8B8(c1);
00215             fc[2] = spu_unpack_A8R8G8B8(c2);
00216             fc[3] = spu_unpack_A8R8G8B8(c3);
00217             break;
00218          default:
00219             ASSERT(0);
00220          }
00221          _transpose_matrix4x4(fbRGBA, fc);
00222       }
00223 
00224       /*
00225        * Compute Src RGB terms
00226        */
00227       switch (spu.blend.rgb_src_factor) {
00228       case PIPE_BLENDFACTOR_ONE:
00229          term1r = fragR;
00230          term1g = fragG;
00231          term1b = fragB;
00232          break;
00233       case PIPE_BLENDFACTOR_ZERO:
00234          term1r =
00235          term1g =
00236          term1b = spu_splats(0.0f);
00237          break;
00238       case PIPE_BLENDFACTOR_SRC_COLOR:
00239          term1r = spu_mul(fragR, fragR);
00240          term1g = spu_mul(fragG, fragG);
00241          term1b = spu_mul(fragB, fragB);
00242          break;
00243       case PIPE_BLENDFACTOR_SRC_ALPHA:
00244          term1r = spu_mul(fragR, fragA);
00245          term1g = spu_mul(fragG, fragA);
00246          term1b = spu_mul(fragB, fragA);
00247          break;
00248       /* XXX more cases */
00249       default:
00250          ASSERT(0);
00251       }
00252 
00253       /*
00254        * Compute Src Alpha term
00255        */
00256       switch (spu.blend.alpha_src_factor) {
00257       case PIPE_BLENDFACTOR_ONE:
00258          term1a = fragA;
00259          break;
00260       case PIPE_BLENDFACTOR_SRC_COLOR:
00261          term1a = spu_splats(0.0f);
00262          break;
00263       case PIPE_BLENDFACTOR_SRC_ALPHA:
00264          term1a = spu_mul(fragA, fragA);
00265          break;
00266       /* XXX more cases */
00267       default:
00268          ASSERT(0);
00269       }
00270 
00271       /*
00272        * Compute Dest RGB terms
00273        */
00274       switch (spu.blend.rgb_dst_factor) {
00275       case PIPE_BLENDFACTOR_ONE:
00276          term2r = fragR;
00277          term2g = fragG;
00278          term2b = fragB;
00279          break;
00280       case PIPE_BLENDFACTOR_ZERO:
00281          term2r =
00282          term2g =
00283          term2b = spu_splats(0.0f);
00284          break;
00285       case PIPE_BLENDFACTOR_SRC_COLOR:
00286          term2r = spu_mul(fbRGBA[0], fragR);
00287          term2g = spu_mul(fbRGBA[1], fragG);
00288          term2b = spu_mul(fbRGBA[2], fragB);
00289          break;
00290       case PIPE_BLENDFACTOR_SRC_ALPHA:
00291          term2r = spu_mul(fbRGBA[0], fragA);
00292          term2g = spu_mul(fbRGBA[1], fragA);
00293          term2b = spu_mul(fbRGBA[2], fragA);
00294          break;
00295       case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
00296          one = spu_splats(1.0f);
00297          tmp = spu_sub(one, fragA);
00298          term2r = spu_mul(fbRGBA[0], tmp);
00299          term2g = spu_mul(fbRGBA[1], tmp);
00300          term2b = spu_mul(fbRGBA[2], tmp);
00301          break;
00302       /* XXX more cases */
00303       default:
00304          ASSERT(0);
00305       }
00306 
00307       /*
00308        * Compute Dest Alpha term
00309        */
00310       switch (spu.blend.alpha_dst_factor) {
00311       case PIPE_BLENDFACTOR_ONE:
00312          term2a = fragA;
00313          break;
00314       case PIPE_BLENDFACTOR_SRC_COLOR:
00315          term2a = spu_splats(0.0f);
00316          break;
00317       case PIPE_BLENDFACTOR_SRC_ALPHA:
00318          term2a = spu_mul(fbRGBA[3], fragA);
00319          break;
00320       case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
00321          one = spu_splats(1.0f);
00322          tmp = spu_sub(one, fragA);
00323          term2a = spu_mul(fbRGBA[3], tmp);
00324          break;
00325       /* XXX more cases */
00326       default:
00327          ASSERT(0);
00328       }
00329 
00330       /*
00331        * Combine Src/Dest RGB terms
00332        */
00333       switch (spu.blend.rgb_func) {
00334       case PIPE_BLEND_ADD:
00335          fragR = spu_add(term1r, term2r);
00336          fragG = spu_add(term1g, term2g);
00337          fragB = spu_add(term1b, term2b);
00338          break;
00339       case PIPE_BLEND_SUBTRACT:
00340          fragR = spu_sub(term1r, term2r);
00341          fragG = spu_sub(term1g, term2g);
00342          fragB = spu_sub(term1b, term2b);
00343          break;
00344       /* XXX more cases */
00345       default:
00346          ASSERT(0);
00347       }
00348 
00349       /*
00350        * Combine Src/Dest A term
00351        */
00352       switch (spu.blend.alpha_func) {
00353       case PIPE_BLEND_ADD:
00354          fragA = spu_add(term1a, term2a);
00355          break;
00356       case PIPE_BLEND_SUBTRACT:
00357          fragA = spu_sub(term1a, term2a);
00358          break;
00359       /* XXX more cases */
00360       default:
00361          ASSERT(0);
00362       }
00363    }
00364 
00365 
00366    /*
00367     * Convert RRRR,GGGG,BBBB,AAAA to RGBA,RGBA,RGBA,RGBA.
00368     */
00369 #if 0
00370    /* original code */
00371    {
00372       vector float frag_soa[4];
00373       frag_soa[0] = fragR;
00374       frag_soa[1] = fragG;
00375       frag_soa[2] = fragB;
00376       frag_soa[3] = fragA;
00377       _transpose_matrix4x4(frag_aos, frag_soa);
00378    }
00379 #else
00380    /* short-cut relying on function parameter layout: */
00381    _transpose_matrix4x4(frag_aos, &fragR);
00382    (void) fragG;
00383    (void) fragB;
00384 #endif
00385 
00386    /*
00387     * Pack float colors into 32-bit RGBA words.
00388     */
00389    switch (spu.fb.color_format) {
00390    case PIPE_FORMAT_A8R8G8B8_UNORM:
00391       c0 = spu_pack_A8R8G8B8(frag_aos[0]);
00392       c1 = spu_pack_A8R8G8B8(frag_aos[1]);
00393       c2 = spu_pack_A8R8G8B8(frag_aos[2]);
00394       c3 = spu_pack_A8R8G8B8(frag_aos[3]);
00395       break;
00396 
00397    case PIPE_FORMAT_B8G8R8A8_UNORM:
00398       c0 = spu_pack_B8G8R8A8(frag_aos[0]);
00399       c1 = spu_pack_B8G8R8A8(frag_aos[1]);
00400       c2 = spu_pack_B8G8R8A8(frag_aos[2]);
00401       c3 = spu_pack_B8G8R8A8(frag_aos[3]);
00402       break;
00403    default:
00404       fprintf(stderr, "SPU: Bad pixel format in spu_default_fragment_ops\n");
00405       ASSERT(0);
00406    }
00407 
00408 
00409    /*
00410     * Color masking
00411     */
00412    if (spu.blend.colormask != 0xf) {
00413       /* XXX to do */
00414       /* apply color mask to 32-bit packed colors */
00415    }
00416 
00417 
00418    /*
00419     * Logic Ops
00420     */
00421    if (spu.blend.logicop_enable) {
00422       /* XXX to do */
00423       /* apply logicop to 32-bit packed colors */
00424    }
00425 
00426 
00427    /*
00428     * If mask is non-zero, mark tile as dirty.
00429     */
00430    if (spu_extract(spu_orx(mask), 0)) {
00431       spu.cur_ctile_status = TILE_STATUS_DIRTY;
00432    }
00433    else {
00434       return;
00435    }
00436 
00437 
00438    /*
00439     * Write new quad colors to the framebuffer/tile.
00440     * Only write pixels where the corresponding mask word is set.
00441     */
00442 #if LINEAR_QUAD_LAYOUT
00443    /*
00444     * Quad layout:
00445     *  +--+--+--+--+
00446     *  |p0|p1|p2|p3|
00447     *  +--+--+--+--+
00448     */
00449    if (spu_extract(mask, 0))
00450       colorTile->ui[y][x*2] = c0;
00451    if (spu_extract(mask, 1))
00452       colorTile->ui[y][x*2+1] = c1;
00453    if (spu_extract(mask, 2))
00454       colorTile->ui[y][x*2+2] = c2;
00455    if (spu_extract(mask, 3))
00456       colorTile->ui[y][x*2+3] = c3;
00457 #else
00458    /*
00459     * Quad layout:
00460     *  +--+--+
00461     *  |p0|p1|
00462     *  +--+--+
00463     *  |p2|p3|
00464     *  +--+--+
00465     */
00466    if (spu_extract(mask, 0))
00467       colorTile->ui[y+0][x+0] = c0;
00468    if (spu_extract(mask, 1))
00469       colorTile->ui[y+0][x+1] = c1;
00470    if (spu_extract(mask, 2))
00471       colorTile->ui[y+1][x+0] = c2;
00472    if (spu_extract(mask, 3))
00473       colorTile->ui[y+1][x+1] = c3;
00474 #endif
00475 }

Generated on Tue Sep 29 06:25:16 2009 for Gallium3D by  doxygen 1.5.4