spu_texture.c File Reference

Include dependency graph for spu_texture.c:

Go to the source code of this file.

Functions

void invalidate_tex_cache (void)
 Mark all tex cache entries as invalid.
static uint get_texel (uint unit, vec_uint4 coordinate)
 Fetch a single texel. XXX look into getting texels for all four pixels in a quad at once.
static void get_four_texels (uint unit, vec_uint4 x, vec_uint4 y, vec_uint4 *texels)
 Get four texels from locations (x[0], y[0]), (x[1], y[1]), (x[2], y[2]) and (x[3], y[3]).
vector float sample_texture_nearest (uint unit, vector float texcoord)
 Get texture sample at texcoord.
vector float sample_texture_bilinear (uint unit, vector float texcoord)


Function Documentation

static void get_four_texels ( uint  unit,
vec_uint4  x,
vec_uint4  y,
vec_uint4 *  texels 
) [static]

Get four texels from locations (x[0], y[0]), (x[1], y[1]), (x[2], y[2]) and (x[3], y[3]).

NOTE: in the typical case of bilinear filtering, the four texels are in a 2x2 group so we could get by with just two dcache fetches (two side-by-side texels per fetch). But when bilinear filtering wraps around a texture edge, we'll probably need code like we have now. FURTHERMORE: since we're rasterizing a quad of 2x2 pixels at a time, it's quite likely that the four pixels in a quad will need some of the same texels. So look into doing texture fetches for four pixels at a time.

Definition at line 91 of file spu_texture.c.

References offset(), spu, spu_dcache_fetch_unaligned(), spu_texture::start, spu_global::texture, and spu_texture::tiles_per_row.

00092 { /* Fetch the four texels at (x[i], y[i]), i = 0..3, of texture `unit` into texels[0..3]. */
00093    const unsigned texture_ea = (uintptr_t) spu.texture[unit].start; /* start of this unit's texture data */
00094    vec_uint4 tile_x = spu_rlmask(x, -5); /* x >> 5: tile column of each texel */
00095    vec_uint4 tile_y = spu_rlmask(y, -5); /* y >> 5: tile row of each texel */
00096    const qword offset_x = si_andi((qword) x, 0x1f); /* x & 31: column inside the tile */
00097    const qword offset_y = si_andi((qword) y, 0x1f); /* y & 31: row inside the tile */
00098 
00099    const qword tiles_per_row = (qword) spu_splats(spu.texture[unit].tiles_per_row); /* same value in all four lanes */
00100    const qword tile_size = (qword) spu_splats((unsigned) sizeof(tile_t)); /* bytes per tile, in all four lanes */
00101 
00102    qword tile_offset = si_mpya((qword) tile_y, tiles_per_row, (qword) tile_x); /* tile index = tile_y * tiles_per_row + tile_x; NOTE(review): si_mpya/si_mpy look like 16-bit multiplies - confirm operands always fit */
00103    tile_offset = si_mpy((qword) tile_offset, tile_size); /* tile index -> byte offset of the tile */
00104 
00105    qword texel_offset = si_a(si_mpyui(offset_y, 32), offset_x); /* texel index inside the tile = offset_y * 32 + offset_x */
00106    texel_offset = si_mpyui(texel_offset, 4); /* texel index -> byte offset (4 bytes per texel, matching the fetch size below) */
00107    
00108    vec_uint4 offset = (vec_uint4) si_a(tile_offset, texel_offset); /* per-lane byte offset from texture_ea */
00109    
00110    spu_dcache_fetch_unaligned((qword *) & texels[0], /* one 4-byte fetch per lane; each lands in its own texels[] entry */
00111                               texture_ea + spu_extract(offset, 0), 4);
00112    spu_dcache_fetch_unaligned((qword *) & texels[1],
00113                               texture_ea + spu_extract(offset, 1), 4);
00114    spu_dcache_fetch_unaligned((qword *) & texels[2],
00115                               texture_ea + spu_extract(offset, 2), 4);
00116    spu_dcache_fetch_unaligned((qword *) & texels[3],
00117                               texture_ea + spu_extract(offset, 3), 4);
00118 }

static uint get_texel ( uint  unit,
vec_uint4  coordinate 
) [static]

Fetch a single texel. XXX look into getting texels for all four pixels in a quad at once.

Definition at line 55 of file spu_texture.c.

References spu, spu_dcache_fetch_unaligned(), spu_texture::start, spu_global::texture, TILE_SIZE, and spu_texture::tiles_per_row.

00056 { /* Return the 4-byte texel of texture `unit` at x = coordinate[0], y = coordinate[1]. */
00057    /*
00058     * XXX we could do the "/ TILE_SIZE" and "% TILE_SIZE" operations as
00059     * SIMD since X and Y are already in a SIMD register.
00060     */
00061    const unsigned texture_ea = (uintptr_t) spu.texture[unit].start; /* start of this unit's texture data */
00062    ushort x = spu_extract(coordinate, 0); /* note: narrowed to ushort */
00063    ushort y = spu_extract(coordinate, 1);
00064    unsigned tile_offset = sizeof(tile_t) /* byte offset of the tile holding (x, y): tile index times bytes per tile */
00065       * ((y / TILE_SIZE * spu.texture[unit].tiles_per_row) + (x / TILE_SIZE));
00066    ushort texel_offset = (ushort) 4 /* byte offset inside that tile: 4 bytes per texel, TILE_SIZE texels per tile row */
00067       * (ushort) (((ushort) (y % TILE_SIZE) * (ushort) TILE_SIZE) + (x % TILE_SIZE));
00068    vec_uint4 tmp; /* receives the fetched data; only element 0 is returned */
00069 
00070    spu_dcache_fetch_unaligned((qword *) & tmp,
00071                               texture_ea + tile_offset + texel_offset,
00072                               4);
00073    return spu_extract(tmp, 0);
00074 }

void invalidate_tex_cache ( void   ) 

Mark all tex cache entries as invalid.

Definition at line 41 of file spu_texture.c.

References spu_texture::height, spu, spu_dcache_mark_dirty(), spu_texture::start, spu_global::texture, and spu_texture::width.

00042 { /* Calls spu_dcache_mark_dirty() over the byte range of one texture. */
00043    uint unit = 0; /* only texture unit 0 is covered here */
00044    uint bytes = 4 * spu.texture[unit].width /* range size: 4 bytes per texel times width times height */
00045       * spu.texture[unit].height;
00046 
00047    spu_dcache_mark_dirty((unsigned) spu.texture[unit].start, bytes); /* range begins at the texture's start address */
00048 }

vector float sample_texture_bilinear ( uint  unit,
vector float  texcoord 
)

Definition at line 136 of file spu_texture.c.

References get_four_texels(), spu, spu_unpack_A8R8G8B8(), spu_texture::tex_size, spu_texture::tex_size_x_mask, spu_texture::tex_size_y_mask, and spu_global::texture.

00137 { /* Bilinear sample of texture `unit`: elements 0 and 1 of texcoord are used as S and T; coordinates wrap by AND-ing with the unit's size masks. */
00138    static const vec_uint4 offset_x = {0, 0, 1, 1}; /* per-lane x offset of the four neighbouring texels */
00139    static const vec_uint4 offset_y = {0, 1, 0, 1}; /* per-lane y offset of the four neighbouring texels */
00140 
00141    vector float tc = spu_mul(texcoord, spu.texture[unit].tex_size); /* scale texcoord by the unit's tex_size */
00142    tc = spu_add(tc, spu_splats(-0.5f));  /* half texel bias */
00143 
00144    /* integer texcoords S,T: */
00145    vec_uint4 itc = spu_convtu(tc, 0);  /* convert to unsigned int; NOTE(review): tc can be negative after the bias - confirm how spu_convtu treats negative input */
00146 
00147    vec_uint4 texels[4]; /* filled by get_four_texels(); element 0 of each entry is read below */
00148    
00149    /* setup texcoords for the four texels of the 2x2 footprint, one per
00150     * lane, as given by offset_x / offset_y above:
00151     *   lane 0: (x,   y)      lane 2: (x+1, y)
00152     *   lane 1: (x,   y+1)    lane 3: (x+1, y+1)
00153     * where x = itc[0] and y = itc[1].  Note that lane 1 is the texel
00154     * below lane 0, not the one to its right.
00155     */
00156    vec_uint4 x = spu_splats(spu_extract(itc, 0));
00157    vec_uint4 y = spu_splats(spu_extract(itc, 1));
00158    x = spu_add(x, offset_x);
00159    y = spu_add(y, offset_y);
00160 
00161    /* GL_REPEAT wrap mode: */
00162    x = spu_and(x, spu.texture[unit].tex_size_x_mask);
00163    y = spu_and(y, spu.texture[unit].tex_size_y_mask);
00164 
00165    get_four_texels(unit, x, y, texels);
00166 
00167    /* integer A8R8G8B8 to float texel conversion; texelXY is the texel at (x+X, y+Y) */
00168    vector float texel00 = spu_unpack_A8R8G8B8(spu_extract(texels[0], 0));
00169    vector float texel01 = spu_unpack_A8R8G8B8(spu_extract(texels[1], 0));
00170    vector float texel10 = spu_unpack_A8R8G8B8(spu_extract(texels[2], 0));
00171    vector float texel11 = spu_unpack_A8R8G8B8(spu_extract(texels[3], 0));
00172 
00173 
00174    /* Compute weighting factors in [0,1]
00175     * Multiply texcoord by 1024, AND with 1023, convert back to float.
00176     */
00177    vector float tc1024 = spu_mul(tc, spu_splats(1024.0f));
00178    vector signed int itc1024 = spu_convts(tc1024, 0);
00179    itc1024 = spu_and(itc1024, spu_splats((1 << 10) - 1)); /* keep the low 10 bits: the fractional part in 1/1024 units */
00180    vector float weight = spu_convtf(itc1024, 10); /* scale argument 10: presumably divides by 2^10 to give a [0,1) fraction - confirm */
00181 
00182    /* smeared frac and 1-frac */
00183    vector float sfrac = spu_splats(spu_extract(weight, 0)); /* S weight of the x+1 texels */
00184    vector float tfrac = spu_splats(spu_extract(weight, 1)); /* T weight of the y+1 texels */
00185    vector float sfrac1 = spu_sub(spu_splats(1.0f), sfrac);
00186    vector float tfrac1 = spu_sub(spu_splats(1.0f), tfrac);
00187 
00188    /* multiply the samples (colors) by the S/T weights */
00189    texel00 = spu_mul(spu_mul(texel00, sfrac1), tfrac1);
00190    texel10 = spu_mul(spu_mul(texel10, sfrac ), tfrac1);
00191    texel01 = spu_mul(spu_mul(texel01, sfrac1), tfrac );
00192    texel11 = spu_mul(spu_mul(texel11, sfrac ), tfrac );
00193 
00194    /* compute sum of weighted samples */
00195    vector float texel_sum = spu_add(texel00, texel01);
00196    texel_sum = spu_add(texel_sum, texel10);
00197    texel_sum = spu_add(texel_sum, texel11);
00198 
00199    return texel_sum;
00200 }

vector float sample_texture_nearest ( uint  unit,
vector float  texcoord 
)

Get texture sample at texcoord.

Definition at line 125 of file spu_texture.c.

References get_texel(), spu, spu_unpack_A8R8G8B8(), spu_texture::tex_size, spu_texture::tex_size_mask, and spu_global::texture.

00126 { /* Nearest sample of texture `unit`: one texel is fetched at the scaled, masked texcoord and unpacked to a float vector. */
00127    vector float tc = spu_mul(texcoord, spu.texture[unit].tex_size); /* scale texcoord by the unit's tex_size */
00128    vector unsigned int itc = spu_convtu(tc, 0);  /* convert to unsigned int */
00129    itc = spu_and(itc, spu.texture[unit].tex_size_mask); /* mask (GL_REPEAT) */
00130    uint texel = get_texel(unit, itc); /* get_texel() reads x from element 0 and y from element 1 of itc */
00131    return spu_unpack_A8R8G8B8(texel);
00132 }


Generated on Tue Sep 29 06:25:32 2009 for Gallium3D by  doxygen 1.5.4