Go to the source code of this file.
Functions | |
void | invalidate_tex_cache (void) |
Mark all tex cache entries as invalid. | |
static uint | get_texel (uint unit, vec_uint4 coordinate) |
XXX look into getting texels for all four pixels in a quad at once. | |
static void | get_four_texels (uint unit, vec_uint4 x, vec_uint4 y, vec_uint4 *texels) |
Get four texels from locations (x[0], y[0]), (x[1], y[1]), (x[2], y[2]), (x[3], y[3]). | |
vector float | sample_texture_nearest (uint unit, vector float texcoord) |
Get texture sample at texcoord. | |
vector float | sample_texture_bilinear (uint unit, vector float texcoord) |
static void get_four_texels | ( | uint | unit, | |
vec_uint4 | x, | |||
vec_uint4 | y, | |||
vec_uint4 * | texels | |||
) | [static] |
Get four texels from locations (x[0], y[0]), (x[1], y[1]), (x[2], y[2]), (x[3], y[3]); the result for location i is stored in texels[i].
NOTE: in the typical case of bilinear filtering, the four texels are in a 2x2 group so we could get by with just two dcache fetches (two side-by-side texels per fetch). But when bilinear filtering wraps around a texture edge, we'll probably need code like we have now. FURTHERMORE: since we're rasterizing a quad of 2x2 pixels at a time, it's quite likely that the four pixels in a quad will need some of the same texels. So look into doing texture fetches for four pixels at a time.
Definition at line 91 of file spu_texture.c.
References offset(), spu, spu_dcache_fetch_unaligned(), spu_texture::start, spu_global::texture, and spu_texture::tiles_per_row.
00092 { 00093 const unsigned texture_ea = (uintptr_t) spu.texture[unit].start; 00094 vec_uint4 tile_x = spu_rlmask(x, -5); 00095 vec_uint4 tile_y = spu_rlmask(y, -5); 00096 const qword offset_x = si_andi((qword) x, 0x1f); 00097 const qword offset_y = si_andi((qword) y, 0x1f); 00098 00099 const qword tiles_per_row = (qword) spu_splats(spu.texture[unit].tiles_per_row); 00100 const qword tile_size = (qword) spu_splats((unsigned) sizeof(tile_t)); 00101 00102 qword tile_offset = si_mpya((qword) tile_y, tiles_per_row, (qword) tile_x); 00103 tile_offset = si_mpy((qword) tile_offset, tile_size); 00104 00105 qword texel_offset = si_a(si_mpyui(offset_y, 32), offset_x); 00106 texel_offset = si_mpyui(texel_offset, 4); 00107 00108 vec_uint4 offset = (vec_uint4) si_a(tile_offset, texel_offset); 00109 00110 spu_dcache_fetch_unaligned((qword *) & texels[0], 00111 texture_ea + spu_extract(offset, 0), 4); 00112 spu_dcache_fetch_unaligned((qword *) & texels[1], 00113 texture_ea + spu_extract(offset, 1), 4); 00114 spu_dcache_fetch_unaligned((qword *) & texels[2], 00115 texture_ea + spu_extract(offset, 2), 4); 00116 spu_dcache_fetch_unaligned((qword *) & texels[3], 00117 texture_ea + spu_extract(offset, 3), 4); 00118 }
XXX look into getting texels for all four pixels in a quad at once.
Definition at line 55 of file spu_texture.c.
References spu, spu_dcache_fetch_unaligned(), spu_texture::start, spu_global::texture, TILE_SIZE, and spu_texture::tiles_per_row.
00056 { 00057 /* 00058 * XXX we could do the "/ TILE_SIZE" and "% TILE_SIZE" operations as 00059 * SIMD since X and Y are already in a SIMD register. 00060 */ 00061 const unsigned texture_ea = (uintptr_t) spu.texture[unit].start; 00062 ushort x = spu_extract(coordinate, 0); 00063 ushort y = spu_extract(coordinate, 1); 00064 unsigned tile_offset = sizeof(tile_t) 00065 * ((y / TILE_SIZE * spu.texture[unit].tiles_per_row) + (x / TILE_SIZE)); 00066 ushort texel_offset = (ushort) 4 00067 * (ushort) (((ushort) (y % TILE_SIZE) * (ushort) TILE_SIZE) + (x % TILE_SIZE)); 00068 vec_uint4 tmp; 00069 00070 spu_dcache_fetch_unaligned((qword *) & tmp, 00071 texture_ea + tile_offset + texel_offset, 00072 4); 00073 return spu_extract(tmp, 0); 00074 }
void invalidate_tex_cache | ( | void | ) |
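Mark all tex cache entries as invalid. Note: as listed below, only texture unit 0 is handled; the range marked dirty starts at that unit's texture start address and spans 4 * width * height bytes.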
Definition at line 41 of file spu_texture.c.
References spu_texture::height, spu, spu_dcache_mark_dirty(), spu_texture::start, spu_global::texture, and spu_texture::width.
00042 { 00043 uint unit = 0; 00044 uint bytes = 4 * spu.texture[unit].width 00045 * spu.texture[unit].height; 00046 00047 spu_dcache_mark_dirty((unsigned) spu.texture[unit].start, bytes); 00048 }
vector float sample_texture_bilinear | ( | uint | unit, | |
vector float | texcoord | |||
) |
Definition at line 136 of file spu_texture.c.
References get_four_texels(), spu, spu_unpack_A8R8G8B8(), spu_texture::tex_size, spu_texture::tex_size_x_mask, spu_texture::tex_size_y_mask, and spu_global::texture.
00137 { 00138 static const vec_uint4 offset_x = {0, 0, 1, 1}; 00139 static const vec_uint4 offset_y = {0, 1, 0, 1}; 00140 00141 vector float tc = spu_mul(texcoord, spu.texture[unit].tex_size); 00142 tc = spu_add(tc, spu_splats(-0.5f)); /* half texel bias */ 00143 00144 /* integer texcoords S,T: */ 00145 vec_uint4 itc = spu_convtu(tc, 0); /* convert to int */ 00146 00147 vec_uint4 texels[4]; 00148 00149 /* setup texcoords for quad: 00150 * +-----+-----+ 00151 * |x0,y0|x1,y1| 00152 * +-----+-----+ 00153 * |x2,y2|x3,y3| 00154 * +-----+-----+ 00155 */ 00156 vec_uint4 x = spu_splats(spu_extract(itc, 0)); 00157 vec_uint4 y = spu_splats(spu_extract(itc, 1)); 00158 x = spu_add(x, offset_x); 00159 y = spu_add(y, offset_y); 00160 00161 /* GL_REPEAT wrap mode: */ 00162 x = spu_and(x, spu.texture[unit].tex_size_x_mask); 00163 y = spu_and(y, spu.texture[unit].tex_size_y_mask); 00164 00165 get_four_texels(unit, x, y, texels); 00166 00167 /* integer A8R8G8B8 to float texel conversion */ 00168 vector float texel00 = spu_unpack_A8R8G8B8(spu_extract(texels[0], 0)); 00169 vector float texel01 = spu_unpack_A8R8G8B8(spu_extract(texels[1], 0)); 00170 vector float texel10 = spu_unpack_A8R8G8B8(spu_extract(texels[2], 0)); 00171 vector float texel11 = spu_unpack_A8R8G8B8(spu_extract(texels[3], 0)); 00172 00173 00174 /* Compute weighting factors in [0,1] 00175 * Multiply texcoord by 1024, AND with 1023, convert back to float. 
00176 */ 00177 vector float tc1024 = spu_mul(tc, spu_splats(1024.0f)); 00178 vector signed int itc1024 = spu_convts(tc1024, 0); 00179 itc1024 = spu_and(itc1024, spu_splats((1 << 10) - 1)); 00180 vector float weight = spu_convtf(itc1024, 10); 00181 00182 /* smeared frac and 1-frac */ 00183 vector float sfrac = spu_splats(spu_extract(weight, 0)); 00184 vector float tfrac = spu_splats(spu_extract(weight, 1)); 00185 vector float sfrac1 = spu_sub(spu_splats(1.0f), sfrac); 00186 vector float tfrac1 = spu_sub(spu_splats(1.0f), tfrac); 00187 00188 /* multiply the samples (colors) by the S/T weights */ 00189 texel00 = spu_mul(spu_mul(texel00, sfrac1), tfrac1); 00190 texel10 = spu_mul(spu_mul(texel10, sfrac ), tfrac1); 00191 texel01 = spu_mul(spu_mul(texel01, sfrac1), tfrac ); 00192 texel11 = spu_mul(spu_mul(texel11, sfrac ), tfrac ); 00193 00194 /* compute sum of weighted samples */ 00195 vector float texel_sum = spu_add(texel00, texel01); 00196 texel_sum = spu_add(texel_sum, texel10); 00197 texel_sum = spu_add(texel_sum, texel11); 00198 00199 return texel_sum; 00200 }
vector float sample_texture_nearest | ( | uint | unit, | |
vector float | texcoord | |||
) |
Get texture sample at texcoord.
Definition at line 125 of file spu_texture.c.
References get_texel(), spu, spu_unpack_A8R8G8B8(), spu_texture::tex_size, spu_texture::tex_size_mask, and spu_global::texture.
00126 { 00127 vector float tc = spu_mul(texcoord, spu.texture[unit].tex_size); 00128 vector unsigned int itc = spu_convtu(tc, 0); /* convert to int */ 00129 itc = spu_and(itc, spu.texture[unit].tex_size_mask); /* mask (GL_REPEAT) */ 00130 uint texel = get_texel(unit, itc); 00131 return spu_unpack_A8R8G8B8(texel); 00132 }