00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029 #include "pipe/p_compiler.h"
00030 #include "spu_main.h"
00031 #include "spu_texture.h"
00032 #include "spu_tile.h"
00033 #include "spu_colorpack.h"
00034 #include "spu_dcache.h"
00035
00036
00040 void
00041 invalidate_tex_cache(void)
00042 {
00043 uint unit = 0;
00044 uint bytes = 4 * spu.texture[unit].width
00045 * spu.texture[unit].height;
00046
00047 spu_dcache_mark_dirty((unsigned) spu.texture[unit].start, bytes);
00048 }
00049
00050
00054 static uint
00055 get_texel(uint unit, vec_uint4 coordinate)
00056 {
00057
00058
00059
00060
00061 const unsigned texture_ea = (uintptr_t) spu.texture[unit].start;
00062 ushort x = spu_extract(coordinate, 0);
00063 ushort y = spu_extract(coordinate, 1);
00064 unsigned tile_offset = sizeof(tile_t)
00065 * ((y / TILE_SIZE * spu.texture[unit].tiles_per_row) + (x / TILE_SIZE));
00066 ushort texel_offset = (ushort) 4
00067 * (ushort) (((ushort) (y % TILE_SIZE) * (ushort) TILE_SIZE) + (x % TILE_SIZE));
00068 vec_uint4 tmp;
00069
00070 spu_dcache_fetch_unaligned((qword *) & tmp,
00071 texture_ea + tile_offset + texel_offset,
00072 4);
00073 return spu_extract(tmp, 0);
00074 }
00075
00076
00090 static void
00091 get_four_texels(uint unit, vec_uint4 x, vec_uint4 y, vec_uint4 *texels)
00092 {
00093 const unsigned texture_ea = (uintptr_t) spu.texture[unit].start;
00094 vec_uint4 tile_x = spu_rlmask(x, -5);
00095 vec_uint4 tile_y = spu_rlmask(y, -5);
00096 const qword offset_x = si_andi((qword) x, 0x1f);
00097 const qword offset_y = si_andi((qword) y, 0x1f);
00098
00099 const qword tiles_per_row = (qword) spu_splats(spu.texture[unit].tiles_per_row);
00100 const qword tile_size = (qword) spu_splats((unsigned) sizeof(tile_t));
00101
00102 qword tile_offset = si_mpya((qword) tile_y, tiles_per_row, (qword) tile_x);
00103 tile_offset = si_mpy((qword) tile_offset, tile_size);
00104
00105 qword texel_offset = si_a(si_mpyui(offset_y, 32), offset_x);
00106 texel_offset = si_mpyui(texel_offset, 4);
00107
00108 vec_uint4 offset = (vec_uint4) si_a(tile_offset, texel_offset);
00109
00110 spu_dcache_fetch_unaligned((qword *) & texels[0],
00111 texture_ea + spu_extract(offset, 0), 4);
00112 spu_dcache_fetch_unaligned((qword *) & texels[1],
00113 texture_ea + spu_extract(offset, 1), 4);
00114 spu_dcache_fetch_unaligned((qword *) & texels[2],
00115 texture_ea + spu_extract(offset, 2), 4);
00116 spu_dcache_fetch_unaligned((qword *) & texels[3],
00117 texture_ea + spu_extract(offset, 3), 4);
00118 }
00119
00120
00124 vector float
00125 sample_texture_nearest(uint unit, vector float texcoord)
00126 {
00127 vector float tc = spu_mul(texcoord, spu.texture[unit].tex_size);
00128 vector unsigned int itc = spu_convtu(tc, 0);
00129 itc = spu_and(itc, spu.texture[unit].tex_size_mask);
00130 uint texel = get_texel(unit, itc);
00131 return spu_unpack_A8R8G8B8(texel);
00132 }
00133
00134
00135 vector float
00136 sample_texture_bilinear(uint unit, vector float texcoord)
00137 {
00138 static const vec_uint4 offset_x = {0, 0, 1, 1};
00139 static const vec_uint4 offset_y = {0, 1, 0, 1};
00140
00141 vector float tc = spu_mul(texcoord, spu.texture[unit].tex_size);
00142 tc = spu_add(tc, spu_splats(-0.5f));
00143
00144
00145 vec_uint4 itc = spu_convtu(tc, 0);
00146
00147 vec_uint4 texels[4];
00148
00149
00150
00151
00152
00153
00154
00155
00156 vec_uint4 x = spu_splats(spu_extract(itc, 0));
00157 vec_uint4 y = spu_splats(spu_extract(itc, 1));
00158 x = spu_add(x, offset_x);
00159 y = spu_add(y, offset_y);
00160
00161
00162 x = spu_and(x, spu.texture[unit].tex_size_x_mask);
00163 y = spu_and(y, spu.texture[unit].tex_size_y_mask);
00164
00165 get_four_texels(unit, x, y, texels);
00166
00167
00168 vector float texel00 = spu_unpack_A8R8G8B8(spu_extract(texels[0], 0));
00169 vector float texel01 = spu_unpack_A8R8G8B8(spu_extract(texels[1], 0));
00170 vector float texel10 = spu_unpack_A8R8G8B8(spu_extract(texels[2], 0));
00171 vector float texel11 = spu_unpack_A8R8G8B8(spu_extract(texels[3], 0));
00172
00173
00174
00175
00176
00177 vector float tc1024 = spu_mul(tc, spu_splats(1024.0f));
00178 vector signed int itc1024 = spu_convts(tc1024, 0);
00179 itc1024 = spu_and(itc1024, spu_splats((1 << 10) - 1));
00180 vector float weight = spu_convtf(itc1024, 10);
00181
00182
00183 vector float sfrac = spu_splats(spu_extract(weight, 0));
00184 vector float tfrac = spu_splats(spu_extract(weight, 1));
00185 vector float sfrac1 = spu_sub(spu_splats(1.0f), sfrac);
00186 vector float tfrac1 = spu_sub(spu_splats(1.0f), tfrac);
00187
00188
00189 texel00 = spu_mul(spu_mul(texel00, sfrac1), tfrac1);
00190 texel10 = spu_mul(spu_mul(texel10, sfrac ), tfrac1);
00191 texel01 = spu_mul(spu_mul(texel01, sfrac1), tfrac );
00192 texel11 = spu_mul(spu_mul(texel11, sfrac ), tfrac );
00193
00194
00195 vector float texel_sum = spu_add(texel00, texel01);
00196 texel_sum = spu_add(texel_sum, texel10);
00197 texel_sum = spu_add(texel_sum, texel11);
00198
00199 return texel_sum;
00200 }