draw_vs_aos_io.c

Go to the documentation of this file.
00001 /**************************************************************************
00002  * 
00003  * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
00004  * All Rights Reserved.
00005  * 
00006  * Permission is hereby granted, free of charge, to any person obtaining a
00007  * copy of this software and associated documentation files (the
00008  * "Software"), to deal in the Software without restriction, including
00009  * without limitation the rights to use, copy, modify, merge, publish,
00010  * distribute, sub license, and/or sell copies of the Software, and to
00011  * permit persons to whom the Software is furnished to do so, subject to
00012  * the following conditions:
00013  * 
00014  * The above copyright notice and this permission notice (including the
00015  * next paragraph) shall be included in all copies or substantial portions
00016  * of the Software.
00017  * 
00018  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
00019  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
00020  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
00021  * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
00022  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
00023  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
00024  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
00025  * 
00026  **************************************************************************/
00027 
00028 
00029 #include "util/u_memory.h"
00030 #include "pipe/p_shader_tokens.h"
00031 #include "tgsi/tgsi_parse.h"
00032 #include "tgsi/tgsi_util.h"
00033 #include "tgsi/tgsi_exec.h"
00034 #include "draw_vs.h"
00035 #include "draw_vs_aos.h"
00036 #include "draw_vertex.h"
00037 
00038 #include "rtasm/rtasm_x86sse.h"
00039 
00040 #ifdef PIPE_ARCH_X86
00041 
00042 /* Note - don't yet have to worry about interacting with the code in
00043  * draw_vs_aos.c as there is no intermingling of generated code...
00044  * That may have to change, we'll see.
00045  */
00046 static void emit_load_R32G32B32A32( struct aos_compilation *cp,                            
00047                                     struct x86_reg data,
00048                                     struct x86_reg src_ptr )
00049 {
00050    sse_movups(cp->func, data, src_ptr);
00051 }
00052 
00053 static void emit_load_R32G32B32( struct aos_compilation *cp,                       
00054                                  struct x86_reg data,
00055                                  struct x86_reg src_ptr )
00056 {
00057 #if 1
00058    sse_movss(cp->func, data, x86_make_disp(src_ptr, 8));
00059    /* data = z ? ? ? */
00060    sse_shufps(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ), SHUF(X,Y,Z,W) );
00061    /* data = z ? 0 1 */
00062    sse_shufps(cp->func, data, data, SHUF(Y,Z,X,W) );
00063    /* data = ? 0 z 1 */
00064    sse_movlps(cp->func, data, src_ptr);
00065    /* data = x y z 1 */
00066 #else
00067    sse_movups(cp->func, data, src_ptr);
00068    /* data = x y z ? */
00069    sse2_pshufd(cp->func, data, data, SHUF(W,X,Y,Z) );
00070    /* data = ? x y z */
00071    sse_movss(cp->func, data, aos_get_internal_xmm( cp, IMM_ONES ) );
00072    /* data = 1 x y z */
00073    sse2_pshufd(cp->func, data, data, SHUF(Y,Z,W,X) );
00074    /* data = x y z 1 */
00075 #endif
00076 }
00077 
00078 static void emit_load_R32G32( struct aos_compilation *cp, 
00079                            struct x86_reg data,
00080                            struct x86_reg src_ptr )
00081 {
00082    sse_movups(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ) );
00083    sse_movlps(cp->func, data, src_ptr);
00084 }
00085 
00086 
00087 static void emit_load_R32( struct aos_compilation *cp, 
00088                            struct x86_reg data,
00089                            struct x86_reg src_ptr )
00090 {
00091    sse_movss(cp->func, data, src_ptr);
00092    sse_orps(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ) );
00093 }
00094 
00095 
00096 static void emit_load_R8G8B8A8_UNORM( struct aos_compilation *cp,
00097                                        struct x86_reg data,
00098                                        struct x86_reg src_ptr )
00099 {
00100    sse_movss(cp->func, data, src_ptr);
00101    sse2_punpcklbw(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ));
00102    sse2_punpcklbw(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ));
00103    sse2_cvtdq2ps(cp->func, data, data);
00104    sse_mulps(cp->func, data, aos_get_internal(cp, IMM_INV_255));
00105 }
00106 
00107 
00108 
00109 /* Extended swizzles?  Maybe later.
00110  */  
00111 static void emit_swizzle( struct aos_compilation *cp,
00112                           struct x86_reg dest,
00113                           struct x86_reg src,
00114                           ubyte shuffle )
00115 {
00116    sse_shufps(cp->func, dest, src, shuffle);
00117 }
00118 
00119 
00120 
00121 static boolean get_buffer_ptr( struct aos_compilation *cp,
00122                                boolean linear,
00123                                unsigned buf_idx,
00124                                struct x86_reg elt,
00125                                struct x86_reg ptr)
00126 {
00127    struct x86_reg buf = x86_make_disp(aos_get_x86( cp, 0, X86_BUFFERS ), 
00128                                       buf_idx * sizeof(struct aos_buffer));
00129 
00130    struct x86_reg buf_stride = x86_make_disp(buf, 
00131                                              Offset(struct aos_buffer, stride));
00132    if (linear) {
00133       struct x86_reg buf_ptr = x86_make_disp(buf, 
00134                                              Offset(struct aos_buffer, ptr));
00135 
00136 
00137       /* Calculate pointer to current attrib:
00138        */
00139       x86_mov(cp->func, ptr, buf_ptr);
00140       x86_mov(cp->func, elt, buf_stride);
00141       x86_add(cp->func, elt, ptr);
00142       if (buf_idx == 0) sse_prefetchnta(cp->func, x86_make_disp(elt, 192));
00143       x86_mov(cp->func, buf_ptr, elt);
00144    }
00145    else {
00146       struct x86_reg buf_base_ptr = x86_make_disp(buf, 
00147                                                   Offset(struct aos_buffer, base_ptr));
00148 
00149 
00150       /* Calculate pointer to current attrib:
00151        */
00152       x86_mov(cp->func, ptr, buf_stride);
00153       x86_imul(cp->func, ptr, elt);
00154       x86_add(cp->func, ptr, buf_base_ptr);
00155    }
00156 
00157    cp->insn_counter++;
00158 
00159    return TRUE;
00160 }
00161 
00162 
00163 static boolean load_input( struct aos_compilation *cp,
00164                            unsigned idx,
00165                            struct x86_reg bufptr )
00166 {
00167    unsigned format = cp->vaos->base.key.element[idx].in.format;
00168    unsigned offset = cp->vaos->base.key.element[idx].in.offset;
00169    struct x86_reg dataXMM = aos_get_xmm_reg(cp);
00170 
00171    /* Figure out source pointer address:
00172     */
00173    struct x86_reg src = x86_make_disp(bufptr, offset);
00174 
00175    aos_adopt_xmm_reg( cp,
00176                       dataXMM,
00177                       TGSI_FILE_INPUT,
00178                       idx,
00179                       TRUE );
00180 
00181    switch (format) {
00182    case PIPE_FORMAT_R32_FLOAT:
00183       emit_load_R32(cp, dataXMM, src);
00184       break;
00185    case PIPE_FORMAT_R32G32_FLOAT:
00186       emit_load_R32G32(cp, dataXMM, src);
00187       break;
00188    case PIPE_FORMAT_R32G32B32_FLOAT:
00189       emit_load_R32G32B32(cp, dataXMM, src);
00190       break;
00191    case PIPE_FORMAT_R32G32B32A32_FLOAT:
00192       emit_load_R32G32B32A32(cp, dataXMM, src);
00193       break;
00194    case PIPE_FORMAT_B8G8R8A8_UNORM:
00195       emit_load_R8G8B8A8_UNORM(cp, dataXMM, src);
00196       emit_swizzle(cp, dataXMM, dataXMM, SHUF(Z,Y,X,W));
00197       break;
00198    case PIPE_FORMAT_R8G8B8A8_UNORM:
00199       emit_load_R8G8B8A8_UNORM(cp, dataXMM, src);
00200       break;
00201    default:
00202       ERROR(cp, "unhandled input format");
00203       return FALSE;
00204    }
00205 
00206    return TRUE;
00207 }
00208 
00209 static boolean load_inputs( struct aos_compilation *cp,
00210                             unsigned buffer,
00211                             struct x86_reg ptr )
00212 {
00213    unsigned i;
00214 
00215    for (i = 0; i < cp->vaos->base.key.nr_inputs; i++) {
00216       if (cp->vaos->base.key.element[i].in.buffer == buffer) {
00217 
00218          if (!load_input( cp, i, ptr ))
00219             return FALSE;
00220 
00221          cp->insn_counter++;
00222       }
00223    }
00224    
00225    return TRUE;
00226 }
00227 
00228 boolean aos_init_inputs( struct aos_compilation *cp, boolean linear )
00229 {
00230    unsigned i;
00231    for (i = 0; i < cp->vaos->nr_vb; i++) {
00232       struct x86_reg buf = x86_make_disp(aos_get_x86( cp, 0, X86_BUFFERS ), 
00233                                          i * sizeof(struct aos_buffer));
00234 
00235       struct x86_reg buf_base_ptr = x86_make_disp(buf, 
00236                                                   Offset(struct aos_buffer, base_ptr));
00237 
00238       if (cp->vaos->base.key.const_vbuffers & (1<<i)) {
00239          struct x86_reg ptr = cp->tmp_EAX;
00240 
00241          x86_mov(cp->func, ptr, buf_base_ptr);
00242 
00243          /* Load all inputs for this constant vertex buffer
00244           */
00245          load_inputs( cp, i, x86_deref(ptr) );
00246          
00247          /* Then just force them out to aos_machine.input[]
00248           */
00249          aos_spill_all( cp );
00250 
00251       }
00252       else if (linear) {
00253 
00254          struct x86_reg elt = cp->idx_EBX;
00255          struct x86_reg ptr = cp->tmp_EAX;
00256 
00257          struct x86_reg buf_stride = x86_make_disp(buf, 
00258                                                    Offset(struct aos_buffer, stride));
00259 
00260          struct x86_reg buf_ptr = x86_make_disp(buf, 
00261                                                 Offset(struct aos_buffer, ptr));
00262 
00263 
00264          /* Calculate pointer to current attrib:
00265           */
00266          x86_mov(cp->func, ptr, buf_stride);
00267          x86_imul(cp->func, ptr, elt);
00268          x86_add(cp->func, ptr, buf_base_ptr);
00269 
00270 
00271          /* In the linear case, keep the buffer pointer instead of the
00272           * index number.
00273           */
00274          if (cp->vaos->nr_vb == 1) 
00275             x86_mov( cp->func, elt, ptr );
00276          else
00277             x86_mov( cp->func, buf_ptr, ptr );
00278 
00279          cp->insn_counter++;
00280       }
00281    }
00282 
00283    return TRUE;
00284 }
00285 
00286 boolean aos_fetch_inputs( struct aos_compilation *cp, boolean linear )
00287 {
00288    unsigned j;
00289 
00290    for (j = 0; j < cp->vaos->nr_vb; j++) {
00291       if (cp->vaos->base.key.const_vbuffers & (1<<j)) {
00292          /* just retreive pre-transformed input */
00293       }
00294       else if (linear && cp->vaos->nr_vb == 1) {
00295          load_inputs( cp, 0, cp->idx_EBX );
00296       }
00297       else {
00298          struct x86_reg elt = linear ? cp->idx_EBX : x86_deref(cp->idx_EBX);
00299          struct x86_reg ptr = cp->tmp_EAX;
00300 
00301          if (!get_buffer_ptr( cp, linear, j, elt, ptr ))
00302             return FALSE;
00303 
00304          if (!load_inputs( cp, j, ptr ))
00305             return FALSE;
00306       }
00307    }
00308 
00309    return TRUE;
00310 }
00311 
00312 boolean aos_incr_inputs( struct aos_compilation *cp, boolean linear )
00313 {
00314    if (linear && cp->vaos->nr_vb == 1) {
00315       struct x86_reg stride = x86_make_disp(aos_get_x86( cp, 0, X86_BUFFERS ), 
00316                                             (0 * sizeof(struct aos_buffer) + 
00317                                              Offset(struct aos_buffer, stride)));
00318 
00319       x86_add(cp->func, cp->idx_EBX, stride);
00320       sse_prefetchnta(cp->func, x86_make_disp(cp->idx_EBX, 192));
00321    }
00322    else if (linear) {
00323       /* Nothing to do */
00324    } 
00325    else {
00326       x86_lea(cp->func, cp->idx_EBX, x86_make_disp(cp->idx_EBX, 4));
00327    }
00328 
00329    return TRUE;
00330 }
00331 
00332 
00333 
00334 
00335 
00336 
00337 static void emit_store_R32G32B32A32( struct aos_compilation *cp,                           
00338                                      struct x86_reg dst_ptr,
00339                                      struct x86_reg dataXMM )
00340 {
00341    sse_movups(cp->func, dst_ptr, dataXMM);
00342 }
00343 
00344 static void emit_store_R32G32B32( struct aos_compilation *cp, 
00345                                   struct x86_reg dst_ptr,
00346                                   struct x86_reg dataXMM )
00347 {
00348    sse_movlps(cp->func, dst_ptr, dataXMM);
00349    sse_shufps(cp->func, dataXMM, dataXMM, SHUF(Z,Z,Z,Z) ); /* NOTE! destructive */
00350    sse_movss(cp->func, x86_make_disp(dst_ptr,8), dataXMM);
00351 }
00352 
00353 static void emit_store_R32G32( struct aos_compilation *cp, 
00354                                struct x86_reg dst_ptr,
00355                                struct x86_reg dataXMM )
00356 {
00357    sse_movlps(cp->func, dst_ptr, dataXMM);
00358 }
00359 
00360 static void emit_store_R32( struct aos_compilation *cp, 
00361                             struct x86_reg dst_ptr,
00362                             struct x86_reg dataXMM )
00363 {
00364    sse_movss(cp->func, dst_ptr, dataXMM);
00365 }
00366 
00367 
00368 
00369 static void emit_store_R8G8B8A8_UNORM( struct aos_compilation *cp,
00370                                        struct x86_reg dst_ptr,
00371                                        struct x86_reg dataXMM )
00372 {
00373    sse_mulps(cp->func, dataXMM, aos_get_internal(cp, IMM_255));
00374    sse2_cvtps2dq(cp->func, dataXMM, dataXMM);
00375    sse2_packssdw(cp->func, dataXMM, dataXMM);
00376    sse2_packuswb(cp->func, dataXMM, dataXMM);
00377    sse_movss(cp->func, dst_ptr, dataXMM);
00378 }
00379 
00380 
00381 
00382 
00383 
00384 static boolean emit_output( struct aos_compilation *cp,
00385                             struct x86_reg ptr,
00386                             struct x86_reg dataXMM, 
00387                             unsigned format )
00388 {
00389    switch (format) {
00390    case EMIT_1F:
00391    case EMIT_1F_PSIZE:
00392       emit_store_R32(cp, ptr, dataXMM);
00393       break;
00394    case EMIT_2F:
00395       emit_store_R32G32(cp, ptr, dataXMM);
00396       break;
00397    case EMIT_3F:
00398       emit_store_R32G32B32(cp, ptr, dataXMM);
00399       break;
00400    case EMIT_4F:
00401       emit_store_R32G32B32A32(cp, ptr, dataXMM);
00402       break;
00403    case EMIT_4UB:
00404       if (1) {
00405          emit_swizzle(cp, dataXMM, dataXMM, SHUF(Z,Y,X,W));
00406          emit_store_R8G8B8A8_UNORM(cp, ptr, dataXMM);
00407       }
00408       else {
00409          emit_store_R8G8B8A8_UNORM(cp, ptr, dataXMM);
00410       }
00411       break;
00412    default:
00413       ERROR(cp, "unhandled output format");
00414       return FALSE;
00415    }
00416 
00417    return TRUE;
00418 }
00419 
00420 
00421 
00422 boolean aos_emit_outputs( struct aos_compilation *cp )
00423 {
00424    unsigned i;
00425    
00426    for (i = 0; i < cp->vaos->base.key.nr_outputs; i++) {
00427       unsigned format = cp->vaos->base.key.element[i].out.format;
00428       unsigned offset = cp->vaos->base.key.element[i].out.offset;
00429       unsigned vs_output = cp->vaos->base.key.element[i].out.vs_output;
00430 
00431       struct x86_reg data;
00432 
00433       if (format == EMIT_1F_PSIZE) {
00434          data = aos_get_internal_xmm( cp, IMM_PSIZE );
00435       }
00436       else {
00437          data = aos_get_shader_reg( cp, 
00438                                     TGSI_FILE_OUTPUT,
00439                                     vs_output );
00440       }
00441 
00442       if (data.file != file_XMM) {
00443          struct x86_reg tmp = aos_get_xmm_reg( cp );
00444          sse_movaps(cp->func, tmp, data);
00445          data = tmp;
00446       }
00447       
00448       if (!emit_output( cp, 
00449                         x86_make_disp( cp->outbuf_ECX, offset ),
00450                         data, 
00451                         format ))
00452          return FALSE;
00453 
00454       aos_release_xmm_reg( cp, data.idx );
00455 
00456       cp->insn_counter++;
00457    }
00458 
00459    return TRUE;
00460 }
00461 
00462 #endif

Generated on Tue Sep 29 06:25:14 2009 for Gallium3D by  doxygen 1.5.4