brw_vs_emit.c

Go to the documentation of this file.
00001 /*
00002  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
00003  Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
00004  develop this 3D driver.
00005 
00006  Permission is hereby granted, free of charge, to any person obtaining
00007  a copy of this software and associated documentation files (the
00008  "Software"), to deal in the Software without restriction, including
00009  without limitation the rights to use, copy, modify, merge, publish,
00010  distribute, sublicense, and/or sell copies of the Software, and to
00011  permit persons to whom the Software is furnished to do so, subject to
00012  the following conditions:
00013 
00014  The above copyright notice and this permission notice (including the
00015  next paragraph) shall be included in all copies or substantial
00016  portions of the Software.
00017 
00018  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
00019  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
00020  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
00021  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
00022  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
00023  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
00024  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
00025 
00026  **********************************************************************/
00027  /*
00028   * Authors:
00029   *   Keith Whitwell <keith@tungstengraphics.com>
00030   */
00031 
00032 #include "brw_context.h"
00033 #include "brw_vs.h"
00034 
00035 #include "pipe/p_shader_tokens.h"
00036 #include "tgsi/tgsi_parse.h"
00037 
/* Per-program summary handed to the register allocator and to the
 * vertex-write epilogue.
 */
struct brw_prog_info {
   /* Register counts; num_temps and num_addrs size the GRF allocation. */
   unsigned num_temps;
   unsigned num_addrs;
   unsigned num_consts;

   /* Non-zero when the program writes a point size output. */
   unsigned writes_psize;

   /* Indices into the TGSI input/output register files. */
   unsigned pos_idx;             /* position output */
   unsigned result_edge_idx;     /* edge-flag output */
   unsigned edge_flag_idx;       /* edge-flag input */
   unsigned psize_idx;           /* point-size output */
};
00050 
00051 /* Do things as simply as possible.  Allocate and populate all regs
00052  * ahead of time.
00053  */
00054 static void brw_vs_alloc_regs( struct brw_vs_compile *c,
00055                                struct brw_prog_info *info )
00056 {
00057    unsigned i, reg = 0, mrf;
00058    unsigned nr_params;
00059 
00060    /* r0 -- reserved as usual
00061     */
00062    c->r0 = brw_vec8_grf(reg, 0); reg++;
00063 
00064    /* User clip planes from curbe:
00065     */
00066    if (c->key.nr_userclip) {
00067       for (i = 0; i < c->key.nr_userclip; i++) {
00068          c->userplane[i] = stride( brw_vec4_grf(reg+3+i/2, (i%2) * 4), 0, 4, 1);
00069       }
00070 
00071       /* Deal with curbe alignment:
00072        */
00073       reg += ((6+c->key.nr_userclip+3)/4)*2;
00074    }
00075 
00076    /* Vertex program parameters from curbe:
00077     */
00078    nr_params = c->prog_data.max_const;
00079    for (i = 0; i < nr_params; i++) {
00080       c->regs[TGSI_FILE_CONSTANT][i] = stride(brw_vec4_grf(reg+i/2, (i%2) * 4), 0, 4, 1);
00081    }
00082    reg += (nr_params+1)/2;
00083    c->prog_data.curb_read_length = reg - 1;
00084 
00085 
00086 
00087    /* Allocate input regs:
00088     */
00089    c->nr_inputs = c->vp->info.num_inputs;
00090    for (i = 0; i < c->nr_inputs; i++) {
00091          c->regs[TGSI_FILE_INPUT][i] = brw_vec8_grf(reg, 0);
00092          reg++;
00093    }
00094 
00095 
00096    /* Allocate outputs: TODO: could organize the non-position outputs
00097     * to go straight into message regs.
00098     */
00099    c->nr_outputs = 0;
00100    c->first_output = reg;
00101    mrf = 4;
00102    for (i = 0; i < c->vp->info.num_outputs; i++) {
00103       c->nr_outputs++;
00104 #if 0
00105       if (i == VERT_RESULT_HPOS) {
00106          c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0);
00107          reg++;
00108       }
00109       else if (i == VERT_RESULT_PSIZ) {
00110          c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0);
00111          reg++;
00112          mrf++;         /* just a placeholder?  XXX fix later stages & remove this */
00113       }
00114       else {
00115          c->regs[TGSI_FILE_OUTPUT][i] = brw_message_reg(mrf);
00116          mrf++;
00117       }
00118 #else
00119       /*treat pos differently for now */
00120       if (i == info->pos_idx) {
00121          c->regs[TGSI_FILE_OUTPUT][i] = brw_vec8_grf(reg, 0);
00122          reg++;
00123       } else {
00124          c->regs[TGSI_FILE_OUTPUT][i] = brw_message_reg(mrf);
00125          mrf++;
00126       }
00127 #endif
00128    }
00129 
00130    /* Allocate program temporaries:
00131     */
00132    for (i = 0; i < info->num_temps; i++) {
00133       c->regs[TGSI_FILE_TEMPORARY][i] = brw_vec8_grf(reg, 0);
00134       reg++;
00135    }
00136 
00137    /* Address reg(s).  Don't try to use the internal address reg until
00138     * deref time.
00139     */
00140    for (i = 0; i < info->num_addrs; i++) {
00141       c->regs[TGSI_FILE_ADDRESS][i] =  brw_reg(BRW_GENERAL_REGISTER_FILE,
00142                                                reg,
00143                                                0,
00144                                                BRW_REGISTER_TYPE_D,
00145                                                BRW_VERTICAL_STRIDE_8,
00146                                                BRW_WIDTH_8,
00147                                                BRW_HORIZONTAL_STRIDE_1,
00148                                                BRW_SWIZZLE_XXXX,
00149                                                TGSI_WRITEMASK_X);
00150       reg++;
00151    }
00152 
00153    for (i = 0; i < 128; i++) {
00154       if (c->output_regs[i].used_in_src) {
00155          c->output_regs[i].reg = brw_vec8_grf(reg, 0);
00156          reg++;
00157       }
00158    }
00159 
00160    c->stack =  brw_uw16_reg(BRW_GENERAL_REGISTER_FILE, reg, 0);
00161    reg += 2;
00162 
00163 
00164    /* Some opcodes need an internal temporary:
00165     */
00166    c->first_tmp = reg;
00167    c->last_tmp = reg;           /* for allocation purposes */
00168 
00169    /* Each input reg holds data from two vertices.  The
00170     * urb_read_length is the number of registers read from *each*
00171     * vertex urb, so is half the amount:
00172     */
00173    c->prog_data.urb_read_length = (c->nr_inputs+1)/2;
00174 
00175    c->prog_data.urb_entry_size = (c->nr_outputs+2+3)/4;
00176    c->prog_data.total_grf = reg;
00177 }
00178 
00179 
00180 static struct brw_reg get_tmp( struct brw_vs_compile *c )
00181 {
00182    struct brw_reg tmp = brw_vec8_grf(c->last_tmp, 0);
00183 
00184    if (++c->last_tmp > c->prog_data.total_grf)
00185       c->prog_data.total_grf = c->last_tmp;
00186 
00187    return tmp;
00188 }
00189 
00190 static void release_tmp( struct brw_vs_compile *c, struct brw_reg tmp )
00191 {
00192    if (tmp.nr == c->last_tmp-1)
00193       c->last_tmp--;
00194 }
00195 
00196 static void release_tmps( struct brw_vs_compile *c )
00197 {
00198    c->last_tmp = c->first_tmp;
00199 }
00200 
00201 
00202 static void unalias1( struct brw_vs_compile *c,
00203                       struct brw_reg dst,
00204                       struct brw_reg arg0,
00205                       void (*func)( struct brw_vs_compile *,
00206                                     struct brw_reg,
00207                                     struct brw_reg ))
00208 {
00209    if (dst.file == arg0.file && dst.nr == arg0.nr) {
00210       struct brw_compile *p = &c->func;
00211       struct brw_reg tmp = brw_writemask(get_tmp(c), dst.dw1.bits.writemask);
00212       func(c, tmp, arg0);
00213       brw_MOV(p, dst, tmp);
00214    }
00215    else {
00216       func(c, dst, arg0);
00217    }
00218 }
00219 
00220 static void unalias2( struct brw_vs_compile *c,
00221                       struct brw_reg dst,
00222                       struct brw_reg arg0,
00223                       struct brw_reg arg1,
00224                       void (*func)( struct brw_vs_compile *,
00225                                     struct brw_reg,
00226                                     struct brw_reg,
00227                                     struct brw_reg ))
00228 {
00229    if ((dst.file == arg0.file && dst.nr == arg0.nr) ||
00230        (dst.file == arg1.file && dst.nr == arg1.nr)) {
00231       struct brw_compile *p = &c->func;
00232       struct brw_reg tmp = brw_writemask(get_tmp(c), dst.dw1.bits.writemask);
00233       func(c, tmp, arg0, arg1);
00234       brw_MOV(p, dst, tmp);
00235    }
00236    else {
00237       func(c, dst, arg0, arg1);
00238    }
00239 }
00240 
00241 static void emit_sop( struct brw_compile *p,
00242                       struct brw_reg dst,
00243                       struct brw_reg arg0,
00244                       struct brw_reg arg1,
00245                       unsigned cond)
00246 {
00247    brw_push_insn_state(p);
00248    brw_CMP(p, brw_null_reg(), cond, arg0, arg1);
00249    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
00250    brw_MOV(p, dst, brw_imm_f(1.0f));
00251    brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
00252    brw_MOV(p, dst, brw_imm_f(0.0f));
00253    brw_pop_insn_state(p);
00254 }
00255 
00256 static void emit_seq( struct brw_compile *p,
00257                       struct brw_reg dst,
00258                       struct brw_reg arg0,
00259                       struct brw_reg arg1 )
00260 {
00261    emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_EQ);
00262 }
00263 
00264 static void emit_sne( struct brw_compile *p,
00265                       struct brw_reg dst,
00266                       struct brw_reg arg0,
00267                       struct brw_reg arg1 )
00268 {
00269    emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_NEQ);
00270 }
00271 static void emit_slt( struct brw_compile *p,
00272                       struct brw_reg dst,
00273                       struct brw_reg arg0,
00274                       struct brw_reg arg1 )
00275 {
00276    emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_L);
00277 }
00278 
00279 static void emit_sle( struct brw_compile *p,
00280                       struct brw_reg dst,
00281                       struct brw_reg arg0,
00282                       struct brw_reg arg1 )
00283 {
00284    emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_LE);
00285 }
00286 
00287 static void emit_sgt( struct brw_compile *p,
00288                       struct brw_reg dst,
00289                       struct brw_reg arg0,
00290                       struct brw_reg arg1 )
00291 {
00292    emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_G);
00293 }
00294 
00295 static void emit_sge( struct brw_compile *p,
00296                       struct brw_reg dst,
00297                       struct brw_reg arg0,
00298                       struct brw_reg arg1 )
00299 {
00300   emit_sop(p, dst, arg0, arg1, BRW_CONDITIONAL_GE);
00301 }
00302 
00303 static void emit_max( struct brw_compile *p,
00304                       struct brw_reg dst,
00305                       struct brw_reg arg0,
00306                       struct brw_reg arg1 )
00307 {
00308    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0, arg1);
00309    brw_SEL(p, dst, arg1, arg0);
00310    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
00311 }
00312 
00313 static void emit_min( struct brw_compile *p,
00314                       struct brw_reg dst,
00315                       struct brw_reg arg0,
00316                       struct brw_reg arg1 )
00317 {
00318    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_L, arg0, arg1);
00319    brw_SEL(p, dst, arg0, arg1);
00320    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
00321 }
00322 
00323 
00324 static void emit_math1( struct brw_vs_compile *c,
00325                         unsigned function,
00326                         struct brw_reg dst,
00327                         struct brw_reg arg0,
00328                         unsigned precision)
00329 {
00330    /* There are various odd behaviours with SEND on the simulator.  In
00331     * addition there are documented issues with the fact that the GEN4
00332     * processor doesn't do dependency control properly on SEND
00333     * results.  So, on balance, this kludge to get around failures
00334     * with writemasked math results looks like it might be necessary
00335     * whether that turns out to be a simulator bug or not:
00336     */
00337    struct brw_compile *p = &c->func;
00338    struct brw_reg tmp = dst;
00339    boolean need_tmp = (dst.dw1.bits.writemask != 0xf ||
00340                          dst.file != BRW_GENERAL_REGISTER_FILE);
00341 
00342    if (need_tmp)
00343       tmp = get_tmp(c);
00344 
00345    brw_math(p,
00346             tmp,
00347             function,
00348             BRW_MATH_SATURATE_NONE,
00349             2,
00350             arg0,
00351             BRW_MATH_DATA_SCALAR,
00352             precision);
00353 
00354    if (need_tmp) {
00355       brw_MOV(p, dst, tmp);
00356       release_tmp(c, tmp);
00357    }
00358 }
00359 
00360 static void emit_math2( struct brw_vs_compile *c,
00361                         unsigned function,
00362                         struct brw_reg dst,
00363                         struct brw_reg arg0,
00364                         struct brw_reg arg1,
00365                         unsigned precision)
00366 {
00367    struct brw_compile *p = &c->func;
00368    struct brw_reg tmp = dst;
00369    boolean need_tmp = (dst.dw1.bits.writemask != 0xf ||
00370                          dst.file != BRW_GENERAL_REGISTER_FILE);
00371 
00372    if (need_tmp)
00373       tmp = get_tmp(c);
00374 
00375    brw_MOV(p, brw_message_reg(3), arg1);
00376 
00377    brw_math(p,
00378             tmp,
00379             function,
00380             BRW_MATH_SATURATE_NONE,
00381             2,
00382             arg0,
00383             BRW_MATH_DATA_SCALAR,
00384             precision);
00385 
00386    if (need_tmp) {
00387       brw_MOV(p, dst, tmp);
00388       release_tmp(c, tmp);
00389    }
00390 }
00391 
00392 
00393 
00394 static void emit_exp_noalias( struct brw_vs_compile *c,
00395                               struct brw_reg dst,
00396                               struct brw_reg arg0 )
00397 {
00398    struct brw_compile *p = &c->func;
00399 
00400 
00401    if (dst.dw1.bits.writemask & TGSI_WRITEMASK_X) {
00402       struct brw_reg tmp = get_tmp(c);
00403       struct brw_reg tmp_d = retype(tmp, BRW_REGISTER_TYPE_D);
00404 
00405       /* tmp_d = floor(arg0.x) */
00406       brw_RNDD(p, tmp_d, brw_swizzle1(arg0, 0));
00407 
00408       /* result[0] = 2.0 ^ tmp */
00409 
00410       /* Adjust exponent for floating point:
00411        * exp += 127
00412        */
00413       brw_ADD(p, brw_writemask(tmp_d, TGSI_WRITEMASK_X), tmp_d, brw_imm_d(127));
00414 
00415       /* Install exponent and sign.
00416        * Excess drops off the edge:
00417        */
00418       brw_SHL(p, brw_writemask(retype(dst, BRW_REGISTER_TYPE_D), TGSI_WRITEMASK_X),
00419               tmp_d, brw_imm_d(23));
00420 
00421       release_tmp(c, tmp);
00422    }
00423 
00424    if (dst.dw1.bits.writemask & TGSI_WRITEMASK_Y) {
00425       /* result[1] = arg0.x - floor(arg0.x) */
00426       brw_FRC(p, brw_writemask(dst, TGSI_WRITEMASK_Y), brw_swizzle1(arg0, 0));
00427    }
00428 
00429    if (dst.dw1.bits.writemask & TGSI_WRITEMASK_Z) {
00430       /* As with the LOG instruction, we might be better off just
00431        * doing a taylor expansion here, seeing as we have to do all
00432        * the prep work.
00433        *
00434        * If mathbox partial precision is too low, consider also:
00435        * result[3] = result[0] * EXP(result[1])
00436        */
00437       emit_math1(c,
00438                  BRW_MATH_FUNCTION_EXP,
00439                  brw_writemask(dst, TGSI_WRITEMASK_Z),
00440                  brw_swizzle1(arg0, 0),
00441                  BRW_MATH_PRECISION_PARTIAL);
00442    }
00443 
00444    if (dst.dw1.bits.writemask & TGSI_WRITEMASK_W) {
00445       /* result[3] = 1.0; */
00446       brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_W), brw_imm_f(1));
00447    }
00448 }
00449 
00450 
00451 static void emit_log_noalias( struct brw_vs_compile *c,
00452                               struct brw_reg dst,
00453                               struct brw_reg arg0 )
00454 {
00455    struct brw_compile *p = &c->func;
00456    struct brw_reg tmp = dst;
00457    struct brw_reg tmp_ud = retype(tmp, BRW_REGISTER_TYPE_UD);
00458    struct brw_reg arg0_ud = retype(arg0, BRW_REGISTER_TYPE_UD);
00459    boolean need_tmp = (dst.dw1.bits.writemask != 0xf ||
00460                          dst.file != BRW_GENERAL_REGISTER_FILE);
00461 
00462    if (need_tmp) {
00463       tmp = get_tmp(c);
00464       tmp_ud = retype(tmp, BRW_REGISTER_TYPE_UD);
00465    }
00466 
00467    /* Perform mant = frexpf(fabsf(x), &exp), adjust exp and mnt
00468     * according to spec:
00469     *
00470     * These almost look likey they could be joined up, but not really
00471     * practical:
00472     *
00473     * result[0].f = (x.i & ((1<<31)-1) >> 23) - 127
00474     * result[1].i = (x.i & ((1<<23)-1)        + (127<<23)
00475     */
00476    if (dst.dw1.bits.writemask & TGSI_WRITEMASK_XZ) {
00477       brw_AND(p,
00478               brw_writemask(tmp_ud, TGSI_WRITEMASK_X),
00479               brw_swizzle1(arg0_ud, 0),
00480               brw_imm_ud((1U<<31)-1));
00481 
00482       brw_SHR(p,
00483               brw_writemask(tmp_ud, TGSI_WRITEMASK_X),
00484               tmp_ud,
00485               brw_imm_ud(23));
00486 
00487       brw_ADD(p,
00488               brw_writemask(tmp, TGSI_WRITEMASK_X),
00489               retype(tmp_ud, BRW_REGISTER_TYPE_D),      /* does it matter? */
00490               brw_imm_d(-127));
00491    }
00492 
00493    if (dst.dw1.bits.writemask & TGSI_WRITEMASK_YZ) {
00494       brw_AND(p,
00495               brw_writemask(tmp_ud, TGSI_WRITEMASK_Y),
00496               brw_swizzle1(arg0_ud, 0),
00497               brw_imm_ud((1<<23)-1));
00498 
00499       brw_OR(p,
00500              brw_writemask(tmp_ud, TGSI_WRITEMASK_Y),
00501              tmp_ud,
00502              brw_imm_ud(127<<23));
00503    }
00504 
00505    if (dst.dw1.bits.writemask & TGSI_WRITEMASK_Z) {
00506       /* result[2] = result[0] + LOG2(result[1]); */
00507 
00508       /* Why bother?  The above is just a hint how to do this with a
00509        * taylor series.  Maybe we *should* use a taylor series as by
00510        * the time all the above has been done it's almost certainly
00511        * quicker than calling the mathbox, even with low precision.
00512        *
00513        * Options are:
00514        *    - result[0] + mathbox.LOG2(result[1])
00515        *    - mathbox.LOG2(arg0.x)
00516        *    - result[0] + inline_taylor_approx(result[1])
00517        */
00518       emit_math1(c,
00519                  BRW_MATH_FUNCTION_LOG,
00520                  brw_writemask(tmp, TGSI_WRITEMASK_Z),
00521                  brw_swizzle1(tmp, 1),
00522                  BRW_MATH_PRECISION_FULL);
00523 
00524       brw_ADD(p,
00525               brw_writemask(tmp, TGSI_WRITEMASK_Z),
00526               brw_swizzle1(tmp, 2),
00527               brw_swizzle1(tmp, 0));
00528    }
00529 
00530    if (dst.dw1.bits.writemask & TGSI_WRITEMASK_W) {
00531       /* result[3] = 1.0; */
00532       brw_MOV(p, brw_writemask(tmp, TGSI_WRITEMASK_W), brw_imm_f(1));
00533    }
00534 
00535    if (need_tmp) {
00536       brw_MOV(p, dst, tmp);
00537       release_tmp(c, tmp);
00538    }
00539 }
00540 
00541 
00542 
00543 
00544 /* Need to unalias - consider swizzles:   r0 = DST r0.xxxx r1
00545  */
00546 static void emit_dst_noalias( struct brw_vs_compile *c,
00547                               struct brw_reg dst,
00548                               struct brw_reg arg0,
00549                               struct brw_reg arg1)
00550 {
00551    struct brw_compile *p = &c->func;
00552 
00553    /* There must be a better way to do this:
00554     */
00555    if (dst.dw1.bits.writemask & TGSI_WRITEMASK_X)
00556       brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_X), brw_imm_f(1.0));
00557    if (dst.dw1.bits.writemask & TGSI_WRITEMASK_Y)
00558       brw_MUL(p, brw_writemask(dst, TGSI_WRITEMASK_Y), arg0, arg1);
00559    if (dst.dw1.bits.writemask & TGSI_WRITEMASK_Z)
00560       brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_Z), arg0);
00561    if (dst.dw1.bits.writemask & TGSI_WRITEMASK_W)
00562       brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_W), arg1);
00563 }
00564 
00565 static void emit_xpd( struct brw_compile *p,
00566                       struct brw_reg dst,
00567                       struct brw_reg t,
00568                       struct brw_reg u)
00569 {
00570    brw_MUL(p, brw_null_reg(), brw_swizzle(t, 1,2,0,3),  brw_swizzle(u,2,0,1,3));
00571    brw_MAC(p, dst,     negate(brw_swizzle(t, 2,0,1,3)), brw_swizzle(u,1,2,0,3));
00572 }
00573 
00574 
00575 
00576 static void emit_lit_noalias( struct brw_vs_compile *c,
00577                               struct brw_reg dst,
00578                               struct brw_reg arg0 )
00579 {
00580    struct brw_compile *p = &c->func;
00581    struct brw_instruction *if_insn;
00582    struct brw_reg tmp = dst;
00583    boolean need_tmp = (dst.file != BRW_GENERAL_REGISTER_FILE);
00584 
00585    if (need_tmp)
00586       tmp = get_tmp(c);
00587 
00588    brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_YZ), brw_imm_f(0));
00589    brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_XW), brw_imm_f(1));
00590 
00591    /* Need to use BRW_EXECUTE_8 and also do an 8-wide compare in order
00592     * to get all channels active inside the IF.  In the clipping code
00593     * we run with NoMask, so it's not an option and we can use
00594     * BRW_EXECUTE_1 for all comparisions.
00595     */
00596    brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, brw_swizzle1(arg0,0), brw_imm_f(0));
00597    if_insn = brw_IF(p, BRW_EXECUTE_8);
00598    {
00599       brw_MOV(p, brw_writemask(dst, TGSI_WRITEMASK_Y), brw_swizzle1(arg0,0));
00600 
00601       brw_CMP(p, brw_null_reg(), BRW_CONDITIONAL_G, brw_swizzle1(arg0,1), brw_imm_f(0));
00602       brw_MOV(p, brw_writemask(tmp, TGSI_WRITEMASK_Z),  brw_swizzle1(arg0,1));
00603       brw_set_predicate_control(p, BRW_PREDICATE_NONE);
00604 
00605       emit_math2(c,
00606                  BRW_MATH_FUNCTION_POW,
00607                  brw_writemask(dst, TGSI_WRITEMASK_Z),
00608                  brw_swizzle1(tmp, 2),
00609                  brw_swizzle1(arg0, 3),
00610                  BRW_MATH_PRECISION_PARTIAL);
00611    }
00612 
00613    brw_ENDIF(p, if_insn);
00614 }
00615 
00616 
00617 
00618 
00619 
00620 /* TODO: relative addressing!
00621  */
00622 static struct brw_reg get_reg( struct brw_vs_compile *c,
00623                                unsigned file,
00624                                unsigned index )
00625 {
00626    switch (file) {
00627    case TGSI_FILE_TEMPORARY:
00628    case TGSI_FILE_INPUT:
00629    case TGSI_FILE_OUTPUT:
00630       assert(c->regs[file][index].nr != 0);
00631       return c->regs[file][index];
00632    case TGSI_FILE_CONSTANT:
00633       assert(c->regs[TGSI_FILE_CONSTANT][index + c->prog_data.num_imm].nr != 0);
00634       return c->regs[TGSI_FILE_CONSTANT][index + c->prog_data.num_imm];
00635    case TGSI_FILE_IMMEDIATE:
00636       assert(c->regs[TGSI_FILE_CONSTANT][index].nr != 0);
00637       return c->regs[TGSI_FILE_CONSTANT][index];
00638    case TGSI_FILE_ADDRESS:
00639       assert(index == 0);
00640       return c->regs[file][index];
00641 
00642    case TGSI_FILE_NULL:                 /* undef values */
00643       return brw_null_reg();
00644 
00645    default:
00646       assert(0);
00647       return brw_null_reg();
00648    }
00649 }
00650 
00651 
00652 
00653 static struct brw_reg deref( struct brw_vs_compile *c,
00654                              struct brw_reg arg,
00655                              int offset)
00656 {
00657    struct brw_compile *p = &c->func;
00658    struct brw_reg tmp = vec4(get_tmp(c));
00659    struct brw_reg vp_address = retype(vec1(get_reg(c, TGSI_FILE_ADDRESS, 0)), BRW_REGISTER_TYPE_UW);
00660    unsigned byte_offset = arg.nr * 32 + arg.subnr + offset * 16;
00661    struct brw_reg indirect = brw_vec4_indirect(0,0);
00662 
00663    {
00664       brw_push_insn_state(p);
00665       brw_set_access_mode(p, BRW_ALIGN_1);
00666 
00667       /* This is pretty clunky - load the address register twice and
00668        * fetch each 4-dword value in turn.  There must be a way to do
00669        * this in a single pass, but I couldn't get it to work.
00670        */
00671       brw_ADD(p, brw_address_reg(0), vp_address, brw_imm_d(byte_offset));
00672       brw_MOV(p, tmp, indirect);
00673 
00674       brw_ADD(p, brw_address_reg(0), suboffset(vp_address, 8), brw_imm_d(byte_offset));
00675       brw_MOV(p, suboffset(tmp, 4), indirect);
00676 
00677       brw_pop_insn_state(p);
00678    }
00679 
00680    return vec8(tmp);
00681 }
00682 
00683 
00684 static void emit_arl( struct brw_vs_compile *c,
00685                       struct brw_reg dst,
00686                       struct brw_reg arg0 )
00687 {
00688    struct brw_compile *p = &c->func;
00689    struct brw_reg tmp = dst;
00690    boolean need_tmp = (dst.file != BRW_GENERAL_REGISTER_FILE);
00691 
00692    if (need_tmp)
00693       tmp = get_tmp(c);
00694 
00695    brw_RNDD(p, tmp, arg0);
00696    brw_MUL(p, dst, tmp, brw_imm_d(16));
00697 
00698    if (need_tmp)
00699       release_tmp(c, tmp);
00700 }
00701 
00702 
00703 /* Will return mangled results for SWZ op.  The emit_swz() function
00704  * ignores this result and recalculates taking extended swizzles into
00705  * account.
00706  */
00707 static struct brw_reg get_arg( struct brw_vs_compile *c,
00708                                struct tgsi_src_register *src )
00709 {
00710    struct brw_reg reg;
00711 
00712    if (src->File == TGSI_FILE_NULL)
00713       return brw_null_reg();
00714 
00715 #if 0
00716    if (src->RelAddr)
00717       reg = deref(c, c->regs[PROGRAM_STATE_VAR][0], src->Index);
00718    else
00719 #endif
00720       reg = get_reg(c, src->File, src->Index);
00721 
00722    /* Convert 3-bit swizzle to 2-bit.
00723     */
00724    reg.dw1.bits.swizzle = BRW_SWIZZLE4(src->SwizzleX,
00725                                        src->SwizzleY,
00726                                        src->SwizzleZ,
00727                                        src->SwizzleW);
00728 
00729    /* Note this is ok for non-swizzle instructions:
00730     */
00731    reg.negate = src->Negate ? 1 : 0;
00732 
00733    return reg;
00734 }
00735 
00736 
00737 static struct brw_reg get_dst( struct brw_vs_compile *c,
00738                                const struct tgsi_dst_register *dst )
00739 {
00740    struct brw_reg reg = get_reg(c, dst->File, dst->Index);
00741 
00742    reg.dw1.bits.writemask = dst->WriteMask;
00743 
00744    return reg;
00745 }
00746 
00747 
00748 
00749 
00750 static void emit_swz( struct brw_vs_compile *c,
00751                       struct brw_reg dst,
00752                       struct tgsi_src_register src )
00753 {
00754    struct brw_compile *p = &c->func;
00755    unsigned zeros_mask = 0;
00756    unsigned ones_mask = 0;
00757    unsigned src_mask = 0;
00758    ubyte src_swz[4];
00759    boolean need_tmp = (src.Negate &&
00760                          dst.file != BRW_GENERAL_REGISTER_FILE);
00761    struct brw_reg tmp = dst;
00762    unsigned i;
00763 
00764    if (need_tmp)
00765       tmp = get_tmp(c);
00766 
00767    for (i = 0; i < 4; i++) {
00768       if (dst.dw1.bits.writemask & (1<<i)) {
00769          ubyte s = 0;
00770          switch(i) {
00771          case 0:
00772             s = src.SwizzleX;
00773             break;
00774             s = src.SwizzleY;
00775          case 1:
00776             break;
00777             s = src.SwizzleZ;
00778          case 2:
00779             break;
00780             s = src.SwizzleW;
00781          case 3:
00782             break;
00783          }
00784          switch (s) {
00785          case TGSI_SWIZZLE_X:
00786          case TGSI_SWIZZLE_Y:
00787          case TGSI_SWIZZLE_Z:
00788          case TGSI_SWIZZLE_W:
00789             src_mask |= 1<<i;
00790             src_swz[i] = s;
00791             break;
00792          case TGSI_EXTSWIZZLE_ZERO:
00793             zeros_mask |= 1<<i;
00794             break;
00795          case TGSI_EXTSWIZZLE_ONE:
00796             ones_mask |= 1<<i;
00797             break;
00798          }
00799       }
00800    }
00801 
00802    /* Do src first, in case dst aliases src:
00803     */
00804    if (src_mask) {
00805       struct brw_reg arg0;
00806 
00807 #if 0
00808       if (src.RelAddr)
00809          arg0 = deref(c, c->regs[PROGRAM_STATE_VAR][0], src.Index);
00810       else
00811 #endif
00812          arg0 = get_reg(c, src.File, src.Index);
00813 
00814       arg0 = brw_swizzle(arg0,
00815                          src_swz[0], src_swz[1],
00816                          src_swz[2], src_swz[3]);
00817 
00818       brw_MOV(p, brw_writemask(tmp, src_mask), arg0);
00819    }
00820 
00821    if (zeros_mask)
00822       brw_MOV(p, brw_writemask(tmp, zeros_mask), brw_imm_f(0));
00823 
00824    if (ones_mask)
00825       brw_MOV(p, brw_writemask(tmp, ones_mask), brw_imm_f(1));
00826 
00827    if (src.Negate)
00828       brw_MOV(p, brw_writemask(tmp, src.Negate), negate(tmp));
00829 
00830    if (need_tmp) {
00831       brw_MOV(p, dst, tmp);
00832       release_tmp(c, tmp);
00833    }
00834 }
00835 
00836 
00837 
00838 /* Post-vertex-program processing.  Send the results to the URB.
00839  */
00840 static void emit_vertex_write( struct brw_vs_compile *c, struct brw_prog_info *info)
00841 {
00842    struct brw_compile *p = &c->func;
00843    struct brw_reg m0 = brw_message_reg(0);
00844    struct brw_reg pos = c->regs[TGSI_FILE_OUTPUT][info->pos_idx];
00845    struct brw_reg ndc;
00846 
00847    if (c->key.copy_edgeflag) {
00848       brw_MOV(p,
00849               get_reg(c, TGSI_FILE_OUTPUT, info->result_edge_idx),
00850               get_reg(c, TGSI_FILE_INPUT, info->edge_flag_idx));
00851    }
00852 
00853 
00854    /* Build ndc coords?   TODO: Shortcircuit when w is known to be one.
00855     */
00856    if (!c->key.know_w_is_one) {
00857       ndc = get_tmp(c);
00858       emit_math1(c, BRW_MATH_FUNCTION_INV, ndc, brw_swizzle1(pos, 3), BRW_MATH_PRECISION_FULL);
00859       brw_MUL(p, brw_writemask(ndc, TGSI_WRITEMASK_XYZ), pos, ndc);
00860    }
00861    else {
00862       ndc = pos;
00863    }
00864 
00865    /* This includes the workaround for -ve rhw, so is no longer an
00866     * optional step:
00867     */
00868    if (info->writes_psize ||
00869        c->key.nr_userclip ||
00870        !c->key.know_w_is_one)
00871    {
00872       struct brw_reg header1 = retype(get_tmp(c), BRW_REGISTER_TYPE_UD);
00873       unsigned i;
00874 
00875       brw_MOV(p, header1, brw_imm_ud(0));
00876 
00877       brw_set_access_mode(p, BRW_ALIGN_16);
00878 
00879       if (info->writes_psize) {
00880          struct brw_reg psiz = c->regs[TGSI_FILE_OUTPUT][info->psize_idx];
00881          brw_MUL(p, brw_writemask(header1, TGSI_WRITEMASK_W),
00882                  brw_swizzle1(psiz, 0), brw_imm_f(1<<11));
00883          brw_AND(p, brw_writemask(header1, TGSI_WRITEMASK_W), header1,
00884                  brw_imm_ud(0x7ff<<8));
00885       }
00886 
00887 
00888       for (i = 0; i < c->key.nr_userclip; i++) {
00889          brw_set_conditionalmod(p, BRW_CONDITIONAL_L);
00890          brw_DP4(p, brw_null_reg(), pos, c->userplane[i]);
00891          brw_OR(p, brw_writemask(header1, TGSI_WRITEMASK_W), header1, brw_imm_ud(1<<i));
00892          brw_set_predicate_control(p, BRW_PREDICATE_NONE);
00893       }
00894 
00895 
00896       /* i965 clipping workaround:
00897        * 1) Test for -ve rhw
00898        * 2) If set,
00899        *      set ndc = (0,0,0,0)
00900        *      set ucp[6] = 1
00901        *
00902        * Later, clipping will detect ucp[6] and ensure the primitive is
00903        * clipped against all fixed planes.
00904        */
00905       if (!c->key.know_w_is_one) {
00906          brw_CMP(p,
00907                  vec8(brw_null_reg()),
00908                  BRW_CONDITIONAL_L,
00909                  brw_swizzle1(ndc, 3),
00910                  brw_imm_f(0));
00911 
00912          brw_OR(p, brw_writemask(header1, TGSI_WRITEMASK_W), header1, brw_imm_ud(1<<6));
00913          brw_MOV(p, ndc, brw_imm_f(0));
00914          brw_set_predicate_control(p, BRW_PREDICATE_NONE);
00915       }
00916 
00917       brw_set_access_mode(p, BRW_ALIGN_1);      /* why? */
00918       brw_MOV(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD), header1);
00919       brw_set_access_mode(p, BRW_ALIGN_16);
00920 
00921       release_tmp(c, header1);
00922    }
00923    else {
00924       brw_MOV(p, retype(brw_message_reg(1), BRW_REGISTER_TYPE_UD), brw_imm_ud(0));
00925    }
00926 
00927 
00928    /* Emit the (interleaved) headers for the two vertices - an 8-reg
00929     * of zeros followed by two sets of NDC coordinates:
00930     */
00931    brw_set_access_mode(p, BRW_ALIGN_1);
00932    brw_MOV(p, offset(m0, 2), ndc);
00933    brw_MOV(p, offset(m0, 3), pos);
00934 
00935 
00936    brw_urb_WRITE(p,
00937                  brw_null_reg(), /* dest */
00938                  0,             /* starting mrf reg nr */
00939                  c->r0,         /* src */
00940                  0,             /* allocate */
00941                  1,             /* used */
00942                  c->nr_outputs + 3, /* msg len */
00943                  0,             /* response len */
00944                  1,             /* eot */
00945                  1,             /* writes complete */
00946                  0,             /* urb destination offset */
00947                  BRW_URB_SWIZZLE_INTERLEAVE);
00948 
00949 }
00950 
00951 static void
00952 post_vs_emit( struct brw_vs_compile *c, struct brw_instruction *end_inst )
00953 {
00954    struct tgsi_parse_context parse;
00955    const struct tgsi_token *tokens = c->vp->program.tokens;
00956    tgsi_parse_init(&parse, tokens);
00957    while (!tgsi_parse_end_of_tokens(&parse)) {
00958       tgsi_parse_token(&parse);
00959       if (parse.FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION) {
00960 #if 0
00961          struct brw_instruction *brw_inst1, *brw_inst2;
00962          const struct tgsi_full_instruction *inst1, *inst2;
00963          int offset;
00964          inst1 = &parse.FullToken.FullInstruction;
00965          brw_inst1 = inst1->Data;
00966          switch (inst1->Opcode) {
00967          case TGSI_OPCODE_CAL:
00968          case TGSI_OPCODE_BRA:
00969             target_insn = inst1->BranchTarget;
00970             inst2 = &c->vp->program.Base.Instructions[target_insn];
00971             brw_inst2 = inst2->Data;
00972             offset = brw_inst2 - brw_inst1;
00973             brw_set_src1(brw_inst1, brw_imm_d(offset*16));
00974             break;
00975          case TGSI_OPCODE_END:
00976             offset = end_inst - brw_inst1;
00977             brw_set_src1(brw_inst1, brw_imm_d(offset*16));
00978             break;
00979          default:
00980             break;
00981          }
00982 #endif
00983       }
00984    }
00985    tgsi_parse_free(&parse);
00986 }
00987 
00988 static void process_declaration(const struct tgsi_full_declaration *decl,
00989                                 struct brw_prog_info *info)
00990 {
00991    int first = decl->DeclarationRange.First;
00992    int last = decl->DeclarationRange.Last;
00993    
00994    switch(decl->Declaration.File) {
00995    case TGSI_FILE_CONSTANT: 
00996       info->num_consts += last - first + 1;
00997       break;
00998    case TGSI_FILE_INPUT: {
00999    }
01000       break;
01001    case TGSI_FILE_OUTPUT: {
01002       assert(last == first);    /* for now */
01003       if (decl->Declaration.Semantic) {
01004          switch (decl->Semantic.SemanticName) {
01005          case TGSI_SEMANTIC_POSITION: {
01006             info->pos_idx = first;
01007          }
01008             break;
01009          case TGSI_SEMANTIC_COLOR:
01010             break;
01011          case TGSI_SEMANTIC_BCOLOR:
01012             break;
01013          case TGSI_SEMANTIC_FOG:
01014             break;
01015          case TGSI_SEMANTIC_PSIZE: {
01016             info->writes_psize = TRUE;
01017             info->psize_idx = first;
01018          }
01019             break;
01020          case TGSI_SEMANTIC_GENERIC:
01021             break;
01022          }
01023       }
01024    }
01025       break;
01026    case TGSI_FILE_TEMPORARY: {
01027       info->num_temps += (last - first) + 1;
01028    }
01029       break;
01030    case TGSI_FILE_SAMPLER: {
01031    }
01032       break;
01033    case TGSI_FILE_ADDRESS: {
01034       info->num_addrs += (last - first) + 1;
01035    }
01036       break;
01037    case TGSI_FILE_IMMEDIATE: {
01038    }
01039       break;
01040    case TGSI_FILE_NULL: {
01041    }
01042       break;
01043    }
01044 }
01045 
/* Translate one TGSI instruction into i965 instructions.
 *
 * Resolves brw registers for the three source slots and for destination
 * slot 0, emits code for the opcode through the brw_* / emit_* helpers,
 * optionally copies a shadowed output back to its real destination, and
 * finally calls release_tmps().
 *
 * c:    vertex shader compile state; instructions are emitted into c->func.
 * inst: the TGSI instruction to translate.
 * info: program summary; only pos_idx is read here.
 */
01046 static void process_instruction(struct brw_vs_compile *c,
01047                                 struct tgsi_full_instruction *inst,
01048                                 struct brw_prog_info *info)
01049 {
01050    struct brw_reg args[3], dst;
01051    struct brw_compile *p = &c->func;
01052    /*struct brw_indirect stack_index = brw_indirect(0, 0);*/
01053    unsigned i;
01054    unsigned index;
01055    unsigned file;
01056    /*FIXME: might not be the only one*/
01057    const struct tgsi_dst_register *dst_reg = &inst->FullDstRegisters[0].DstRegister;
01058    /*
01059    struct brw_instruction *if_inst[MAX_IFSN];
01060    unsigned insn, if_insn = 0;
01061    */
01062 
         /* Resolve all three source slots, whatever the opcode actually uses.
          * An output register flagged used_in_src is read through its entry in
          * c->output_regs instead of going through get_arg().
          */
01063    for (i = 0; i < 3; i++) {
01064       struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i];
01065       index = src->SrcRegister.Index;
01066       file = src->SrcRegister.File;
01067       if (file == TGSI_FILE_OUTPUT && c->output_regs[index].used_in_src)
01068          args[i] = c->output_regs[index].reg;
01069       else
01070          args[i] = get_arg(c, &src->SrcRegister);
01071    }
01072 
01073    /* Get dest regs.  Note that it is possible for a reg to be both
01074     * dst and arg, given the static allocation of registers.  So
01075     * care needs to be taken emitting multi-operation instructions.
01076     */
01077    index = dst_reg->Index;
01078    file = dst_reg->File;
         /* Same redirection as for sources: a flagged output is written to its
          * c->output_regs entry; the copy to the real destination happens after
          * the switch.
          */
01079    if (file == TGSI_FILE_OUTPUT && c->output_regs[index].used_in_src)
01080       dst = c->output_regs[index].reg;
01081    else
01082       dst = get_dst(c, dst_reg);
01083 
         /* Opcode dispatch.  DST, EXP, LOG and LIT go through unalias1/unalias2
          * with a *_noalias emitter (see the aliasing note above).
          */
01084    switch (inst->Instruction.Opcode) {
01085    case TGSI_OPCODE_ABS:
01086       brw_MOV(p, dst, brw_abs(args[0]));
01087       break;
01088    case TGSI_OPCODE_ADD:
01089       brw_ADD(p, dst, args[0], args[1]);
01090       break;
01091    case TGSI_OPCODE_DP3:
01092       brw_DP3(p, dst, args[0], args[1]);
01093       break;
01094    case TGSI_OPCODE_DP4:
01095       brw_DP4(p, dst, args[0], args[1]);
01096       break;
01097    case TGSI_OPCODE_DPH:
01098       brw_DPH(p, dst, args[0], args[1]);
01099       break;
01100    case TGSI_OPCODE_DST:
01101       unalias2(c, dst, args[0], args[1], emit_dst_noalias);
01102       break;
01103    case TGSI_OPCODE_EXP:
01104       unalias1(c, dst, args[0], emit_exp_noalias);
01105       break;
01106    case TGSI_OPCODE_EX2:
01107       emit_math1(c, BRW_MATH_FUNCTION_EXP, dst, args[0], BRW_MATH_PRECISION_FULL);
01108       break;
01109    case TGSI_OPCODE_ARL:
01110       emit_arl(c, dst, args[0]);
01111       break;
01112    case TGSI_OPCODE_FLR:
01113       brw_RNDD(p, dst, args[0]);
01114       break;
01115    case TGSI_OPCODE_FRC:
01116       brw_FRC(p, dst, args[0]);
01117       break;
01118    case TGSI_OPCODE_LOG:
01119       unalias1(c, dst, args[0], emit_log_noalias);
01120       break;
01121    case TGSI_OPCODE_LG2:
01122       emit_math1(c, BRW_MATH_FUNCTION_LOG, dst, args[0], BRW_MATH_PRECISION_FULL);
01123       break;
01124    case TGSI_OPCODE_LIT:
01125       unalias1(c, dst, args[0], emit_lit_noalias);
01126       break;
01127    case TGSI_OPCODE_MAD:
            /* Two-instruction sequence: args[2] is moved into the accumulator
             * register, then MAC is issued on args[0] and args[1].
             */
01128       brw_MOV(p, brw_acc_reg(), args[2]);
01129       brw_MAC(p, dst, args[0], args[1]);
01130       break;
01131    case TGSI_OPCODE_MAX:
01132       emit_max(p, dst, args[0], args[1]);
01133       break;
01134    case TGSI_OPCODE_MIN:
01135       emit_min(p, dst, args[0], args[1]);
01136       break;
         /* MOV and SWZ share one plain MOV; the emit_swz() path is disabled. */
01137    case TGSI_OPCODE_MOV:
01138    case TGSI_OPCODE_SWZ:
01139 #if 0
01140       /* The args[0] value can't be used here as it won't have
01141        * correctly encoded the full swizzle:
01142        */
01143       emit_swz(c, dst, inst->SrcReg[0] );
01144 #endif
01145       brw_MOV(p, dst, args[0]);
01146       break;
01147    case TGSI_OPCODE_MUL:
01148       brw_MUL(p, dst, args[0], args[1]);
01149       break;
01150    case TGSI_OPCODE_POW:
01151       emit_math2(c, BRW_MATH_FUNCTION_POW, dst, args[0], args[1], BRW_MATH_PRECISION_FULL);
01152       break;
01153    case TGSI_OPCODE_RCP:
01154       emit_math1(c, BRW_MATH_FUNCTION_INV, dst, args[0], BRW_MATH_PRECISION_FULL);
01155       break;
01156    case TGSI_OPCODE_RSQ:
01157       emit_math1(c, BRW_MATH_FUNCTION_RSQ, dst, args[0], BRW_MATH_PRECISION_FULL);
01158       break;
01159 
01160    case TGSI_OPCODE_SEQ:
01161       emit_seq(p, dst, args[0], args[1]);
01162       break;
01163    case TGSI_OPCODE_SNE:
01164       emit_sne(p, dst, args[0], args[1]);
01165       break;
01166    case TGSI_OPCODE_SGE:
01167       emit_sge(p, dst, args[0], args[1]);
01168       break;
01169    case TGSI_OPCODE_SGT:
01170       emit_sgt(p, dst, args[0], args[1]);
01171       break;
01172    case TGSI_OPCODE_SLT:
01173       emit_slt(p, dst, args[0], args[1]);
01174       break;
01175    case TGSI_OPCODE_SLE:
01176       emit_sle(p, dst, args[0], args[1]);
01177       break;
01178    case TGSI_OPCODE_SUB:
            /* Subtraction is an ADD with the second operand negated. */
01179       brw_ADD(p, dst, args[0], negate(args[1]));
01180       break;
01181    case TGSI_OPCODE_XPD:
01182       emit_xpd(p, dst, args[0], args[1]);
01183       break;
01184 #if 0
01185    case TGSI_OPCODE_IF:
01186       assert(if_insn < MAX_IFSN);
01187       if_inst[if_insn++] = brw_IF(p, BRW_EXECUTE_8);
01188       break;
01189    case TGSI_OPCODE_ELSE:
01190       if_inst[if_insn-1] = brw_ELSE(p, if_inst[if_insn-1]);
01191       break;
01192    case TGSI_OPCODE_ENDIF:
01193       assert(if_insn > 0);
01194       brw_ENDIF(p, if_inst[--if_insn]);
01195       break;
01196    case TGSI_OPCODE_BRA:
01197       brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
01198       brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
01199       brw_set_predicate_control_flag_value(p, 0xff);
01200       break;
01201    case TGSI_OPCODE_CAL:
01202       brw_set_access_mode(p, BRW_ALIGN_1);
01203       brw_ADD(p, deref_1uw(stack_index, 0), brw_ip_reg(), brw_imm_d(3*16));
01204       brw_set_access_mode(p, BRW_ALIGN_16);
01205       brw_ADD(p, get_addr_reg(stack_index),
01206               get_addr_reg(stack_index), brw_imm_d(4));
01207       inst->Data = &p->store[p->nr_insn];
01208       brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
01209       break;
01210 #endif
         /* RET currently emits nothing: the enabled branch below contains only
          * a commented-out statement.
          */
01211    case TGSI_OPCODE_RET:
01212 #if 0
01213       brw_ADD(p, get_addr_reg(stack_index),
01214               get_addr_reg(stack_index), brw_imm_d(-4));
01215       brw_set_access_mode(p, BRW_ALIGN_1);
01216       brw_MOV(p, brw_ip_reg(), deref_1uw(stack_index, 0));
01217       brw_set_access_mode(p, BRW_ALIGN_16);
01218 #else
01219       /*brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));*/
01220 #endif
01221       break;
01222    case TGSI_OPCODE_END:
            /* Adds the immediate 1*16 to the ip register.
             * NOTE(review): presumably 16 is the size of one instruction in
             * bytes -- confirm.
             */
01223       brw_ADD(p, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
01224       break;
01225    case TGSI_OPCODE_BGNSUB:
01226    case TGSI_OPCODE_ENDSUB:
01227       break;
01228    default:
            /* Unhandled opcodes are only reported; no code is emitted. */
01229       debug_printf("Unsupport opcode %d in vertex shader\n", inst->Instruction.Opcode);
01230       break;
01231    }
01232 
         /* An output flagged used_in_src was written to c->output_regs[].reg
          * above; copy it to the real destination from get_dst().  The position
          * output is excluded.
          * NOTE(review): presumably position is written out separately by
          * emit_vertex_write() -- confirm.
          */
01233    if (dst_reg->File == TGSI_FILE_OUTPUT
01234        && dst_reg->Index != info->pos_idx
01235        && c->output_regs[dst_reg->Index].used_in_src)
01236       brw_MOV(p, get_dst(c, dst_reg), dst);
01237 
01238    release_tmps(c);
01239 }
01240 
01241 /* Emit the vertex program instructions here.
       *
       * The TGSI token stream of c->vp is walked twice:
       *   1. to flag every output register that some instruction reads as a
       *      source (c->output_regs[].used_in_src);
       *   2. to collect declarations into a brw_prog_info, copy immediates
       *      into c->prog_data.imm_buf, and translate each instruction with
       *      process_instruction().
       * Afterwards emit_vertex_write() and post_vs_emit() are called.
       *
       * c: vertex shader compile state; code is emitted into c->func.
01242  */
01243 void brw_vs_emit(struct brw_vs_compile *c)
01244 {
01245 #define MAX_IFSN 32
01246    struct brw_compile *p = &c->func;
01247    struct brw_instruction *end_inst;
01248    struct tgsi_parse_context parse;
01249    struct brw_indirect stack_index = brw_indirect(0, 0);
01250    const struct tgsi_token *tokens = c->vp->program.tokens;
01251    struct brw_prog_info prog_info;
         /* Set to 1 once brw_vs_alloc_regs() has run (on the first instruction). */
01252    unsigned allocated_registers = 0;
01253    memset(&prog_info, 0, sizeof(struct brw_prog_info));
01254 
01255    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
01256    brw_set_access_mode(p, BRW_ALIGN_16);
01257 
         /* Pass 1: find outputs that are also used as instruction sources. */
01258    tgsi_parse_init(&parse, tokens);
01259    /* Message registers can't be read, so copy the output into GRF register
01260       if they are used in source registers */
01261    while (!tgsi_parse_end_of_tokens(&parse)) {
01262       tgsi_parse_token(&parse);
01263       unsigned i;
01264       switch (parse.FullToken.Token.Type) {
01265       case TGSI_TOKEN_TYPE_INSTRUCTION: {
01266          const struct tgsi_full_instruction *inst = &parse.FullToken.FullInstruction;
               /* All three source slots are inspected for every instruction. */
01267          for (i = 0; i < 3; ++i) {
01268             const struct tgsi_src_register *src = &inst->FullSrcRegisters[i].SrcRegister;
01269             unsigned index = src->Index;
01270             unsigned file = src->File;
01271             if (file == TGSI_FILE_OUTPUT)
01272                c->output_regs[index].used_in_src = TRUE;
01273          }
01274       }
01275          break;
01276       default:
01277          /* nothing */
01278          break;
01279       }
01280    }
01281    tgsi_parse_free(&parse);
01282 
         /* Pass 2: gather declarations/immediates and emit code. */
01283    tgsi_parse_init(&parse, tokens);
01284 
01285    while (!tgsi_parse_end_of_tokens(&parse)) {
01286       tgsi_parse_token(&parse);
01287 
01288       switch (parse.FullToken.Token.Type) {
01289       case TGSI_TOKEN_TYPE_DECLARATION: {
01290          struct tgsi_full_declaration *decl = &parse.FullToken.FullDeclaration;
01291          process_declaration(decl, &prog_info);
01292       }
01293          break;
01294       case TGSI_TOKEN_TYPE_IMMEDIATE: {
01295          struct tgsi_full_immediate *imm = &parse.FullToken.FullImmediate;
               /* Four float components are always copied, whatever the
                * immediate's declared size (the size assert is disabled).
                * NOTE(review): num_imm is not checked against the capacity of
                * imm_buf here -- confirm a bound is enforced elsewhere.
                */
01296          /*assert(imm->Immediate.Size == 4);*/
01297          c->prog_data.imm_buf[c->prog_data.num_imm][0] = imm->u.ImmediateFloat32[0].Float;
01298          c->prog_data.imm_buf[c->prog_data.num_imm][1] = imm->u.ImmediateFloat32[1].Float;
01299          c->prog_data.imm_buf[c->prog_data.num_imm][2] = imm->u.ImmediateFloat32[2].Float;
01300          c->prog_data.imm_buf[c->prog_data.num_imm][3] = imm->u.ImmediateFloat32[3].Float;
01301          c->prog_data.num_imm++;
01302       }
01303          break;
01304       case TGSI_TOKEN_TYPE_INSTRUCTION: {
01305          struct tgsi_full_instruction *inst = &parse.FullToken.FullInstruction;
01306          if (!allocated_registers) {
01307             /* first instruction (declarations finished).
01308              * now that we know what vars are being used allocate
01309              * registers for them.*/
01310             c->prog_data.num_consts = prog_info.num_consts;
                  /* max_const counts declared constants plus the immediates
                   * seen so far.
                   */
01311             c->prog_data.max_const = prog_info.num_consts + c->prog_data.num_imm;
01312             brw_vs_alloc_regs(c, &prog_info);
01313 
                  /* Load the address of c->stack into the address register
                   * behind stack_index; done in ALIGN_1 mode, then ALIGN_16 is
                   * restored.
                   */
01314             brw_set_access_mode(p, BRW_ALIGN_1);
01315             brw_MOV(p, get_addr_reg(stack_index), brw_address(c->stack));
01316             brw_set_access_mode(p, BRW_ALIGN_16);
01317             allocated_registers = 1;
01318          }
01319          process_instruction(c, inst, &prog_info);
01320       }
01321          break;
01322       }
01323    }
01324 
         /* NOTE(review): if the token stream held no instruction token,
          * brw_vs_alloc_regs() was never called before emit_vertex_write().
          *
          * end_inst is the slot at index nr_insn of p->store, taken before
          * emit_vertex_write() runs.
          * NOTE(review): presumably that is where the first vertex-write
          * instruction lands -- confirm.
          */
01325    end_inst = &p->store[p->nr_insn];
01326    emit_vertex_write(c, &prog_info);
01327    post_vs_emit(c, end_inst);
01328    tgsi_parse_free(&parse);
01329 
01330 }

Generated on Tue Sep 29 06:25:17 2009 for Gallium3D by  doxygen 1.5.4