brw_eu_emit.c

Go to the documentation of this file.
00001 /*
00002  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
00003  Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
00004  develop this 3D driver.
00005 
00006  Permission is hereby granted, free of charge, to any person obtaining
00007  a copy of this software and associated documentation files (the
00008  "Software"), to deal in the Software without restriction, including
00009  without limitation the rights to use, copy, modify, merge, publish,
00010  distribute, sublicense, and/or sell copies of the Software, and to
00011  permit persons to whom the Software is furnished to do so, subject to
00012  the following conditions:
00013 
00014  The above copyright notice and this permission notice (including the
00015  next paragraph) shall be included in all copies or substantial
00016  portions of the Software.
00017 
00018  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
00019  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
00020  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
00021  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
00022  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
00023  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
00024  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
00025 
00026  **********************************************************************/
00027  /*
00028   * Authors:
00029   *   Keith Whitwell <keith@tungstengraphics.com>
00030   */
00031 
00032 
00033 #include "brw_context.h"
00034 #include "brw_defines.h"
00035 #include "brw_eu.h"
00036 
00037 
00038 
00039 
00040 /***********************************************************************
00041  * Internal helper for constructing instructions
00042  */
00043 
00044 static void guess_execution_size( struct brw_instruction *insn,
00045                                   struct brw_reg reg )
00046 {
00047    if (reg.width == BRW_WIDTH_8 &&
00048        insn->header.compression_control == BRW_COMPRESSION_COMPRESSED)
00049       insn->header.execution_size = BRW_EXECUTE_16;
00050    else
00051       insn->header.execution_size = reg.width;  /* note - definitions are compatible */
00052 }
00053 
00054 
00055 static void brw_set_dest( struct brw_instruction *insn,
00056                           struct brw_reg dest )
00057 {
00058    insn->bits1.da1.dest_reg_file = dest.file;
00059    insn->bits1.da1.dest_reg_type = dest.type;
00060    insn->bits1.da1.dest_address_mode = dest.address_mode;
00061 
00062    if (dest.address_mode == BRW_ADDRESS_DIRECT) {
00063       insn->bits1.da1.dest_reg_nr = dest.nr;
00064 
00065       if (insn->header.access_mode == BRW_ALIGN_1) {
00066          insn->bits1.da1.dest_subreg_nr = dest.subnr;
00067          insn->bits1.da1.dest_horiz_stride = BRW_HORIZONTAL_STRIDE_1;
00068       }
00069       else {
00070          insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
00071          insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
00072       }
00073    }
00074    else {
00075       insn->bits1.ia1.dest_subreg_nr = dest.subnr;
00076 
00077       /* These are different sizes in align1 vs align16:
00078        */
00079       if (insn->header.access_mode == BRW_ALIGN_1) {
00080          insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
00081          insn->bits1.ia1.dest_horiz_stride = BRW_HORIZONTAL_STRIDE_1;
00082       }
00083       else {
00084          insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
00085       }
00086    }
00087 
00088    /* NEW: Set the execution size based on dest.width and
00089     * insn->compression_control:
00090     */
00091    guess_execution_size(insn, dest);
00092 }
00093 
00094 static void brw_set_src0( struct brw_instruction *insn,
00095                       struct brw_reg reg )
00096 {
00097    assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
00098 
00099    insn->bits1.da1.src0_reg_file = reg.file;
00100    insn->bits1.da1.src0_reg_type = reg.type;
00101    insn->bits2.da1.src0_abs = reg.abs;
00102    insn->bits2.da1.src0_negate = reg.negate;
00103    insn->bits2.da1.src0_address_mode = reg.address_mode;
00104 
00105    if (reg.file == BRW_IMMEDIATE_VALUE) {
00106       insn->bits3.ud = reg.dw1.ud;
00107 
00108       /* Required to set some fields in src1 as well:
00109        */
00110       insn->bits1.da1.src1_reg_file = 0; /* arf */
00111       insn->bits1.da1.src1_reg_type = reg.type;
00112    }
00113    else
00114    {
00115       if (reg.address_mode == BRW_ADDRESS_DIRECT) {
00116          if (insn->header.access_mode == BRW_ALIGN_1) {
00117             insn->bits2.da1.src0_subreg_nr = reg.subnr;
00118             insn->bits2.da1.src0_reg_nr = reg.nr;
00119          }
00120          else {
00121             insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
00122             insn->bits2.da16.src0_reg_nr = reg.nr;
00123          }
00124       }
00125       else {
00126          insn->bits2.ia1.src0_subreg_nr = reg.subnr;
00127 
00128          if (insn->header.access_mode == BRW_ALIGN_1) {
00129             insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
00130          }
00131          else {
00132             insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
00133          }
00134       }
00135 
00136       if (insn->header.access_mode == BRW_ALIGN_1) {
00137          if (reg.width == BRW_WIDTH_1 &&
00138              insn->header.execution_size == BRW_EXECUTE_1) {
00139             insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
00140             insn->bits2.da1.src0_width = BRW_WIDTH_1;
00141             insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
00142          }
00143          else {
00144             insn->bits2.da1.src0_horiz_stride = reg.hstride;
00145             insn->bits2.da1.src0_width = reg.width;
00146             insn->bits2.da1.src0_vert_stride = reg.vstride;
00147          }
00148       }
00149       else {
00150          insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
00151          insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
00152          insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
00153          insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
00154 
00155          /* This is an oddity of the fact we're using the same
00156           * descriptions for registers in align_16 as align_1:
00157           */
00158          if (reg.vstride == BRW_VERTICAL_STRIDE_8)
00159             insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
00160          else
00161             insn->bits2.da16.src0_vert_stride = reg.vstride;
00162       }
00163    }
00164 }
00165 
00166 
00167 void brw_set_src1( struct brw_instruction *insn,
00168                           struct brw_reg reg )
00169 {
00170    assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
00171 
00172    insn->bits1.da1.src1_reg_file = reg.file;
00173    insn->bits1.da1.src1_reg_type = reg.type;
00174    insn->bits3.da1.src1_abs = reg.abs;
00175    insn->bits3.da1.src1_negate = reg.negate;
00176 
00177    /* Only src1 can be immediate in two-argument instructions.
00178     */
00179    assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);
00180 
00181    if (reg.file == BRW_IMMEDIATE_VALUE) {
00182       insn->bits3.ud = reg.dw1.ud;
00183    }
00184    else {
00185       /* This is a hardware restriction, which may or may not be lifted
00186        * in the future:
00187        */
00188       assert (reg.address_mode == BRW_ADDRESS_DIRECT);
00189       //assert (reg.file == BRW_GENERAL_REGISTER_FILE);
00190 
00191       if (insn->header.access_mode == BRW_ALIGN_1) {
00192          insn->bits3.da1.src1_subreg_nr = reg.subnr;
00193          insn->bits3.da1.src1_reg_nr = reg.nr;
00194       }
00195       else {
00196          insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
00197          insn->bits3.da16.src1_reg_nr = reg.nr;
00198       }
00199 
00200       if (insn->header.access_mode == BRW_ALIGN_1) {
00201          if (reg.width == BRW_WIDTH_1 &&
00202              insn->header.execution_size == BRW_EXECUTE_1) {
00203             insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
00204             insn->bits3.da1.src1_width = BRW_WIDTH_1;
00205             insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
00206          }
00207          else {
00208             insn->bits3.da1.src1_horiz_stride = reg.hstride;
00209             insn->bits3.da1.src1_width = reg.width;
00210             insn->bits3.da1.src1_vert_stride = reg.vstride;
00211          }
00212       }
00213       else {
00214          insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
00215          insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
00216          insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
00217          insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
00218 
00219          /* This is an oddity of the fact we're using the same
00220           * descriptions for registers in align_16 as align_1:
00221           */
00222          if (reg.vstride == BRW_VERTICAL_STRIDE_8)
00223             insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
00224          else
00225             insn->bits3.da16.src1_vert_stride = reg.vstride;
00226       }
00227    }
00228 }
00229 
00230 
00231 
00232 static void brw_set_math_message( struct brw_instruction *insn,
00233                                   unsigned msg_length,
00234                                   unsigned response_length,
00235                                   unsigned function,
00236                                   unsigned integer_type,
00237                                   boolean low_precision,
00238                                   boolean saturate,
00239                                   unsigned dataType )
00240 {
00241    brw_set_src1(insn, brw_imm_d(0));
00242 
00243    insn->bits3.math.function = function;
00244    insn->bits3.math.int_type = integer_type;
00245    insn->bits3.math.precision = low_precision;
00246    insn->bits3.math.saturate = saturate;
00247    insn->bits3.math.data_type = dataType;
00248    insn->bits3.math.response_length = response_length;
00249    insn->bits3.math.msg_length = msg_length;
00250    insn->bits3.math.msg_target = BRW_MESSAGE_TARGET_MATH;
00251    insn->bits3.math.end_of_thread = 0;
00252 }
00253 
00254 static void brw_set_urb_message( struct brw_instruction *insn,
00255                                  boolean allocate,
00256                                  boolean used,
00257                                  unsigned msg_length,
00258                                  unsigned response_length,
00259                                  boolean end_of_thread,
00260                                  boolean complete,
00261                                  unsigned offset,
00262                                  unsigned swizzle_control )
00263 {
00264    brw_set_src1(insn, brw_imm_d(0));
00265 
00266    insn->bits3.urb.opcode = 0;  /* ? */
00267    insn->bits3.urb.offset = offset;
00268    insn->bits3.urb.swizzle_control = swizzle_control;
00269    insn->bits3.urb.allocate = allocate;
00270    insn->bits3.urb.used = used; /* ? */
00271    insn->bits3.urb.complete = complete;
00272    insn->bits3.urb.response_length = response_length;
00273    insn->bits3.urb.msg_length = msg_length;
00274    insn->bits3.urb.msg_target = BRW_MESSAGE_TARGET_URB;
00275    insn->bits3.urb.end_of_thread = end_of_thread;
00276 }
00277 
00278 static void brw_set_dp_write_message( struct brw_instruction *insn,
00279                                       unsigned binding_table_index,
00280                                       unsigned msg_control,
00281                                       unsigned msg_type,
00282                                       unsigned msg_length,
00283                                       unsigned pixel_scoreboard_clear,
00284                                       unsigned response_length,
00285                                       unsigned end_of_thread )
00286 {
00287    brw_set_src1(insn, brw_imm_d(0));
00288 
00289    insn->bits3.dp_write.binding_table_index = binding_table_index;
00290    insn->bits3.dp_write.msg_control = msg_control;
00291    insn->bits3.dp_write.pixel_scoreboard_clear = pixel_scoreboard_clear;
00292    insn->bits3.dp_write.msg_type = msg_type;
00293    insn->bits3.dp_write.send_commit_msg = 0;
00294    insn->bits3.dp_write.response_length = response_length;
00295    insn->bits3.dp_write.msg_length = msg_length;
00296    insn->bits3.dp_write.msg_target = BRW_MESSAGE_TARGET_DATAPORT_WRITE;
00297    insn->bits3.urb.end_of_thread = end_of_thread;
00298 }
00299 
00300 static void brw_set_dp_read_message( struct brw_instruction *insn,
00301                                       unsigned binding_table_index,
00302                                       unsigned msg_control,
00303                                       unsigned msg_type,
00304                                       unsigned target_cache,
00305                                       unsigned msg_length,
00306                                       unsigned response_length,
00307                                       unsigned end_of_thread )
00308 {
00309    brw_set_src1(insn, brw_imm_d(0));
00310 
00311    insn->bits3.dp_read.binding_table_index = binding_table_index;
00312    insn->bits3.dp_read.msg_control = msg_control;
00313    insn->bits3.dp_read.msg_type = msg_type;
00314    insn->bits3.dp_read.target_cache = target_cache;
00315    insn->bits3.dp_read.response_length = response_length;
00316    insn->bits3.dp_read.msg_length = msg_length;
00317    insn->bits3.dp_read.msg_target = BRW_MESSAGE_TARGET_DATAPORT_READ;
00318    insn->bits3.dp_read.end_of_thread = end_of_thread;
00319 }
00320 
00321 static void brw_set_sampler_message( struct brw_instruction *insn,
00322                                      unsigned binding_table_index,
00323                                      unsigned sampler,
00324                                      unsigned msg_type,
00325                                      unsigned response_length,
00326                                      unsigned msg_length,
00327                                      boolean eot)
00328 {
00329    brw_set_src1(insn, brw_imm_d(0));
00330 
00331    insn->bits3.sampler.binding_table_index = binding_table_index;
00332    insn->bits3.sampler.sampler = sampler;
00333    insn->bits3.sampler.msg_type = msg_type;
00334    insn->bits3.sampler.return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
00335    insn->bits3.sampler.response_length = response_length;
00336    insn->bits3.sampler.msg_length = msg_length;
00337    insn->bits3.sampler.end_of_thread = eot;
00338    insn->bits3.sampler.msg_target = BRW_MESSAGE_TARGET_SAMPLER;
00339 }
00340 
00341 
00342 
00343 static struct brw_instruction *next_insn( struct brw_compile *p,
00344                                           unsigned opcode )
00345 {
00346    struct brw_instruction *insn;
00347 
00348    assert(p->nr_insn + 1 < BRW_EU_MAX_INSN);
00349 
00350    insn = &p->store[p->nr_insn++];
00351    memcpy(insn, p->current, sizeof(*insn));
00352 
00353    /* Reset this one-shot flag:
00354     */
00355 
00356    if (p->current->header.destreg__conditonalmod) {
00357       p->current->header.destreg__conditonalmod = 0;
00358       p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
00359    }
00360 
00361    insn->header.opcode = opcode;
00362    return insn;
00363 }
00364 
00365 
00366 struct brw_instruction *brw_alu1( struct brw_compile *p,
00367                                   unsigned opcode,
00368                                   struct brw_reg dest,
00369                                   struct brw_reg src )
00370 {
00371    struct brw_instruction *insn = next_insn(p, opcode);
00372    brw_set_dest(insn, dest);
00373    brw_set_src0(insn, src);
00374    return insn;
00375 }
00376 
00377 struct brw_instruction *brw_alu2(struct brw_compile *p,
00378                                  unsigned opcode,
00379                                  struct brw_reg dest,
00380                                  struct brw_reg src0,
00381                                  struct brw_reg src1 )
00382 {
00383    struct brw_instruction *insn = next_insn(p, opcode);
00384    brw_set_dest(insn, dest);
00385    brw_set_src0(insn, src0);
00386    brw_set_src1(insn, src1);
00387    return insn;
00388 }
00389 
00390 
00391 /***********************************************************************
00392  * Convenience routines.
00393  */
00394 #define ALU1(OP)                                        \
00395 struct brw_instruction *brw_##OP(struct brw_compile *p,                 \
00396               struct brw_reg dest,                      \
00397               struct brw_reg src0)                      \
00398 {                                                       \
00399    return brw_alu1(p, BRW_OPCODE_##OP, dest, src0);     \
00400 }
00401 
00402 #define ALU2(OP)                                        \
00403 struct brw_instruction *brw_##OP(struct brw_compile *p,                 \
00404               struct brw_reg dest,                      \
00405               struct brw_reg src0,                      \
00406               struct brw_reg src1)                      \
00407 {                                                       \
00408    return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1);       \
00409 }
00410 
00411 
00412 ALU1(MOV)
00413 ALU2(SEL)
00414 ALU1(NOT)
00415 ALU2(AND)
00416 ALU2(OR)
00417 ALU2(XOR)
00418 ALU2(SHR)
00419 ALU2(SHL)
00420 ALU2(RSR)
00421 ALU2(RSL)
00422 ALU2(ASR)
00423 ALU2(ADD)
00424 ALU2(MUL)
00425 ALU1(FRC)
00426 ALU1(RNDD)
00427 ALU2(MAC)
00428 ALU2(MACH)
00429 ALU1(LZD)
00430 ALU2(DP4)
00431 ALU2(DPH)
00432 ALU2(DP3)
00433 ALU2(DP2)
00434 ALU2(LINE)
00435 
00436 
00437 
00438 
00439 void brw_NOP(struct brw_compile *p)
00440 {
00441    struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);
00442    brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
00443    brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
00444    brw_set_src1(insn, brw_imm_ud(0x0));
00445 }
00446 
00447 
00448 
00449 
00450 
00451 /***********************************************************************
00452  * Comparisons, if/else/endif
00453  */
00454 
00455 struct brw_instruction *brw_JMPI(struct brw_compile *p,
00456               struct brw_reg dest,
00457               struct brw_reg src0,
00458               struct brw_reg src1)
00459 {
00460    struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);
00461 
00462    p->current->header.predicate_control = BRW_PREDICATE_NONE;
00463 
00464    return insn;
00465 }
00466 
00467 /* EU takes the value from the flag register and pushes it onto some
00468  * sort of a stack (presumably merging with any flag value already on
00469  * the stack).  Within an if block, the flags at the top of the stack
00470  * control execution on each channel of the unit, eg. on each of the
00471  * 16 pixel values in our wm programs.
00472  *
00473  * When the matching 'else' instruction is reached (presumably by
00474  * countdown of the instruction count patched in by our ELSE/ENDIF
00475  * functions), the relevent flags are inverted.
00476  *
00477  * When the matching 'endif' instruction is reached, the flags are
00478  * popped off.  If the stack is now empty, normal execution resumes.
00479  *
00480  * No attempt is made to deal with stack overflow (14 elements?).
00481  */
00482 struct brw_instruction *brw_IF(struct brw_compile *p, unsigned execute_size)
00483 {
00484    struct brw_instruction *insn;
00485 
00486    if (p->single_program_flow) {
00487       assert(execute_size == BRW_EXECUTE_1);
00488 
00489       insn = next_insn(p, BRW_OPCODE_ADD);
00490       insn->header.predicate_inverse = 1;
00491    } else {
00492       insn = next_insn(p, BRW_OPCODE_IF);
00493    }
00494 
00495    /* Override the defaults for this instruction:
00496     */
00497    brw_set_dest(insn, brw_ip_reg());
00498    brw_set_src0(insn, brw_ip_reg());
00499    brw_set_src1(insn, brw_imm_d(0x0));
00500 
00501    insn->header.execution_size = execute_size;
00502    insn->header.compression_control = BRW_COMPRESSION_NONE;
00503    insn->header.predicate_control = BRW_PREDICATE_NORMAL;
00504    insn->header.mask_control = BRW_MASK_ENABLE;
00505 
00506    p->current->header.predicate_control = BRW_PREDICATE_NONE;
00507 
00508    return insn;
00509 }
00510 
00511 
00512 struct brw_instruction *brw_ELSE(struct brw_compile *p,
00513                                  struct brw_instruction *if_insn)
00514 {
00515    struct brw_instruction *insn;
00516 
00517    if (p->single_program_flow) {
00518       insn = next_insn(p, BRW_OPCODE_ADD);
00519    } else {
00520       insn = next_insn(p, BRW_OPCODE_ELSE);
00521    }
00522 
00523    brw_set_dest(insn, brw_ip_reg());
00524    brw_set_src0(insn, brw_ip_reg());
00525    brw_set_src1(insn, brw_imm_d(0x0));
00526 
00527    insn->header.compression_control = BRW_COMPRESSION_NONE;
00528    insn->header.execution_size = if_insn->header.execution_size;
00529    insn->header.mask_control = BRW_MASK_ENABLE;
00530 
00531    /* Patch the if instruction to point at this instruction.
00532     */
00533    if (p->single_program_flow) {
00534       assert(if_insn->header.opcode == BRW_OPCODE_ADD);
00535 
00536       if_insn->bits3.ud = (insn - if_insn + 1) * 16;
00537    } else {
00538       assert(if_insn->header.opcode == BRW_OPCODE_IF);
00539 
00540       if_insn->bits3.if_else.jump_count = insn - if_insn;
00541       if_insn->bits3.if_else.pop_count = 1;
00542       if_insn->bits3.if_else.pad0 = 0;
00543    }
00544 
00545    return insn;
00546 }
00547 
00548 void brw_ENDIF(struct brw_compile *p,
00549                struct brw_instruction *patch_insn)
00550 {
00551    if (p->single_program_flow) {
00552       /* In single program flow mode, there's no need to execute an ENDIF,
00553        * since we don't need to do any stack operations, and if we're executing
00554        * currently, we want to just continue executing.
00555        */
00556       struct brw_instruction *next = &p->store[p->nr_insn];
00557 
00558       assert(patch_insn->header.opcode == BRW_OPCODE_ADD);
00559 
00560       patch_insn->bits3.ud = (next - patch_insn) * 16;
00561    } else {
00562       struct brw_instruction *insn = next_insn(p, BRW_OPCODE_ENDIF);
00563 
00564       brw_set_dest(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
00565       brw_set_src0(insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
00566       brw_set_src1(insn, brw_imm_d(0x0));
00567 
00568       insn->header.compression_control = BRW_COMPRESSION_NONE;
00569       insn->header.execution_size = patch_insn->header.execution_size;
00570       insn->header.mask_control = BRW_MASK_ENABLE;
00571 
00572       assert(patch_insn->bits3.if_else.jump_count == 0);
00573 
00574       /* Patch the if or else instructions to point at this or the next
00575        * instruction respectively.
00576        */
00577       if (patch_insn->header.opcode == BRW_OPCODE_IF) {
00578          /* Automagically turn it into an IFF:
00579           */
00580          patch_insn->header.opcode = BRW_OPCODE_IFF;
00581          patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1;
00582          patch_insn->bits3.if_else.pop_count = 0;
00583          patch_insn->bits3.if_else.pad0 = 0;
00584       } else if (patch_insn->header.opcode == BRW_OPCODE_ELSE) {
00585          patch_insn->bits3.if_else.jump_count = insn - patch_insn + 1;
00586          patch_insn->bits3.if_else.pop_count = 1;
00587          patch_insn->bits3.if_else.pad0 = 0;
00588       } else {
00589          assert(0);
00590       }
00591 
00592       /* Also pop item off the stack in the endif instruction:
00593        */
00594       insn->bits3.if_else.jump_count = 0;
00595       insn->bits3.if_else.pop_count = 1;
00596       insn->bits3.if_else.pad0 = 0;
00597    }
00598 }
00599 
00600 struct brw_instruction *brw_BREAK(struct brw_compile *p)
00601 {
00602    struct brw_instruction *insn;
00603    insn = next_insn(p, BRW_OPCODE_BREAK);
00604    brw_set_dest(insn, brw_ip_reg());
00605    brw_set_src0(insn, brw_ip_reg());
00606    brw_set_src1(insn, brw_imm_d(0x0));
00607    insn->header.compression_control = BRW_COMPRESSION_NONE;
00608    insn->header.execution_size = BRW_EXECUTE_8;
00609    insn->header.mask_control = BRW_MASK_DISABLE;
00610    insn->bits3.if_else.pad0 = 0;
00611    return insn;
00612 }
00613 
00614 struct brw_instruction *brw_CONT(struct brw_compile *p)
00615 {
00616    struct brw_instruction *insn;
00617    insn = next_insn(p, BRW_OPCODE_CONTINUE);
00618    brw_set_dest(insn, brw_ip_reg());
00619    brw_set_src0(insn, brw_ip_reg());
00620    brw_set_src1(insn, brw_imm_d(0x0));
00621    insn->header.compression_control = BRW_COMPRESSION_NONE;
00622    insn->header.execution_size = BRW_EXECUTE_8;
00623    insn->header.mask_control = BRW_MASK_DISABLE;
00624    insn->bits3.if_else.pad0 = 0;
00625    return insn;
00626 }
00627 
00628 /* DO/WHILE loop:
00629  */
00630 struct brw_instruction *brw_DO(struct brw_compile *p, unsigned execute_size)
00631 {
00632    if (p->single_program_flow) {
00633       return &p->store[p->nr_insn];
00634    } else {
00635       struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);
00636 
00637       /* Override the defaults for this instruction:
00638        */
00639       brw_set_dest(insn, brw_null_reg());
00640       brw_set_src0(insn, brw_null_reg());
00641       brw_set_src1(insn, brw_null_reg());
00642 
00643       insn->header.compression_control = BRW_COMPRESSION_NONE;
00644       insn->header.execution_size = execute_size;
00645       insn->header.predicate_control = BRW_PREDICATE_NONE;
00646       /* insn->header.mask_control = BRW_MASK_ENABLE; */
00647       insn->header.mask_control = BRW_MASK_DISABLE;
00648 
00649       return insn;
00650    }
00651 }
00652 
00653 
00654 
00655 struct brw_instruction *brw_WHILE(struct brw_compile *p,
00656                struct brw_instruction *do_insn)
00657 {
00658    struct brw_instruction *insn;
00659 
00660    if (p->single_program_flow)
00661       insn = next_insn(p, BRW_OPCODE_ADD);
00662    else
00663       insn = next_insn(p, BRW_OPCODE_WHILE);
00664 
00665    brw_set_dest(insn, brw_ip_reg());
00666    brw_set_src0(insn, brw_ip_reg());
00667    brw_set_src1(insn, brw_imm_d(0x0));
00668 
00669    insn->header.compression_control = BRW_COMPRESSION_NONE;
00670 
00671    if (p->single_program_flow) {
00672       insn->header.execution_size = BRW_EXECUTE_1;
00673 
00674       insn->bits3.d = (do_insn - insn) * 16;
00675    } else {
00676       insn->header.execution_size = do_insn->header.execution_size;
00677 
00678       assert(do_insn->header.opcode == BRW_OPCODE_DO);
00679       insn->bits3.if_else.jump_count = do_insn - insn;
00680       insn->bits3.if_else.pop_count = 0;
00681       insn->bits3.if_else.pad0 = 0;
00682    }
00683 
00684 /*    insn->header.mask_control = BRW_MASK_ENABLE; */
00685 
00686    insn->header.mask_control = BRW_MASK_DISABLE;
00687    p->current->header.predicate_control = BRW_PREDICATE_NONE;
00688    return insn;
00689 }
00690 
00691 
00692 /* FORWARD JUMPS:
00693  */
00694 void brw_land_fwd_jump(struct brw_compile *p,
00695                        struct brw_instruction *jmp_insn)
00696 {
00697    struct brw_instruction *landing = &p->store[p->nr_insn];
00698 
00699    assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
00700    assert(jmp_insn->bits1.da1.src1_reg_file = BRW_IMMEDIATE_VALUE);
00701 
00702    jmp_insn->bits3.ud = (landing - jmp_insn) - 1;
00703 }
00704 
00705 
00706 
00707 /* To integrate with the above, it makes sense that the comparison
00708  * instruction should populate the flag register.  It might be simpler
00709  * just to use the flag reg for most WM tasks?
00710  */
00711 void brw_CMP(struct brw_compile *p,
00712              struct brw_reg dest,
00713              unsigned conditional,
00714              struct brw_reg src0,
00715              struct brw_reg src1)
00716 {
00717    struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);
00718 
00719    insn->header.destreg__conditonalmod = conditional;
00720    brw_set_dest(insn, dest);
00721    brw_set_src0(insn, src0);
00722    brw_set_src1(insn, src1);
00723 
00724 /*    guess_execution_size(insn, src0); */
00725 
00726 
00727    /* Make it so that future instructions will use the computed flag
00728     * value until brw_set_predicate_control_flag_value() is called
00729     * again.
00730     */
00731    if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
00732        dest.nr == 0) {
00733       p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
00734       p->flag_value = 0xff;
00735    }
00736 }
00737 
00738 
00739 
00740 /***********************************************************************
00741  * Helpers for the various SEND message types:
00742  */
00743 
00744 /* Invert 8 values
00745  */
00746 void brw_math( struct brw_compile *p,
00747                struct brw_reg dest,
00748                unsigned function,
00749                unsigned saturate,
00750                unsigned msg_reg_nr,
00751                struct brw_reg src,
00752                unsigned data_type,
00753                unsigned precision )
00754 {
00755    struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
00756    unsigned msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
00757    unsigned response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
00758 
00759    /* Example code doesn't set predicate_control for send
00760     * instructions.
00761     */
00762    insn->header.predicate_control = 0;
00763    insn->header.destreg__conditonalmod = msg_reg_nr;
00764 
00765    brw_set_dest(insn, dest);
00766    brw_set_src0(insn, src);
00767    brw_set_math_message(insn,
00768                         msg_length, response_length,
00769                         function,
00770                         BRW_MATH_INTEGER_UNSIGNED,
00771                         precision,
00772                         saturate,
00773                         data_type);
00774 }
00775 
00776 /* Use 2 send instructions to invert 16 elements
00777  */
00778 void brw_math_16( struct brw_compile *p,
00779                   struct brw_reg dest,
00780                   unsigned function,
00781                   unsigned saturate,
00782                   unsigned msg_reg_nr,
00783                   struct brw_reg src,
00784                   unsigned precision )
00785 {
00786    struct brw_instruction *insn;
00787    unsigned msg_length = (function == BRW_MATH_FUNCTION_POW) ? 2 : 1;
00788    unsigned response_length = (function == BRW_MATH_FUNCTION_SINCOS) ? 2 : 1;
00789 
00790    /* First instruction:
00791     */
00792    brw_push_insn_state(p);
00793    brw_set_predicate_control_flag_value(p, 0xff);
00794    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
00795 
00796    insn = next_insn(p, BRW_OPCODE_SEND);
00797    insn->header.destreg__conditonalmod = msg_reg_nr;
00798 
00799    brw_set_dest(insn, dest);
00800    brw_set_src0(insn, src);
00801    brw_set_math_message(insn,
00802                         msg_length, response_length,
00803                         function,
00804                         BRW_MATH_INTEGER_UNSIGNED,
00805                         precision,
00806                         saturate,
00807                         BRW_MATH_DATA_VECTOR);
00808 
00809    /* Second instruction:
00810     */
00811    insn = next_insn(p, BRW_OPCODE_SEND);
00812    insn->header.compression_control = BRW_COMPRESSION_2NDHALF;
00813    insn->header.destreg__conditonalmod = msg_reg_nr+1;
00814 
00815    brw_set_dest(insn, offset(dest,1));
00816    brw_set_src0(insn, src);
00817    brw_set_math_message(insn,
00818                         msg_length, response_length,
00819                         function,
00820                         BRW_MATH_INTEGER_UNSIGNED,
00821                         precision,
00822                         saturate,
00823                         BRW_MATH_DATA_VECTOR);
00824 
00825    brw_pop_insn_state(p);
00826 }
00827 
00828 
00829 
00830 
00831 void brw_dp_WRITE_16( struct brw_compile *p,
00832                       struct brw_reg src,
00833                       unsigned msg_reg_nr,
00834                       unsigned scratch_offset )
00835 {
00836    {
00837       brw_push_insn_state(p);
00838       brw_set_mask_control(p, BRW_MASK_DISABLE);
00839       brw_set_compression_control(p, BRW_COMPRESSION_NONE);
00840 
00841       brw_MOV(p,
00842               retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
00843               brw_imm_d(scratch_offset));
00844 
00845       brw_pop_insn_state(p);
00846    }
00847 
00848    {
00849       unsigned msg_length = 3;
00850       struct brw_reg dest = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
00851       struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
00852 
00853       insn->header.predicate_control = 0; /* XXX */
00854       insn->header.compression_control = BRW_COMPRESSION_NONE;
00855       insn->header.destreg__conditonalmod = msg_reg_nr;
00856 
00857       brw_set_dest(insn, dest);
00858       brw_set_src0(insn, src);
00859 
00860       brw_set_dp_write_message(insn,
00861                                255, /* bti */
00862                                BRW_DATAPORT_OWORD_BLOCK_4_OWORDS, /* msg_control */
00863                                BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE, /* msg_type */
00864                                msg_length,
00865                                0, /* pixel scoreboard */
00866                                0, /* response_length */
00867                                0); /* eot */
00868    }
00869 
00870 }
00871 
00872 
00873 void brw_dp_READ_16( struct brw_compile *p,
00874                       struct brw_reg dest,
00875                       unsigned msg_reg_nr,
00876                       unsigned scratch_offset )
00877 {
00878    {
00879       brw_push_insn_state(p);
00880       brw_set_compression_control(p, BRW_COMPRESSION_NONE);
00881       brw_set_mask_control(p, BRW_MASK_DISABLE);
00882 
00883       brw_MOV(p,
00884               retype(brw_vec1_grf(0, 2), BRW_REGISTER_TYPE_D),
00885               brw_imm_d(scratch_offset));
00886 
00887       brw_pop_insn_state(p);
00888    }
00889 
00890    {
00891       struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
00892 
00893       insn->header.predicate_control = 0; /* XXX */
00894       insn->header.compression_control = BRW_COMPRESSION_NONE;
00895       insn->header.destreg__conditonalmod = msg_reg_nr;
00896 
00897       brw_set_dest(insn, dest); /* UW? */
00898       brw_set_src0(insn, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW));
00899 
00900       brw_set_dp_read_message(insn,
00901                               255, /* bti */
00902                               3,  /* msg_control */
00903                               BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
00904                               1, /* target cache */
00905                               1, /* msg_length */
00906                               2, /* response_length */
00907                               0); /* eot */
00908    }
00909 }
00910 
00911 
00912 void brw_fb_WRITE(struct brw_compile *p,
00913                    struct brw_reg dest,
00914                    unsigned msg_reg_nr,
00915                    struct brw_reg src0,
00916                    unsigned binding_table_index,
00917                    unsigned msg_length,
00918                    unsigned response_length,
00919                    boolean eot)
00920 {
00921    struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
00922 
00923    insn->header.predicate_control = 0; /* XXX */
00924    insn->header.compression_control = BRW_COMPRESSION_NONE;
00925    insn->header.destreg__conditonalmod = msg_reg_nr;
00926 
00927    brw_set_dest(insn, dest);
00928    brw_set_src0(insn, src0);
00929    brw_set_dp_write_message(insn,
00930                             binding_table_index,
00931                             BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE, /* msg_control */
00932                             BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE, /* msg_type */
00933                             msg_length,
00934                             1,  /* pixel scoreboard */
00935                             response_length,
00936                             eot);
00937 }
00938 
00939 
00940 
00941 void brw_SAMPLE(struct brw_compile *p,
00942                 struct brw_reg dest,
00943                 unsigned msg_reg_nr,
00944                 struct brw_reg src0,
00945                 unsigned binding_table_index,
00946                 unsigned sampler,
00947                 unsigned writemask,
00948                 unsigned msg_type,
00949                 unsigned response_length,
00950                 unsigned msg_length,
00951                 boolean eot)
00952 {
00953    boolean need_stall = 0;
00954 
00955    if(writemask == 0) {
00956 /*       debug_printf("%s: zero writemask??\n", __FUNCTION__); */
00957       return;
00958    }
00959 
00960    /* Hardware doesn't do destination dependency checking on send
00961     * instructions properly.  Add a workaround which generates the
00962     * dependency by other means.  In practice it seems like this bug
00963     * only crops up for texture samples, and only where registers are
00964     * written by the send and then written again later without being
00965     * read in between.  Luckily for us, we already track that
00966     * information and use it to modify the writemask for the
00967     * instruction, so that is a guide for whether a workaround is
00968     * needed.
00969     */
00970    if (writemask != TGSI_WRITEMASK_XYZW) {
00971       unsigned dst_offset = 0;
00972       unsigned i, newmask = 0, len = 0;
00973 
00974       for (i = 0; i < 4; i++) {
00975          if (writemask & (1<<i))
00976             break;
00977          dst_offset += 2;
00978       }
00979       for (; i < 4; i++) {
00980          if (!(writemask & (1<<i)))
00981             break;
00982          newmask |= 1<<i;
00983          len++;
00984       }
00985 
00986       if (newmask != writemask) {
00987          need_stall = 1;
00988 /*       debug_printf("need stall %x %x\n", newmask , writemask); */
00989       }
00990       else {
00991          struct brw_reg m1 = brw_message_reg(msg_reg_nr);
00992 
00993          newmask = ~newmask & TGSI_WRITEMASK_XYZW;
00994 
00995          brw_push_insn_state(p);
00996 
00997          brw_set_compression_control(p, BRW_COMPRESSION_NONE);
00998          brw_set_mask_control(p, BRW_MASK_DISABLE);
00999 
01000          brw_MOV(p, m1, brw_vec8_grf(0,0));
01001          brw_MOV(p, get_element_ud(m1, 2), brw_imm_ud(newmask << 12));
01002 
01003          brw_pop_insn_state(p);
01004 
01005          src0 = retype(brw_null_reg(), BRW_REGISTER_TYPE_UW);
01006          dest = offset(dest, dst_offset);
01007          response_length = len * 2;
01008       }
01009    }
01010 
01011    {
01012       struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
01013 
01014       insn->header.predicate_control = 0; /* XXX */
01015       insn->header.compression_control = BRW_COMPRESSION_NONE;
01016       insn->header.destreg__conditonalmod = msg_reg_nr;
01017 
01018       brw_set_dest(insn, dest);
01019       brw_set_src0(insn, src0);
01020       brw_set_sampler_message(insn,
01021                               binding_table_index,
01022                               sampler,
01023                               msg_type,
01024                               response_length,
01025                               msg_length,
01026                               eot);
01027    }
01028 
01029    if (need_stall)
01030    {
01031       struct brw_reg reg = vec8(offset(dest, response_length-1));
01032 
01033       /*  mov (8) r9.0<1>:f    r9.0<8;8,1>:f    { Align1 }
01034        */
01035       brw_push_insn_state(p);
01036       brw_set_compression_control(p, FALSE);
01037       brw_MOV(p, reg, reg);
01038       brw_pop_insn_state(p);
01039    }
01040 
01041 }
01042 
01043 /* All these variables are pretty confusing - we might be better off
01044  * using bitmasks and macros for this, in the old style.  Or perhaps
01045  * just having the caller instantiate the fields in dword3 itself.
01046  */
01047 void brw_urb_WRITE(struct brw_compile *p,
01048                    struct brw_reg dest,
01049                    unsigned msg_reg_nr,
01050                    struct brw_reg src0,
01051                    boolean allocate,
01052                    boolean used,
01053                    unsigned msg_length,
01054                    unsigned response_length,
01055                    boolean eot,
01056                    boolean writes_complete,
01057                    unsigned offset,
01058                    unsigned swizzle)
01059 {
01060    struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
01061 
01062    assert(msg_length < 16);
01063 
01064    brw_set_dest(insn, dest);
01065    brw_set_src0(insn, src0);
01066    brw_set_src1(insn, brw_imm_d(0));
01067 
01068    insn->header.destreg__conditonalmod = msg_reg_nr;
01069 
01070    brw_set_urb_message(insn,
01071                        allocate,
01072                        used,
01073                        msg_length,
01074                        response_length,
01075                        eot,
01076                        writes_complete,
01077                        offset,
01078                        swizzle);
01079 }
01080 

Generated on Tue Sep 29 06:25:16 2009 for Gallium3D by  doxygen 1.5.4