brw_curbe.c

Go to the documentation of this file.
00001 /*
00002  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
00003  Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
00004  develop this 3D driver.
00005 
00006  Permission is hereby granted, free of charge, to any person obtaining
00007  a copy of this software and associated documentation files (the
00008  "Software"), to deal in the Software without restriction, including
00009  without limitation the rights to use, copy, modify, merge, publish,
00010  distribute, sublicense, and/or sell copies of the Software, and to
00011  permit persons to whom the Software is furnished to do so, subject to
00012  the following conditions:
00013 
00014  The above copyright notice and this permission notice (including the
00015  next paragraph) shall be included in all copies or substantial
00016  portions of the Software.
00017 
00018  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
00019  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
00020  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
00021  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
00022  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
00023  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
00024  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
00025 
00026  **********************************************************************/
00027  /*
00028   * Authors:
00029   *   Keith Whitwell <keith@tungstengraphics.com>
00030   */
00031 
00032 
00033 
00034 #include "brw_context.h"
00035 #include "brw_defines.h"
00036 #include "brw_state.h"
00037 #include "brw_batch.h"
00038 #include "brw_util.h"
00039 #include "brw_wm.h"
00040 #include "pipe/p_state.h"
00041 #include "pipe/p_winsys.h"
00042 #include "util/u_math.h"
00043 #include "util/u_memory.h"
00044 
00045 #define FILE_DEBUG_FLAG DEBUG_FALLBACKS
00046 
00047 /* Partition the CURBE between the various users of constant values:
00048  */
00049 static void calculate_curbe_offsets( struct brw_context *brw )
00050 {
00051    /* CACHE_NEW_WM_PROG */
00052    unsigned nr_fp_regs = align(brw->wm.prog_data->max_const, 16);
00053 
00054    /* BRW_NEW_VERTEX_PROGRAM */
00055    unsigned nr_vp_regs = align(brw->vs.prog_data->max_const, 16);
00056    unsigned nr_clip_regs = 0;
00057    unsigned total_regs;
00058 
00059 #if 0
00060    /* BRW_NEW_CLIP ? */
00061    if (brw->attribs.Transform->ClipPlanesEnabled) {
00062       unsigned nr_planes = 6 + brw_count_bits(brw->attribs.Transform->ClipPlanesEnabled);
00063       nr_clip_regs = align(nr_planes * 4, 16);
00064    }
00065 #endif
00066 
00067 
00068    total_regs = nr_fp_regs + nr_vp_regs + nr_clip_regs;
00069 
00070    /* This can happen - what to do?  Probably rather than falling
00071     * back, the best thing to do is emit programs which code the
00072     * constants as immediate values.  Could do this either as a static
00073     * cap on WM and VS, or adaptively.
00074     *
00075     * Unfortunately, this is currently dependent on the results of the
00076     * program generation process (in the case of wm), so this would
00077     * introduce the need to re-generate programs in the event of a
00078     * curbe allocation failure.
00079     */
00080    /* Max size is 32 - just large enough to
00081     * hold the 128 parameters allowed by
00082     * the fragment and vertex program
00083     * api's.  It's not clear what happens
00084     * when both VP and FP want to use 128
00085     * parameters, though.
00086     */
00087    assert(total_regs <= 32);
00088 
00089    /* Lazy resize:
00090     */
00091    if (nr_fp_regs > brw->curbe.wm_size ||
00092        nr_vp_regs > brw->curbe.vs_size ||
00093        nr_clip_regs != brw->curbe.clip_size ||
00094        (total_regs < brw->curbe.total_size / 4 &&
00095         brw->curbe.total_size > 16)) {
00096 
00097       unsigned reg = 0;
00098 
00099       /* Calculate a new layout:
00100        */
00101       reg = 0;
00102       brw->curbe.wm_start = reg;
00103       brw->curbe.wm_size = nr_fp_regs; reg += nr_fp_regs;
00104       brw->curbe.clip_start = reg;
00105       brw->curbe.clip_size = nr_clip_regs; reg += nr_clip_regs;
00106       brw->curbe.vs_start = reg;
00107       brw->curbe.vs_size = nr_vp_regs; reg += nr_vp_regs;
00108       brw->curbe.total_size = reg;
00109 
00110 #if 0
00111       if (0)
00112          DBG("curbe wm %d+%d clip %d+%d vs %d+%d\n",
00113                       brw->curbe.wm_start,
00114                       brw->curbe.wm_size,
00115                       brw->curbe.clip_start,
00116                       brw->curbe.clip_size,
00117                       brw->curbe.vs_start,
00118                       brw->curbe.vs_size );
00119 #endif
00120 
00121       brw->state.dirty.brw |= BRW_NEW_CURBE_OFFSETS;
00122    }
00123 }
00124 
00125 
00126 const struct brw_tracked_state brw_curbe_offsets = {
00127    .dirty = {
00128       .brw  = (BRW_NEW_CLIP |
00129                BRW_NEW_VS),
00130       .cache = CACHE_NEW_WM_PROG
00131    },
00132    .update = calculate_curbe_offsets
00133 };
00134 
00135 
00136 
00137 /* Define the number of curbes within CS's urb allocation.  Multiple
00138  * urb entries -> multiple curbes.  These will be used by
00139  * fixed-function hardware in a double-buffering scheme to avoid a
00140  * pipeline stall each time the contents of the curbe is changed.
00141  */
00142 void brw_upload_constant_buffer_state(struct brw_context *brw)
00143 {
00144    struct brw_constant_buffer_state cbs;
00145    memset(&cbs, 0, sizeof(cbs));
00146 
00147    /* It appears that this is the state packet for the CS unit, ie. the
00148     * urb entries detailed here are housed in the CS range from the
00149     * URB_FENCE command.
00150     */
00151    cbs.header.opcode = CMD_CONST_BUFFER_STATE;
00152    cbs.header.length = sizeof(cbs)/4 - 2;
00153 
00154    /* BRW_NEW_URB_FENCE */
00155    cbs.bits0.nr_urb_entries = brw->urb.nr_cs_entries;
00156    cbs.bits0.urb_entry_size = brw->urb.csize - 1;
00157 
00158    assert(brw->urb.nr_cs_entries);
00159    BRW_CACHED_BATCH_STRUCT(brw, &cbs);
00160 }
00161 
00162 
/* The six fixed clip planes, one {x, y, z, w} coefficient row per
 * plane.  The table is only ever read, so it is const.
 */
static const float fixed_plane[6][4] = {
   { 0,    0,   -1, 1 },
   { 0,    0,    1, 1 },
   { 0,   -1,    0, 1 },
   { 0,    1,    0, 1 },
   {-1,    0,    0, 1 },
   { 1,    0,    0, 1 }
};
00171 
00172 /* Upload a new set of constants.  Too much variability to go into the
00173  * cache mechanism, but maybe would benefit from a comparison against
00174  * the current uploaded set of constants.
00175  */
00176 static void upload_constant_buffer(struct brw_context *brw)
00177 {
00178    struct brw_mem_pool *pool = &brw->pool[BRW_GS_POOL];
00179    unsigned sz = brw->curbe.total_size;
00180    unsigned bufsz = sz * sizeof(float);
00181    float *buf;
00182    unsigned i;
00183 
00184 
00185    if (sz == 0) {
00186       struct brw_constant_buffer cb;
00187       cb.header.opcode = CMD_CONST_BUFFER;
00188       cb.header.length = sizeof(cb)/4 - 2;
00189       cb.header.valid = 0;
00190       cb.bits0.buffer_length = 0;
00191       cb.bits0.buffer_address = 0;
00192       BRW_BATCH_STRUCT(brw, &cb);
00193 
00194       if (brw->curbe.last_buf) {
00195          free(brw->curbe.last_buf);
00196          brw->curbe.last_buf = NULL;
00197          brw->curbe.last_bufsz  = 0;
00198       }
00199 
00200       return;
00201    }
00202 
00203    buf = (float *)malloc(bufsz);
00204 
00205    memset(buf, 0, bufsz);
00206 
00207    if (brw->curbe.wm_size) {
00208       unsigned offset = brw->curbe.wm_start * 16;
00209 
00210       /* First the constant buffer constants:
00211        */
00212       
00213       /* Then any internally generated constants: 
00214        */
00215       for (i = 0; i < brw->wm.prog_data->nr_internal_consts; i++)
00216          buf[offset + i] = brw->wm.prog_data->internal_const[i];
00217 
00218       assert(brw->wm.prog_data->max_const == 
00219              brw->wm.prog_data->nr_internal_consts);
00220    }
00221 
00222 
00223    /* The clipplanes are actually delivered to both CLIP and VS units.
00224     * VS uses them to calculate the outcode bitmasks.
00225     */
00226    if (brw->curbe.clip_size) {
00227       unsigned offset = brw->curbe.clip_start * 16;
00228       unsigned j;
00229 
00230       /* If any planes are going this way, send them all this way:
00231        */
00232       for (i = 0; i < 6; i++) {
00233          buf[offset + i * 4 + 0] = fixed_plane[i][0];
00234          buf[offset + i * 4 + 1] = fixed_plane[i][1];
00235          buf[offset + i * 4 + 2] = fixed_plane[i][2];
00236          buf[offset + i * 4 + 3] = fixed_plane[i][3];
00237       }
00238 
00239       /* Clip planes: BRW_NEW_CLIP:
00240        */
00241       for (j = 0; j < brw->attribs.Clip.nr; j++) {
00242          buf[offset + i * 4 + 0] = brw->attribs.Clip.ucp[j][0];
00243          buf[offset + i * 4 + 1] = brw->attribs.Clip.ucp[j][1];
00244          buf[offset + i * 4 + 2] = brw->attribs.Clip.ucp[j][2];
00245          buf[offset + i * 4 + 3] = brw->attribs.Clip.ucp[j][3];
00246          i++;
00247       }
00248    }
00249 
00250 
00251    if (brw->curbe.vs_size) {
00252       unsigned offset = brw->curbe.vs_start * 16;
00253       /*unsigned nr = vp->max_const;*/
00254       const struct pipe_constant_buffer *cbuffer = brw->attribs.Constants[0];
00255       struct pipe_winsys *ws = brw->pipe.winsys;
00256       /* FIXME: buffer size is num_consts + num_immediates */
00257       if (brw->vs.prog_data->num_consts) {
00258          /* map the vertex constant buffer and copy to curbe: */
00259          void *data = ws->buffer_map(ws, cbuffer->buffer, 0);
00260          /* FIXME: this is wrong. the cbuffer->size currently
00261           * represents size of consts + immediates. so if we'll
00262           * have both we'll copy over the end of the buffer
00263           * with the subsequent memcpy */
00264          memcpy(&buf[offset], data, cbuffer->size);
00265          ws->buffer_unmap(ws, cbuffer->buffer);
00266          offset += cbuffer->size;
00267       }
00268       /*immediates*/
00269       if (brw->vs.prog_data->num_imm) {
00270          memcpy(&buf[offset], brw->vs.prog_data->imm_buf,
00271                 brw->vs.prog_data->num_imm * 4 * sizeof(float));
00272       }
00273    }
00274 
00275    if (1) {
00276       for (i = 0; i < sz; i+=4)
00277          debug_printf("curbe %d.%d: %f %f %f %f\n", i/8, i&4,
00278                       buf[i+0], buf[i+1], buf[i+2], buf[i+3]);
00279 
00280       debug_printf("last_buf %p buf %p sz %d/%d cmp %d\n",
00281                    brw->curbe.last_buf, buf,
00282                    bufsz, brw->curbe.last_bufsz,
00283                    brw->curbe.last_buf ? memcmp(buf, brw->curbe.last_buf, bufsz) : -1);
00284    }
00285 
00286    if (brw->curbe.last_buf &&
00287        bufsz == brw->curbe.last_bufsz &&
00288        memcmp(buf, brw->curbe.last_buf, bufsz) == 0) {
00289       free(buf);
00290 /*       return; */
00291    }
00292    else {
00293       if (brw->curbe.last_buf)
00294          free(brw->curbe.last_buf);
00295       brw->curbe.last_buf = buf;
00296       brw->curbe.last_bufsz = bufsz;
00297 
00298 
00299       if (!brw_pool_alloc(pool,
00300                           bufsz,
00301                           1 << 6,
00302                           &brw->curbe.gs_offset)) {
00303          debug_printf("out of GS memory for curbe\n");
00304          assert(0);
00305          return;
00306       }
00307 
00308 
00309       /* Copy data to the buffer:
00310        */
00311       brw->winsys->buffer_subdata_typed(brw->winsys,
00312                                         pool->buffer, 
00313                                         brw->curbe.gs_offset, 
00314                                         bufsz, 
00315                                         buf,
00316                                         BRW_CONSTANT_BUFFER );
00317    }
00318 
00319    /* TODO: only emit the constant_buffer packet when necessary, ie:
00320       - contents have changed
00321       - offset has changed
00322       - hw requirements due to other packets emitted.
00323    */
00324    {
00325       struct brw_constant_buffer cb;
00326 
00327       memset(&cb, 0, sizeof(cb));
00328 
00329       cb.header.opcode = CMD_CONST_BUFFER;
00330       cb.header.length = sizeof(cb)/4 - 2;
00331       cb.header.valid = 1;
00332       cb.bits0.buffer_length = sz - 1;
00333       cb.bits0.buffer_address = brw->curbe.gs_offset >> 6;
00334 
00335       /* Because this provokes an action (ie copy the constants into the
00336        * URB), it shouldn't be shortcircuited if identical to the
00337        * previous time - because eg. the urb destination may have
00338        * changed, or the urb contents different to last time.
00339        *
00340        * Note that the data referred to is actually copied internally,
00341        * not just used in place according to passed pointer.
00342        *
00343        * It appears that the CS unit takes care of using each available
00344        * URB entry (Const URB Entry == CURBE) in turn, and issuing
00345        * flushes as necessary when doublebuffering of CURBEs isn't
00346        * possible.
00347        */
00348       BRW_BATCH_STRUCT(brw, &cb);
00349    }
00350 }
00351 
00352 /* This tracked state is unique in that the state it monitors varies
00353  * dynamically depending on the parameters tracked by the fragment and
00354  * vertex programs.  This is the template used as a starting point,
00355  * each context will maintain a copy of this internally and update as
00356  * required.
00357  */
00358 const struct brw_tracked_state brw_constant_buffer = {
00359    .dirty = {
00360       .brw  = (BRW_NEW_CLIP |
00361                BRW_NEW_CONSTANTS |
00362                BRW_NEW_URB_FENCE | /* Implicit - hardware requires this, not used above */
00363                BRW_NEW_PSP | /* Implicit - hardware requires this, not used above */
00364                BRW_NEW_CURBE_OFFSETS),
00365       .cache = (CACHE_NEW_WM_PROG)
00366    },
00367    .update = upload_constant_buffer
00368 };
00369 

Generated on Tue Sep 29 06:25:16 2009 for Gallium3D by  doxygen 1.5.4