00001 /* 00002 Copyright (C) Intel Corp. 2006. All Rights Reserved. 00003 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to 00004 develop this 3D driver. 00005 00006 Permission is hereby granted, free of charge, to any person obtaining 00007 a copy of this software and associated documentation files (the 00008 "Software"), to deal in the Software without restriction, including 00009 without limitation the rights to use, copy, modify, merge, publish, 00010 distribute, sublicense, and/or sell copies of the Software, and to 00011 permit persons to whom the Software is furnished to do so, subject to 00012 the following conditions: 00013 00014 The above copyright notice and this permission notice (including the 00015 next paragraph) shall be included in all copies or substantial 00016 portions of the Software. 00017 00018 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 00019 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 00020 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 00021 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE 00022 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 00023 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 00024 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 00025 00026 **********************************************************************/ 00027 /* 00028 * Authors: 00029 * Keith Whitwell <keith@tungstengraphics.com> 00030 */ 00031 00032 00033 #ifndef BRWCONTEXT_INC 00034 #define BRWCONTEXT_INC 00035 00036 00037 #include "pipe/p_context.h" 00038 #include "pipe/p_defines.h" 00039 #include "pipe/p_state.h" 00040 00041 #include "tgsi/tgsi_scan.h" 00042 00043 #include "brw_structs.h" 00044 #include "brw_winsys.h" 00045 00046 00047 /* Glossary: 00048 * 00049 * URB - uniform resource buffer. A mid-sized buffer which is 00050 * partitioned between the fixed function units and used for passing 00051 * values (vertices, primitives, constants) between them. 00052 * 00053 * CURBE - constant URB entry. An urb region (entry) used to hold 00054 * constant values which the fixed function units can be instructed to 00055 * preload into the GRF when spawining a thread. 00056 * 00057 * VUE - vertex URB entry. An urb entry holding a vertex and usually 00058 * a vertex header. The header contains control information and 00059 * things like primitive type, Begin/end flags and clip codes. 00060 * 00061 * PUE - primitive URB entry. An urb entry produced by the setup (SF) 00062 * unit holding rasterization and interpolation parameters. 00063 * 00064 * GRF - general register file. One of several register files 00065 * addressable by programmed threads. The inputs (r0, payload, curbe, 00066 * urb) of the thread are preloaded to this area before the thread is 00067 * spawned. The registers are individually 8 dwords wide and suitable 00068 * for general usage. Registers holding thread input values are not 00069 * special and may be overwritten. 00070 * 00071 * MRF - message register file. Threads communicate (and terminate) 00072 * by sending messages. Message parameters are placed in contigous 00073 * MRF registers. All program output is via these messages. URB 00074 * entries are populated by sending a message to the shared URB 00075 * function containing the new data, together with a control word, 00076 * often an unmodified copy of R0. 00077 * 00078 * R0 - GRF register 0. Typically holds control information used when 00079 * sending messages to other threads. 00080 * 00081 * EU or GEN4 EU: The name of the programmable subsystem of the 00082 * i965 hardware. Threads are executed by the EU, the registers 00083 * described above are part of the EU architecture. 00084 * 00085 * Fixed function units: 00086 * 00087 * CS - Command streamer. Notional first unit, little software 00088 * interaction. Holds the URB entries used for constant data, ie the 00089 * CURBEs. 00090 * 00091 * VF/VS - Vertex Fetch / Vertex Shader. The fixed function part of 00092 * this unit is responsible for pulling vertices out of vertex buffers 00093 * in vram and injecting them into the processing pipe as VUEs. If 00094 * enabled, it first passes them to a VS thread which is a good place 00095 * for the driver to implement any active vertex shader. 00096 * 00097 * GS - Geometry Shader. This corresponds to a new DX10 concept. If 00098 * enabled, incoming strips etc are passed to GS threads in individual 00099 * line/triangle/point units. The GS thread may perform arbitary 00100 * computation and emit whatever primtives with whatever vertices it 00101 * chooses. This makes GS an excellent place to implement GL's 00102 * unfilled polygon modes, though of course it is capable of much 00103 * more. Additionally, GS is used to translate away primitives not 00104 * handled by latter units, including Quads and Lineloops. 00105 * 00106 * CS - Clipper. Mesa's clipping algorithms are imported to run on 00107 * this unit. The fixed function part performs cliptesting against 00108 * the 6 fixed clipplanes and makes descisions on whether or not the 00109 * incoming primitive needs to be passed to a thread for clipping. 00110 * User clip planes are handled via cooperation with the VS thread. 00111 * 00112 * SF - Strips Fans or Setup: Triangles are prepared for 00113 * rasterization. Interpolation coefficients are calculated. 00114 * Flatshading and two-side lighting usually performed here. 00115 * 00116 * WM - Windower. Interpolation of vertex attributes performed here. 00117 * Fragment shader implemented here. SIMD aspects of EU taken full 00118 * advantage of, as pixels are processed in blocks of 16. 00119 * 00120 * CC - Color Calculator. No EU threads associated with this unit. 00121 * Handles blending and (presumably) depth and stencil testing. 00122 */ 00123 00124 #define BRW_MAX_CURBE (32*16) 00125 00126 struct brw_context; 00127 struct brw_winsys; 00128 00129 00130 /* Raised when we receive new state across the pipe interface: 00131 */ 00132 #define BRW_NEW_VIEWPORT 0x1 00133 #define BRW_NEW_RASTERIZER 0x2 00134 #define BRW_NEW_FS 0x4 00135 #define BRW_NEW_BLEND 0x8 00136 #define BRW_NEW_CLIP 0x10 00137 #define BRW_NEW_SCISSOR 0x20 00138 #define BRW_NEW_STIPPLE 0x40 00139 #define BRW_NEW_FRAMEBUFFER 0x80 00140 #define BRW_NEW_ALPHA_TEST 0x100 00141 #define BRW_NEW_DEPTH_STENCIL 0x200 00142 #define BRW_NEW_SAMPLER 0x400 00143 #define BRW_NEW_TEXTURE 0x800 00144 #define BRW_NEW_CONSTANTS 0x1000 00145 #define BRW_NEW_VBO 0x2000 00146 #define BRW_NEW_VS 0x4000 00147 00148 /* Raised for other internal events: 00149 */ 00150 #define BRW_NEW_URB_FENCE 0x10000 00151 #define BRW_NEW_PSP 0x20000 00152 #define BRW_NEW_CURBE_OFFSETS 0x40000 00153 #define BRW_NEW_REDUCED_PRIMITIVE 0x80000 00154 #define BRW_NEW_PRIMITIVE 0x100000 00155 #define BRW_NEW_SCENE 0x200000 00156 #define BRW_NEW_SF_LINKAGE 0x400000 00157 00158 extern int BRW_DEBUG; 00159 00160 #define DEBUG_TEXTURE 0x1 00161 #define DEBUG_STATE 0x2 00162 #define DEBUG_IOCTL 0x4 00163 #define DEBUG_PRIMS 0x8 00164 #define DEBUG_VERTS 0x10 00165 #define DEBUG_FALLBACKS 0x20 00166 #define DEBUG_VERBOSE 0x40 00167 #define DEBUG_DRI 0x80 00168 #define DEBUG_DMA 0x100 00169 #define DEBUG_SANITY 0x200 00170 #define DEBUG_SYNC 0x400 00171 #define DEBUG_SLEEP 0x800 00172 #define DEBUG_PIXEL 0x1000 00173 #define DEBUG_STATS 0x2000 00174 #define DEBUG_TILE 0x4000 00175 #define DEBUG_SINGLE_THREAD 0x8000 00176 #define DEBUG_WM 0x10000 00177 #define DEBUG_URB 0x20000 00178 #define DEBUG_VS 0x40000 00179 #define DEBUG_BATCH 0x80000 00180 #define DEBUG_BUFMGR 0x100000 00181 #define DEBUG_BLIT 0x200000 00182 #define DEBUG_REGION 0x400000 00183 #define DEBUG_MIPTREE 0x800000 00184 00185 #define DBG(...) do { \ 00186 if (BRW_DEBUG & FILE_DEBUG_FLAG) \ 00187 debug_printf(__VA_ARGS__); \ 00188 } while(0) 00189 00190 #define PRINT(...) do { \ 00191 debug_printf(__VA_ARGS__); \ 00192 } while(0) 00193 00194 struct brw_state_flags { 00195 unsigned cache; 00196 unsigned brw; 00197 }; 00198 00199 00200 struct brw_vertex_program { 00201 struct pipe_shader_state program; 00202 struct tgsi_shader_info info; 00203 int id; 00204 }; 00205 00206 00207 struct brw_fragment_program { 00208 struct pipe_shader_state program; 00209 struct tgsi_shader_info info; 00210 00211 boolean UsesDepth; /* XXX add this to tgsi_shader_info? */ 00212 int id; 00213 }; 00214 00215 00216 struct pipe_setup_linkage { 00217 struct { 00218 unsigned vp_output:5; 00219 unsigned interp_mode:4; 00220 unsigned bf_vp_output:5; 00221 } fp_input[PIPE_MAX_SHADER_INPUTS]; 00222 00223 unsigned fp_input_count:5; 00224 unsigned max_vp_output:5; 00225 }; 00226 00227 00228 00229 struct brw_texture { 00230 struct pipe_texture base; 00231 00232 /* Derived from the above: 00233 */ 00234 unsigned stride; 00235 unsigned depth_pitch; /* per-image on i945? */ 00236 unsigned total_nblocksy; 00237 00238 unsigned nr_images[PIPE_MAX_TEXTURE_LEVELS]; 00239 00240 /* Explicitly store the offset of each image for each cube face or 00241 * depth value. Pretty much have to accept that hardware formats 00242 * are going to be so diverse that there is no unified way to 00243 * compute the offsets of depth/cube images within a mipmap level, 00244 * so have to store them as a lookup table: 00245 */ 00246 unsigned *image_offset[PIPE_MAX_TEXTURE_LEVELS]; 00248 /* Includes image offset tables: 00249 */ 00250 unsigned level_offset[PIPE_MAX_TEXTURE_LEVELS]; 00251 00252 /* The data is held here: 00253 */ 00254 struct pipe_buffer *buffer; 00255 }; 00256 00257 /* Data about a particular attempt to compile a program. Note that 00258 * there can be many of these, each in a different GL state 00259 * corresponding to a different brw_wm_prog_key struct, with different 00260 * compiled programs: 00261 */ 00262 /* Data about a particular attempt to compile a program. Note that 00263 * there can be many of these, each in a different GL state 00264 * corresponding to a different brw_wm_prog_key struct, with different 00265 * compiled programs: 00266 */ 00267 00268 struct brw_wm_prog_data { 00269 unsigned curb_read_length; 00270 unsigned urb_read_length; 00271 00272 unsigned first_curbe_grf; 00273 unsigned total_grf; 00274 unsigned total_scratch; 00275 00276 /* Internally generated constants for the CURBE. These are loaded 00277 * ahead of the data from the constant buffer. 00278 */ 00279 const float internal_const[8]; 00280 unsigned nr_internal_consts; 00281 unsigned max_const; 00282 00283 boolean error; 00284 }; 00285 00286 struct brw_sf_prog_data { 00287 unsigned urb_read_length; 00288 unsigned total_grf; 00289 00290 /* Each vertex may have upto 12 attributes, 4 components each, 00291 * except WPOS which requires only 2. (11*4 + 2) == 44 ==> 11 00292 * rows. 00293 * 00294 * Actually we use 4 for each, so call it 12 rows. 00295 */ 00296 unsigned urb_entry_size; 00297 }; 00298 00299 struct brw_clip_prog_data { 00300 unsigned curb_read_length; /* user planes? */ 00301 unsigned clip_mode; 00302 unsigned urb_read_length; 00303 unsigned total_grf; 00304 }; 00305 00306 struct brw_gs_prog_data { 00307 unsigned urb_read_length; 00308 unsigned total_grf; 00309 }; 00310 00311 struct brw_vs_prog_data { 00312 unsigned curb_read_length; 00313 unsigned urb_read_length; 00314 unsigned total_grf; 00315 unsigned outputs_written; 00316 00317 unsigned inputs_read; 00318 00319 unsigned max_const; 00320 00321 float imm_buf[PIPE_MAX_CONSTANT][4]; 00322 unsigned num_imm; 00323 unsigned num_consts; 00324 00325 /* Used for calculating urb partitions: 00326 */ 00327 unsigned urb_entry_size; 00328 }; 00329 00330 00331 #define BRW_MAX_TEX_UNIT 8 00332 #define BRW_WM_MAX_SURF BRW_MAX_TEX_UNIT + 1 00333 00334 /* Create a fixed sized struct for caching binding tables: 00335 */ 00336 struct brw_surface_binding_table { 00337 unsigned surf_ss_offset[BRW_WM_MAX_SURF]; 00338 }; 00339 00340 00341 struct brw_cache; 00342 00343 struct brw_mem_pool { 00344 struct pipe_buffer *buffer; 00345 00346 unsigned size; 00347 unsigned offset; /* offset of first free byte */ 00348 00349 struct brw_context *brw; 00350 }; 00351 00352 struct brw_cache_item { 00353 unsigned hash; 00354 unsigned key_size; /* for variable-sized keys */ 00355 const void *key; 00356 00357 unsigned offset; /* offset within pool's buffer */ 00358 unsigned data_size; 00359 00360 struct brw_cache_item *next; 00361 }; 00362 00363 00364 00365 struct brw_cache { 00366 unsigned id; 00367 00368 const char *name; 00369 00370 struct brw_context *brw; 00371 struct brw_mem_pool *pool; 00372 00373 struct brw_cache_item **items; 00374 unsigned size, n_items; 00375 00376 unsigned key_size; /* for fixed-size keys */ 00377 unsigned aux_size; 00378 00379 unsigned last_addr; /* offset of active item */ 00380 }; 00381 00382 00383 00384 00385 /* Considered adding a member to this struct to document which flags 00386 * an update might raise so that ordering of the state atoms can be 00387 * checked or derived at runtime. Dropped the idea in favor of having 00388 * a debug mode where the state is monitored for flags which are 00389 * raised that have already been tested against. 00390 */ 00391 struct brw_tracked_state { 00392 struct brw_state_flags dirty; 00393 void (*update)( struct brw_context *brw ); 00394 }; 00395 00396 00397 /* Flags for brw->state.cache. 00398 */ 00399 #define CACHE_NEW_CC_VP (1<<BRW_CC_VP) 00400 #define CACHE_NEW_CC_UNIT (1<<BRW_CC_UNIT) 00401 #define CACHE_NEW_WM_PROG (1<<BRW_WM_PROG) 00402 #define CACHE_NEW_SAMPLER_DEFAULT_COLOR (1<<BRW_SAMPLER_DEFAULT_COLOR) 00403 #define CACHE_NEW_SAMPLER (1<<BRW_SAMPLER) 00404 #define CACHE_NEW_WM_UNIT (1<<BRW_WM_UNIT) 00405 #define CACHE_NEW_SF_PROG (1<<BRW_SF_PROG) 00406 #define CACHE_NEW_SF_VP (1<<BRW_SF_VP) 00407 #define CACHE_NEW_SF_UNIT (1<<BRW_SF_UNIT) 00408 #define CACHE_NEW_VS_UNIT (1<<BRW_VS_UNIT) 00409 #define CACHE_NEW_VS_PROG (1<<BRW_VS_PROG) 00410 #define CACHE_NEW_GS_UNIT (1<<BRW_GS_UNIT) 00411 #define CACHE_NEW_GS_PROG (1<<BRW_GS_PROG) 00412 #define CACHE_NEW_CLIP_VP (1<<BRW_CLIP_VP) 00413 #define CACHE_NEW_CLIP_UNIT (1<<BRW_CLIP_UNIT) 00414 #define CACHE_NEW_CLIP_PROG (1<<BRW_CLIP_PROG) 00415 #define CACHE_NEW_SURFACE (1<<BRW_SS_SURFACE) 00416 #define CACHE_NEW_SURF_BIND (1<<BRW_SS_SURF_BIND) 00417 00418 00419 00420 00421 enum brw_mempool_id { 00422 BRW_GS_POOL, 00423 BRW_SS_POOL, 00424 BRW_MAX_POOL 00425 }; 00426 00427 00428 struct brw_cached_batch_item { 00429 struct header *header; 00430 unsigned sz; 00431 struct brw_cached_batch_item *next; 00432 }; 00433 00434 00435 00436 /* Protect against a future where PIPE_MAX_ATTRIBS > 32. Wouldn't life 00437 * be easier if C allowed arrays of packed elements? 00438 */ 00439 #define ATTRIB_BIT_DWORDS ((PIPE_MAX_ATTRIBS+31)/32) 00440 00441 00442 00443 00444 struct brw_vertex_info { 00445 unsigned varying; /* varying:1[PIPE_MAX_ATTRIBS] */ 00446 unsigned sizes[ATTRIB_BIT_DWORDS * 2]; /* sizes:2[PIPE_MAX_ATTRIBS] */ 00447 }; 00448 00449 00450 00451 00452 00453 struct brw_context 00454 { 00455 struct pipe_context pipe; 00456 struct brw_winsys *winsys; 00457 00458 unsigned primitive; 00459 unsigned reduced_primitive; 00460 00461 boolean emit_state_always; 00462 00463 struct { 00464 struct brw_state_flags dirty; 00465 } state; 00466 00467 00468 struct { 00469 const struct pipe_blend_state *Blend; 00470 const struct pipe_depth_stencil_alpha_state *DepthStencil; 00471 const struct pipe_poly_stipple *PolygonStipple; 00472 const struct pipe_rasterizer_state *Raster; 00473 const struct pipe_sampler_state *Samplers[PIPE_MAX_SAMPLERS]; 00474 const struct brw_vertex_program *VertexProgram; 00475 const struct brw_fragment_program *FragmentProgram; 00476 00477 struct pipe_clip_state Clip; 00478 struct pipe_blend_color BlendColor; 00479 struct pipe_scissor_state Scissor; 00480 struct pipe_viewport_state Viewport; 00481 struct pipe_framebuffer_state FrameBuffer; 00482 00483 const struct pipe_constant_buffer *Constants[2]; 00484 const struct brw_texture *Texture[PIPE_MAX_SAMPLERS]; 00485 } attribs; 00486 00487 unsigned num_samplers; 00488 unsigned num_textures; 00489 00490 struct brw_mem_pool pool[BRW_MAX_POOL]; 00491 struct brw_cache cache[BRW_MAX_CACHE]; 00492 struct brw_cached_batch_item *cached_batch_items; 00493 00494 struct { 00495 00496 /* Arrays with buffer objects to copy non-bufferobj arrays into 00497 * for upload: 00498 */ 00499 const struct pipe_vertex_buffer *vbo_array[PIPE_MAX_ATTRIBS]; 00500 00501 struct brw_vertex_element_state inputs[PIPE_MAX_ATTRIBS]; 00502 00503 #define BRW_NR_UPLOAD_BUFS 17 00504 #define BRW_UPLOAD_INIT_SIZE (128*1024) 00505 00506 /* Summary of size and varying of active arrays, so we can check 00507 * for changes to this state: 00508 */ 00509 struct brw_vertex_info info; 00510 } vb; 00511 00512 00513 unsigned hardware_dirty; 00514 unsigned dirty; 00515 unsigned pci_id; 00516 /* BRW_NEW_URB_ALLOCATIONS: 00517 */ 00518 struct { 00519 unsigned vsize; /* vertex size plus header in urb registers */ 00520 unsigned csize; /* constant buffer size in urb registers */ 00521 unsigned sfsize; /* setup data size in urb registers */ 00522 00523 boolean constrained; 00524 00525 unsigned nr_vs_entries; 00526 unsigned nr_gs_entries; 00527 unsigned nr_clip_entries; 00528 unsigned nr_sf_entries; 00529 unsigned nr_cs_entries; 00530 00531 /* unsigned vs_size; */ 00532 /* unsigned gs_size; */ 00533 /* unsigned clip_size; */ 00534 /* unsigned sf_size; */ 00535 /* unsigned cs_size; */ 00536 00537 unsigned vs_start; 00538 unsigned gs_start; 00539 unsigned clip_start; 00540 unsigned sf_start; 00541 unsigned cs_start; 00542 } urb; 00543 00544 00545 /* BRW_NEW_CURBE_OFFSETS: 00546 */ 00547 struct { 00548 unsigned wm_start; 00549 unsigned wm_size; 00550 unsigned clip_start; 00551 unsigned clip_size; 00552 unsigned vs_start; 00553 unsigned vs_size; 00554 unsigned total_size; 00555 00556 unsigned gs_offset; 00557 00558 float *last_buf; 00559 unsigned last_bufsz; 00560 } curbe; 00561 00562 struct { 00563 struct brw_vs_prog_data *prog_data; 00564 00565 unsigned prog_gs_offset; 00566 unsigned state_gs_offset; 00567 } vs; 00568 00569 struct { 00570 struct brw_gs_prog_data *prog_data; 00571 00572 boolean prog_active; 00573 unsigned prog_gs_offset; 00574 unsigned state_gs_offset; 00575 } gs; 00576 00577 struct { 00578 struct brw_clip_prog_data *prog_data; 00579 00580 unsigned prog_gs_offset; 00581 unsigned vp_gs_offset; 00582 unsigned state_gs_offset; 00583 } clip; 00584 00585 00586 struct { 00587 struct brw_sf_prog_data *prog_data; 00588 00589 struct pipe_setup_linkage linkage; 00590 00591 unsigned prog_gs_offset; 00592 unsigned vp_gs_offset; 00593 unsigned state_gs_offset; 00594 } sf; 00595 00596 struct { 00597 struct brw_wm_prog_data *prog_data; 00598 00599 // struct brw_wm_compiler *compile_data; 00600 00601 00606 struct brw_sampler_state sampler[BRW_MAX_TEX_UNIT]; 00607 00608 unsigned render_surf; 00609 unsigned nr_surfaces; 00610 00611 unsigned max_threads; 00612 struct pipe_buffer *scratch_buffer; 00613 unsigned scratch_buffer_size; 00614 00615 unsigned sampler_count; 00616 unsigned sampler_gs_offset; 00617 00618 struct brw_surface_binding_table bind; 00619 unsigned bind_ss_offset; 00620 00621 unsigned prog_gs_offset; 00622 unsigned state_gs_offset; 00623 } wm; 00624 00625 00626 struct { 00627 unsigned vp_gs_offset; 00628 unsigned state_gs_offset; 00629 } cc; 00630 00631 00632 /* Used to give every program string a unique id 00633 */ 00634 unsigned program_id; 00635 }; 00636 00637 00638 #define BRW_PACKCOLOR8888(r,g,b,a) ((r<<24) | (g<<16) | (b<<8) | a) 00639 00640 00641 /*====================================================================== 00642 * brw_vtbl.c 00643 */ 00644 void brw_do_flush( struct brw_context *brw, 00645 unsigned flags ); 00646 00647 00648 /*====================================================================== 00649 * brw_state.c 00650 */ 00651 void brw_validate_state(struct brw_context *brw); 00652 void brw_init_state(struct brw_context *brw); 00653 void brw_destroy_state(struct brw_context *brw); 00654 00655 00656 /*====================================================================== 00657 * brw_tex.c 00658 */ 00659 void brwUpdateTextureState( struct brw_context *brw ); 00660 00661 00662 /* brw_urb.c 00663 */ 00664 void brw_upload_urb_fence(struct brw_context *brw); 00665 00666 void brw_upload_constant_buffer_state(struct brw_context *brw); 00667 00668 void brw_init_surface_functions(struct brw_context *brw); 00669 void brw_init_state_functions(struct brw_context *brw); 00670 void brw_init_flush_functions(struct brw_context *brw); 00671 void brw_init_string_functions(struct brw_context *brw); 00672 00673 /*====================================================================== 00674 * Inline conversion functions. These are better-typed than the 00675 * macros used previously: 00676 */ 00677 static inline struct brw_context * 00678 brw_context( struct pipe_context *ctx ) 00679 { 00680 return (struct brw_context *)ctx; 00681 } 00682 00683 #endif 00684