Gallium3D: gallium/auxiliary/gallivm/gallivm.cpp Source File

00001 /**************************************************************************
00002  *
00003  * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
00004  * All Rights Reserved.
00005  *
00006  * Permission is hereby granted, free of charge, to any person obtaining a
00007  * copy of this software and associated documentation files (the
00008  * "Software"), to deal in the Software without restriction, including
00009  * without limitation the rights to use, copy, modify, merge, publish,
00010  * distribute, sub license, and/or sell copies of the Software, and to
00011  * permit persons to whom the Software is furnished to do so, subject to
00012  * the following conditions:
00013  *
00014  * The above copyright notice and this permission notice (including the
00015  * next paragraph) shall be included in all copies or substantial portions
00016  * of the Software.
00017  *
00018  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
00019  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
00020  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
00021  * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
00022  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
00023  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
00024  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
00025  *
00026  **************************************************************************/
00027 
00028  /*
00029   * Authors:
00030   *   Zack Rusin zack@tungstengraphics.com
00031   */
00032 #ifdef MESA_LLVM
00033 
00034 #include "gallivm.h"
00035 #include "gallivm_p.h"
00036 
00037 #include "instructions.h"
00038 #include "loweringpass.h"
00039 #include "storage.h"
00040 #include "tgsitollvm.h"
00041 
00042 #include "pipe/p_context.h"
00043 #include "pipe/p_shader_tokens.h"
00044 
00045 #include "tgsi/tgsi_exec.h"
00046 #include "tgsi/tgsi_dump.h"
00047 
00048 #include <llvm/Module.h>
00049 #include <llvm/CallingConv.h>
00050 #include <llvm/Constants.h>
00051 #include <llvm/DerivedTypes.h>
00052 #include <llvm/Instructions.h>
00053 #include <llvm/ModuleProvider.h>
00054 #include <llvm/Pass.h>
00055 #include <llvm/PassManager.h>
00056 #include <llvm/ParameterAttributes.h>
00057 #include <llvm/Support/PatternMatch.h>
00058 #include <llvm/ExecutionEngine/JIT.h>
00059 #include <llvm/ExecutionEngine/Interpreter.h>
00060 #include <llvm/ExecutionEngine/GenericValue.h>
00061 #include <llvm/Support/MemoryBuffer.h>
00062 #include <llvm/LinkAllPasses.h>
00063 #include <llvm/Analysis/Verifier.h>
00064 #include <llvm/Analysis/LoopPass.h>
00065 #include <llvm/Target/TargetData.h>
00066 #include <llvm/Bitcode/ReaderWriter.h>
00067 #include <llvm/Transforms/Utils/Cloning.h>
00068 
00069 #include <sstream>
00070 #include <fstream>
00071 #include <iostream>
00072 
/* File-local counter; gallivm_ir_new() increments it and stores the new
 * value as the id of the IR object it creates. */
static int GLOBAL_ID = 0;
00074 
00075 using namespace llvm;
00076 
00077 static inline
00078 void AddStandardCompilePasses(PassManager &PM)
00079 {
00080    PM.add(new LoweringPass());
00081    PM.add(createVerifierPass());                  // Verify that input is correct
00082 
00083    PM.add(createLowerSetJmpPass());          // Lower llvm.setjmp/.longjmp
00084 
00085    //PM.add(createStripSymbolsPass(true));
00086 
00087    PM.add(createRaiseAllocationsPass());     // call %malloc -> malloc inst
00088    PM.add(createCFGSimplificationPass());    // Clean up disgusting code
00089    PM.add(createPromoteMemoryToRegisterPass());// Kill useless allocas
00090    PM.add(createGlobalOptimizerPass());      // Optimize out global vars
00091    PM.add(createGlobalDCEPass());            // Remove unused fns and globs
00092    PM.add(createIPConstantPropagationPass());// IP Constant Propagation
00093    PM.add(createDeadArgEliminationPass());   // Dead argument elimination
00094    PM.add(createInstructionCombiningPass()); // Clean up after IPCP & DAE
00095    PM.add(createCFGSimplificationPass());    // Clean up after IPCP & DAE
00096 
00097    PM.add(createPruneEHPass());              // Remove dead EH info
00098 
00099    PM.add(createFunctionInliningPass());   // Inline small functions
00100    PM.add(createArgumentPromotionPass());    // Scalarize uninlined fn args
00101 
00102    PM.add(createTailDuplicationPass());      // Simplify cfg by copying code
00103    PM.add(createInstructionCombiningPass()); // Cleanup for scalarrepl.
00104    PM.add(createCFGSimplificationPass());    // Merge & remove BBs
00105    PM.add(createScalarReplAggregatesPass()); // Break up aggregate allocas
00106    PM.add(createInstructionCombiningPass()); // Combine silly seq's
00107    PM.add(createCondPropagationPass());      // Propagate conditionals
00108 
00109    PM.add(createTailCallEliminationPass());  // Eliminate tail calls
00110    PM.add(createCFGSimplificationPass());    // Merge & remove BBs
00111    PM.add(createReassociatePass());          // Reassociate expressions
00112    PM.add(createLoopRotatePass());
00113    PM.add(createLICMPass());                 // Hoist loop invariants
00114    PM.add(createLoopUnswitchPass());         // Unswitch loops.
00115    PM.add(createLoopIndexSplitPass());       // Index split loops.
00116    PM.add(createInstructionCombiningPass()); // Clean up after LICM/reassoc
00117    PM.add(createIndVarSimplifyPass());       // Canonicalize indvars
00118    PM.add(createLoopUnrollPass());           // Unroll small loops
00119    PM.add(createInstructionCombiningPass()); // Clean up after the unroller
00120    PM.add(createGVNPass());                  // Remove redundancies
00121    PM.add(createSCCPPass());                 // Constant prop with SCCP
00122 
00123    // Run instcombine after redundancy elimination to exploit opportunities
00124    // opened up by them.
00125    PM.add(createInstructionCombiningPass());
00126    PM.add(createCondPropagationPass());      // Propagate conditionals
00127 
00128    PM.add(createDeadStoreEliminationPass()); // Delete dead stores
00129    PM.add(createAggressiveDCEPass());        // SSA based 'Aggressive DCE'
00130    PM.add(createCFGSimplificationPass());    // Merge & remove BBs
00131    PM.add(createSimplifyLibCallsPass());     // Library Call Optimizations
00132    PM.add(createDeadTypeEliminationPass());  // Eliminate dead types
00133    PM.add(createConstantMergePass());        // Merge dup global constants
00134 }
00135 
00136 void gallivm_prog_delete(struct gallivm_prog *prog)
00137 {
00138    delete prog->module;
00139    prog->module = 0;
00140    prog->function = 0;
00141    free(prog);
00142 }
00143 
00144 static inline void
00145 constant_interpolation(float (*inputs)[16][4],
00146                        const struct tgsi_interp_coef *coefs,
00147                        unsigned attrib,
00148                        unsigned chan)
00149 {
00150    unsigned i;
00151 
00152    for (i = 0; i < QUAD_SIZE; ++i) {
00153       inputs[i][attrib][chan] = coefs[attrib].a0[chan];
00154    }
00155 }
00156 
00157 static inline void
00158 linear_interpolation(float (*inputs)[16][4],
00159                      const struct tgsi_interp_coef *coefs,
00160                      unsigned attrib,
00161                      unsigned chan)
00162 {
00163    unsigned i;
00164 
00165    for( i = 0; i < QUAD_SIZE; i++ ) {
00166       const float x = inputs[i][0][0];
00167       const float y = inputs[i][0][1];
00168 
00169       inputs[i][attrib][chan] =
00170          coefs[attrib].a0[chan] +
00171          coefs[attrib].dadx[chan] * x +
00172          coefs[attrib].dady[chan] * y;
00173    }
00174 }
00175 
00176 static inline void
00177 perspective_interpolation(float (*inputs)[16][4],
00178                           const struct tgsi_interp_coef *coefs,
00179                           unsigned attrib,
00180                           unsigned chan )
00181 {
00182    unsigned i;
00183 
00184    for( i = 0; i < QUAD_SIZE; i++ ) {
00185       const float x = inputs[i][0][0];
00186       const float y = inputs[i][0][1];
00187       /* WPOS.w here is really 1/w */
00188       const float w = 1.0f / inputs[i][0][3];
00189       assert(inputs[i][0][3] != 0.0);
00190 
00191       inputs[i][attrib][chan] =
00192          (coefs[attrib].a0[chan] +
00193           coefs[attrib].dadx[chan] * x +
00194           coefs[attrib].dady[chan] * y) * w;
00195    }
00196 }
00197 
00198 void gallivm_ir_dump(struct gallivm_ir *ir, const char *file_prefix)
00199 {
00200    if (!ir || !ir->module)
00201       return;
00202 
00203    if (file_prefix) {
00204       std::ostringstream stream;
00205       stream << file_prefix;
00206       stream << ir->id;
00207       stream << ".ll";
00208       std::string name = stream.str();
00209       std::ofstream out(name.c_str());
00210       if (!out) {
00211          std::cerr<<"Can't open file : "<<stream.str()<<std::endl;;
00212          return;
00213       }
00214       out << (*ir->module);
00215       out.close();
00216    } else {
00217       const llvm::Module::FunctionListType &funcs = ir->module->getFunctionList();
00218       llvm::Module::FunctionListType::const_iterator itr;
00219       std::cout<<"; ---------- Start shader "<<ir->id<<std::endl;
00220       for (itr = funcs.begin(); itr != funcs.end(); ++itr) {
00221          const llvm::Function &func = (*itr);
00222          std::string name = func.getName();
00223          const llvm::Function *found = 0;
00224          if (name.find("vs_shader") != std::string::npos ||
00225              name.find("fs_shader") != std::string::npos ||
00226              name.find("function") != std::string::npos)
00227             found = &func;
00228          if (found) {
00229             std::cout<<*found<<std::endl;
00230          }
00231       }
00232       std::cout<<"; ---------- End shader "<<ir->id<<std::endl;
00233    }
00234 }
00235 
00236 
00237 void gallivm_prog_inputs_interpolate(struct gallivm_prog *prog,
00238                                      float (*inputs)[16][4],
00239                                      const struct tgsi_interp_coef *coef)
00240 {
00241    for (int i = 0; i < prog->num_interp; ++i) {
00242       const gallivm_interpolate &interp = prog->interpolators[i];
00243       switch (interp.type) {
00244       case TGSI_INTERPOLATE_CONSTANT:
00245          constant_interpolation(inputs, coef, interp.attrib, interp.chan);
00246          break;
00247 
00248       case TGSI_INTERPOLATE_LINEAR:
00249          linear_interpolation(inputs, coef, interp.attrib, interp.chan);
00250          break;
00251 
00252       case TGSI_INTERPOLATE_PERSPECTIVE:
00253          perspective_interpolation(inputs, coef, interp.attrib, interp.chan);
00254          break;
00255 
00256       default:
00257          assert( 0 );
00258       }
00259    }
00260 }
00261 
00262 
00263 struct gallivm_ir * gallivm_ir_new(enum gallivm_shader_type type)
00264 {
00265    struct gallivm_ir *ir =
00266       (struct gallivm_ir *)calloc(1, sizeof(struct gallivm_ir));
00267    ++GLOBAL_ID;
00268    ir->id   = GLOBAL_ID;
00269    ir->type = type;
00270 
00271    return ir;
00272 }
00273 
00274 void gallivm_ir_set_layout(struct gallivm_ir *ir,
00275                            enum gallivm_vector_layout layout)
00276 {
00277    ir->layout = layout;
00278 }
00279 
00280 void gallivm_ir_set_components(struct gallivm_ir *ir, int num)
00281 {
00282    ir->num_components = num;
00283 }
00284 
00285 void gallivm_ir_fill_from_tgsi(struct gallivm_ir *ir,
00286                                const struct tgsi_token *tokens)
00287 {
00288    std::cout << "Creating llvm from: " <<std::endl;
00289    tgsi_dump(tokens, 0);
00290 
00291    llvm::Module *mod = tgsi_to_llvmir(ir, tokens);
00292    ir->module = mod;
00293    gallivm_ir_dump(ir, 0);
00294 }
00295 
00296 void gallivm_ir_delete(struct gallivm_ir *ir)
00297 {
00298    delete ir->module;
00299    free(ir);
00300 }
00301 
00302 struct gallivm_prog * gallivm_ir_compile(struct gallivm_ir *ir)
00303 {
00304    struct gallivm_prog *prog =
00305       (struct gallivm_prog *)calloc(1, sizeof(struct gallivm_prog));
00306 
00307    std::cout << "Before optimizations:"<<std::endl;
00308    ir->module->dump();
00309    std::cout<<"-------------------------------"<<std::endl;
00310 
00311    PassManager veri;
00312    veri.add(createVerifierPass());
00313    veri.run(*ir->module);
00314    llvm::Module *mod = llvm::CloneModule(ir->module);
00315    prog->num_consts = ir->num_consts;
00316    memcpy(prog->interpolators, ir->interpolators, sizeof(prog->interpolators));
00317    prog->num_interp = ir->num_interp;
00318 
00319    /* Run optimization passes over it */
00320    PassManager passes;
00321    passes.add(new TargetData(mod));
00322    AddStandardCompilePasses(passes);
00323    passes.run(*mod);
00324    prog->module = mod;
00325 
00326    std::cout << "After optimizations:"<<std::endl;
00327    mod->dump();
00328 
00329    return prog;
00330 }
00331 
00332 #endif /* MESA_LLVM */