Gallium3D: gallium/auxiliary/gallivm/soabuiltins.c Source File

00001 /**************************************************************************
00002  *
00003  * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
00004  * All Rights Reserved.
00005  *
00006  * Permission is hereby granted, free of charge, to any person obtaining a
00007  * copy of this software and associated documentation files (the
00008  * "Software"), to deal in the Software without restriction, including
00009  * without limitation the rights to use, copy, modify, merge, publish,
00010  * distribute, sub license, and/or sell copies of the Software, and to
00011  * permit persons to whom the Software is furnished to do so, subject to
00012  * the following conditions:
00013  *
00014  * The above copyright notice and this permission notice (including the
00015  * next paragraph) shall be included in all copies or substantial portions
00016  * of the Software.
00017  *
00018  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
00019  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
00020  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
00021  * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
00022  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
00023  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
00024  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
00025  *
00026  **************************************************************************/
00027 
00028  /*
00029   * This file is compiled with clang into the LLVM bitcode
00030   *
00031   * Authors:
00032   *   Zack Rusin zack@tungstengraphics.com
00033   */
00034 typedef __attribute__(( ext_vector_type(4) )) float float4;
00035 
00036 
00037 extern float fabsf(float val);
00038 
00039 /* helpers */
00040 
00041 float4 absvec(float4 vec)
00042 {
00043    float4 res;
00044    res.x = fabsf(vec.x);
00045    res.y = fabsf(vec.y);
00046    res.z = fabsf(vec.z);
00047    res.w = fabsf(vec.w);
00048 
00049    return res;
00050 }
00051 
00052 float4 maxvec(float4 a, float4 b)
00053 {
00054    return (float4){(a.x > b.x) ? a.x : b.x,
00055          (a.y > b.y) ? a.y : b.y,
00056          (a.z > b.z) ? a.z : b.z,
00057          (a.w > b.w) ? a.w : b.w};
00058 }
00059 
00060 float4 minvec(float4 a, float4 b)
00061 {
00062    return (float4){(a.x < b.x) ? a.x : b.x,
00063          (a.y < b.y) ? a.y : b.y,
00064          (a.z < b.z) ? a.z : b.z,
00065          (a.w < b.w) ? a.w : b.w};
00066 }
00067 
00068 extern float powf(float num, float p);
00069 extern float sqrtf(float x);
00070 
00071 float4 powvec(float4 vec, float4 q)
00072 {
00073    float4 p;
00074    p.x = powf(vec.x, q.x);
00075    p.y = powf(vec.y, q.y);
00076    p.z = powf(vec.z, q.z);
00077    p.w = powf(vec.w, q.w);
00078    return p;
00079 }
00080 
00081 float4 sqrtvec(float4 vec)
00082 {
00083    float4 p;
00084    p.x = sqrtf(vec.x);
00085    p.y = sqrtf(vec.y);
00086    p.z = sqrtf(vec.z);
00087    p.w = sqrtf(vec.w);
00088    return p;
00089 }
00090 
00091 float4 sltvec(float4 v1, float4 v2)
00092 {
00093    float4 p;
00094    p.x = (v1.x < v2.x) ? 1.0 : 0.0;
00095    p.y = (v1.y < v2.y) ? 1.0 : 0.0;
00096    p.z = (v1.z < v2.z) ? 1.0 : 0.0;
00097    p.w = (v1.w < v2.w) ? 1.0 : 0.0;
00098    return p;
00099 }
00100 
00101 
00102 /* instructions */
00103 
00104 void abs(float4 *res,
00105          float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w)
00106 {
00107    res[0] = absvec(tmp0x);
00108    res[1] = absvec(tmp0y);
00109    res[2] = absvec(tmp0z);
00110    res[3] = absvec(tmp0w);
00111 }
00112 
00113 void dp3(float4 *res,
00114          float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w,
00115          float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w)
00116 {
00117    float4 dot = (tmp0x * tmp1x) + (tmp0y * tmp1y) +
00118                 (tmp0z * tmp1z);
00119 
00120    res[0] = dot;
00121    res[1] = dot;
00122    res[2] = dot;
00123    res[3] = dot;
00124 }
00125 
00126 void dp4(float4 *res,
00127          float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w,
00128          float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w)
00129 {
00130    float4 dot = (tmp0x * tmp1x) + (tmp0y * tmp1y) +
00131                 (tmp0z * tmp1z) + (tmp0w * tmp1w);
00132 
00133    res[0] = dot;
00134    res[1] = dot;
00135    res[2] = dot;
00136    res[3] = dot;
00137 }
00138 
00139 void lit(float4 *res,
00140          float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w)
00141 {
00142    const float4 zerovec = (float4) {0.0, 0.0, 0.0, 0.0};
00143    const float4 min128 = (float4) {-128.f, -128.f, -128.f, -128.f};
00144    const float4 plus128 = (float4) {128.f,  128.f,  128.f,  128.f};
00145 
00146    res[0] = (float4){1.0, 1.0, 1.0, 1.0};
00147    if (tmp0x.x > 0) {
00148       float4 tmpy = maxvec(tmp0y, zerovec);
00149       float4 tmpw = minvec(tmp0w, plus128);
00150       tmpw = maxvec(tmpw, min128);
00151       res[1] = tmp0x;
00152       res[2] = powvec(tmpy, tmpw);
00153    } else {
00154       res[1] = zerovec;
00155       res[2] = zerovec;
00156    }
00157    res[3] = (float4){1.0, 1.0, 1.0, 1.0};
00158 }
00159 
00160 void min(float4 *res,
00161          float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w,
00162          float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w)
00163 {
00164    res[0] = minvec(tmp0x, tmp1x);
00165    res[1] = minvec(tmp0y, tmp1y);
00166    res[2] = minvec(tmp0z, tmp1z);
00167    res[3] = minvec(tmp0w, tmp1w);
00168 }
00169 
00170 
00171 void max(float4 *res,
00172          float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w,
00173          float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w)
00174 {
00175    res[0] = maxvec(tmp0x, tmp1x);
00176    res[1] = maxvec(tmp0y, tmp1y);
00177    res[2] = maxvec(tmp0z, tmp1z);
00178    res[3] = maxvec(tmp0w, tmp1w);
00179 }
00180 
00181 void pow(float4 *res,
00182          float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w,
00183          float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w)
00184 {
00185    res[0] = powvec(tmp0x, tmp1x);
00186    res[1] = res[0];
00187    res[2] = res[0];
00188    res[3] = res[0];
00189 }
00190 
00191 void rsq(float4 *res,
00192          float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w)
00193 {
00194    const float4 onevec = (float4) {1., 1., 1., 1.};
00195    res[0] = onevec/sqrtvec(absvec(tmp0x));
00196    res[1] = onevec/sqrtvec(absvec(tmp0y));
00197    res[2] = onevec/sqrtvec(absvec(tmp0z));
00198    res[3] = onevec/sqrtvec(absvec(tmp0w));
00199 }
00200 
00201 void slt(float4 *res,
00202          float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w,
00203          float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w)
00204 {
00205    res[0] = sltvec(tmp0x, tmp1x);
00206    res[1] = sltvec(tmp0y, tmp1y);
00207    res[2] = sltvec(tmp0z, tmp1z);
00208    res[3] = sltvec(tmp0w, tmp1w);
00209 }
00210