00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
/* Four-element float vector declared with the ext_vector_type(4) attribute;
 * the code in this file addresses its lanes as .x, .y, .z and .w. */
00034 typedef __attribute__(( ext_vector_type(4) )) float float4;
00035
00036
00037 extern float fabsf(float val);
00038
00039
00040
00041 float4 absvec(float4 vec)
00042 {
00043 float4 res;
00044 res.x = fabsf(vec.x);
00045 res.y = fabsf(vec.y);
00046 res.z = fabsf(vec.z);
00047 res.w = fabsf(vec.w);
00048
00049 return res;
00050 }
00051
00052 float4 maxvec(float4 a, float4 b)
00053 {
00054 return (float4){(a.x > b.x) ? a.x : b.x,
00055 (a.y > b.y) ? a.y : b.y,
00056 (a.z > b.z) ? a.z : b.z,
00057 (a.w > b.w) ? a.w : b.w};
00058 }
00059
00060 float4 minvec(float4 a, float4 b)
00061 {
00062 return (float4){(a.x < b.x) ? a.x : b.x,
00063 (a.y < b.y) ? a.y : b.y,
00064 (a.z < b.z) ? a.z : b.z,
00065 (a.w < b.w) ? a.w : b.w};
00066 }
00067
00068 extern float powf(float num, float p);
00069 extern float sqrtf(float x);
00070
00071 float4 powvec(float4 vec, float4 q)
00072 {
00073 float4 p;
00074 p.x = powf(vec.x, q.x);
00075 p.y = powf(vec.y, q.y);
00076 p.z = powf(vec.z, q.z);
00077 p.w = powf(vec.w, q.w);
00078 return p;
00079 }
00080
00081 float4 sqrtvec(float4 vec)
00082 {
00083 float4 p;
00084 p.x = sqrtf(vec.x);
00085 p.y = sqrtf(vec.y);
00086 p.z = sqrtf(vec.z);
00087 p.w = sqrtf(vec.w);
00088 return p;
00089 }
00090
00091 float4 sltvec(float4 v1, float4 v2)
00092 {
00093 float4 p;
00094 p.x = (v1.x < v2.x) ? 1.0 : 0.0;
00095 p.y = (v1.y < v2.y) ? 1.0 : 0.0;
00096 p.z = (v1.z < v2.z) ? 1.0 : 0.0;
00097 p.w = (v1.w < v2.w) ? 1.0 : 0.0;
00098 return p;
00099 }
00100
00101
00102
00103
00104 void abs(float4 *res,
00105 float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w)
00106 {
00107 res[0] = absvec(tmp0x);
00108 res[1] = absvec(tmp0y);
00109 res[2] = absvec(tmp0z);
00110 res[3] = absvec(tmp0w);
00111 }
00112
00113 void dp3(float4 *res,
00114 float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w,
00115 float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w)
00116 {
00117 float4 dot = (tmp0x * tmp1x) + (tmp0y * tmp1y) +
00118 (tmp0z * tmp1z);
00119
00120 res[0] = dot;
00121 res[1] = dot;
00122 res[2] = dot;
00123 res[3] = dot;
00124 }
00125
00126 void dp4(float4 *res,
00127 float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w,
00128 float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w)
00129 {
00130 float4 dot = (tmp0x * tmp1x) + (tmp0y * tmp1y) +
00131 (tmp0z * tmp1z) + (tmp0w * tmp1w);
00132
00133 res[0] = dot;
00134 res[1] = dot;
00135 res[2] = dot;
00136 res[3] = dot;
00137 }
00138
00139 void lit(float4 *res,
00140 float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w)
00141 {
00142 const float4 zerovec = (float4) {0.0, 0.0, 0.0, 0.0};
00143 const float4 min128 = (float4) {-128.f, -128.f, -128.f, -128.f};
00144 const float4 plus128 = (float4) {128.f, 128.f, 128.f, 128.f};
00145
00146 res[0] = (float4){1.0, 1.0, 1.0, 1.0};
00147 if (tmp0x.x > 0) {
00148 float4 tmpy = maxvec(tmp0y, zerovec);
00149 float4 tmpw = minvec(tmp0w, plus128);
00150 tmpw = maxvec(tmpw, min128);
00151 res[1] = tmp0x;
00152 res[2] = powvec(tmpy, tmpw);
00153 } else {
00154 res[1] = zerovec;
00155 res[2] = zerovec;
00156 }
00157 res[3] = (float4){1.0, 1.0, 1.0, 1.0};
00158 }
00159
00160 void min(float4 *res,
00161 float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w,
00162 float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w)
00163 {
00164 res[0] = minvec(tmp0x, tmp1x);
00165 res[1] = minvec(tmp0y, tmp1y);
00166 res[2] = minvec(tmp0z, tmp1z);
00167 res[3] = minvec(tmp0w, tmp1w);
00168 }
00169
00170
00171 void max(float4 *res,
00172 float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w,
00173 float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w)
00174 {
00175 res[0] = maxvec(tmp0x, tmp1x);
00176 res[1] = maxvec(tmp0y, tmp1y);
00177 res[2] = maxvec(tmp0z, tmp1z);
00178 res[3] = maxvec(tmp0w, tmp1w);
00179 }
00180
00181 void pow(float4 *res,
00182 float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w,
00183 float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w)
00184 {
00185 res[0] = powvec(tmp0x, tmp1x);
00186 res[1] = res[0];
00187 res[2] = res[0];
00188 res[3] = res[0];
00189 }
00190
00191 void rsq(float4 *res,
00192 float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w)
00193 {
00194 const float4 onevec = (float4) {1., 1., 1., 1.};
00195 res[0] = onevec/sqrtvec(absvec(tmp0x));
00196 res[1] = onevec/sqrtvec(absvec(tmp0y));
00197 res[2] = onevec/sqrtvec(absvec(tmp0z));
00198 res[3] = onevec/sqrtvec(absvec(tmp0w));
00199 }
00200
00201 void slt(float4 *res,
00202 float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w,
00203 float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w)
00204 {
00205 res[0] = sltvec(tmp0x, tmp1x);
00206 res[1] = sltvec(tmp0y, tmp1y);
00207 res[2] = sltvec(tmp0z, tmp1z);
00208 res[3] = sltvec(tmp0w, tmp1w);
00209 }
00210