90 #ifndef PCL_GPU_FEATURES_EIGEN_HPP_ 91 #define PCL_GPU_FEATURES_EIGEN_HPP_ 93 #include <pcl/gpu/utils/device/limits.hpp> 94 #include <pcl/gpu/utils/device/algorithm.hpp> 95 #include <pcl/gpu/utils/device/vector_math.hpp> 101 __device__ __forceinline__
void computeRoots2(
const float& b,
const float& c, float3& roots)
104 float d = b * b - 4.f * c;
110 roots.z = 0.5f * (b + sd);
111 roots.y = 0.5f * (b - sd);
114 __device__ __forceinline__
void computeRoots3(
float c0,
float c1,
float c2, float3& roots)
122 const float s_inv3 = 1.f/3.f;
123 const float s_sqrt3 = sqrtf(3.f);
126 float c2_over_3 = c2 * s_inv3;
127 float a_over_3 = (c1 - c2*c2_over_3)*s_inv3;
131 float half_b = 0.5f * (c0 + c2_over_3 * (2.f * c2_over_3 * c2_over_3 - c1));
133 float q = half_b * half_b + a_over_3 * a_over_3 * a_over_3;
138 float rho = sqrtf(-a_over_3);
139 float theta = atan2f (sqrtf (-q), half_b)*s_inv3;
140 float cos_theta = __cosf (theta);
141 float sin_theta = __sinf (theta);
142 roots.x = c2_over_3 + 2.f * rho * cos_theta;
143 roots.y = c2_over_3 - rho * (cos_theta + s_sqrt3 * sin_theta);
144 roots.z = c2_over_3 - rho * (cos_theta - s_sqrt3 * sin_theta);
147 if (roots.x >= roots.y)
148 swap(roots.x, roots.y);
150 if (roots.y >= roots.z)
152 swap(roots.y, roots.z);
154 if (roots.x >= roots.y)
155 swap (roots.x, roots.y);
169 __device__ __host__ __forceinline__ float3&
operator[](
int i) {
return data[i]; }
170 __device__ __host__ __forceinline__
const float3&
operator[](
int i)
const {
return data[i]; }
186 if(!isMuchSmallerThan(src.x, src.z) || !isMuchSmallerThan(src.y, src.z))
188 float invnm = rsqrtf(src.x*src.x + src.y*src.y);
189 perp.x = -src.y * invnm;
190 perp.y = src.x * invnm;
199 float invnm = rsqrtf(src.z * src.z + src.y * src.y);
201 perp.y = -src.z * invnm;
202 perp.z = src.y * invnm;
208 __device__ __forceinline__
Eigen33(
volatile float* mat_pkg_arg) : mat_pkg(mat_pkg_arg) {}
214 float max01 = fmaxf( fabsf(mat_pkg[0]), fabsf(mat_pkg[1]) );
215 float max23 = fmaxf( fabsf(mat_pkg[2]), fabsf(mat_pkg[3]) );
216 float max45 = fmaxf( fabsf(mat_pkg[4]), fabsf(mat_pkg[5]) );
217 float m0123 = fmaxf( max01, max23);
218 float scale = fmaxf( max45, m0123);
233 float c0 = m00() * m11() * m22()
234 + 2.f * m01() * m02() * m12()
235 - m00() * m12() * m12()
236 - m11() * m02() * m02()
237 - m22() * m01() * m01();
238 float c1 = m00() * m11() -
244 float c2 = m00() + m11() + m22();
250 evecs[0] = make_float3(1.f, 0.f, 0.f);
251 evecs[1] = make_float3(0.f, 1.f, 0.f);
252 evecs[2] = make_float3(0.f, 0.f, 1.f);
257 tmp[0] = row0(); tmp[1] = row1(); tmp[2] = row2();
258 tmp[0].x -= evals.z; tmp[1].y -= evals.z; tmp[2].z -= evals.z;
260 vec_tmp[0] =
cross(tmp[0], tmp[1]);
261 vec_tmp[1] =
cross(tmp[0], tmp[2]);
262 vec_tmp[2] =
cross(tmp[1], tmp[2]);
264 float len1 =
dot (vec_tmp[0], vec_tmp[0]);
265 float len2 =
dot (vec_tmp[1], vec_tmp[1]);
266 float len3 =
dot (vec_tmp[2], vec_tmp[2]);
268 if (len1 >= len2 && len1 >= len3)
270 evecs[2] = vec_tmp[0] * rsqrtf (len1);
272 else if (len2 >= len1 && len2 >= len3)
274 evecs[2] = vec_tmp[1] * rsqrtf (len2);
278 evecs[2] = vec_tmp[2] * rsqrtf (len3);
282 evecs[0] =
cross(evecs[1], evecs[2]);
287 tmp[0] = row0(); tmp[1] = row1(); tmp[2] = row2();
288 tmp[0].x -= evals.x; tmp[1].y -= evals.x; tmp[2].z -= evals.x;
290 vec_tmp[0] =
cross(tmp[0], tmp[1]);
291 vec_tmp[1] =
cross(tmp[0], tmp[2]);
292 vec_tmp[2] =
cross(tmp[1], tmp[2]);
294 float len1 =
dot(vec_tmp[0], vec_tmp[0]);
295 float len2 =
dot(vec_tmp[1], vec_tmp[1]);
296 float len3 =
dot(vec_tmp[2], vec_tmp[2]);
298 if (len1 >= len2 && len1 >= len3)
300 evecs[0] = vec_tmp[0] * rsqrtf(len1);
302 else if (len2 >= len1 && len2 >= len3)
304 evecs[0] = vec_tmp[1] * rsqrtf(len2);
308 evecs[0] = vec_tmp[2] * rsqrtf(len3);
312 evecs[2] =
cross(evecs[0], evecs[1]);
317 tmp[0] = row0(); tmp[1] = row1(); tmp[2] = row2();
318 tmp[0].x -= evals.z; tmp[1].y -= evals.z; tmp[2].z -= evals.z;
320 vec_tmp[0] =
cross(tmp[0], tmp[1]);
321 vec_tmp[1] =
cross(tmp[0], tmp[2]);
322 vec_tmp[2] =
cross(tmp[1], tmp[2]);
324 float len1 =
dot(vec_tmp[0], vec_tmp[0]);
325 float len2 =
dot(vec_tmp[1], vec_tmp[1]);
326 float len3 =
dot(vec_tmp[2], vec_tmp[2]);
330 unsigned int min_el = 2;
331 unsigned int max_el = 2;
332 if (len1 >= len2 && len1 >= len3)
335 evecs[2] = vec_tmp[0] * rsqrtf (len1);
337 else if (len2 >= len1 && len2 >= len3)
340 evecs[2] = vec_tmp[1] * rsqrtf (len2);
345 evecs[2] = vec_tmp[2] * rsqrtf (len3);
348 tmp[0] = row0(); tmp[1] = row1(); tmp[2] = row2();
349 tmp[0].x -= evals.y; tmp[1].y -= evals.y; tmp[2].z -= evals.y;
351 vec_tmp[0] =
cross(tmp[0], tmp[1]);
352 vec_tmp[1] =
cross(tmp[0], tmp[2]);
353 vec_tmp[2] =
cross(tmp[1], tmp[2]);
355 len1 =
dot(vec_tmp[0], vec_tmp[0]);
356 len2 =
dot(vec_tmp[1], vec_tmp[1]);
357 len3 =
dot(vec_tmp[2], vec_tmp[2]);
359 if (len1 >= len2 && len1 >= len3)
362 evecs[1] = vec_tmp[0] * rsqrtf (len1);
363 min_el = len1 <= mmax[min_el] ? 1 : min_el;
364 max_el = len1 > mmax[max_el] ? 1 : max_el;
366 else if (len2 >= len1 && len2 >= len3)
369 evecs[1] = vec_tmp[1] * rsqrtf (len2);
370 min_el = len2 <= mmax[min_el] ? 1 : min_el;
371 max_el = len2 > mmax[max_el] ? 1 : max_el;
376 evecs[1] = vec_tmp[2] * rsqrtf (len3);
377 min_el = len3 <= mmax[min_el] ? 1 : min_el;
378 max_el = len3 > mmax[max_el] ? 1 : max_el;
381 tmp[0] = row0(); tmp[1] = row1(); tmp[2] = row2();
382 tmp[0].x -= evals.x; tmp[1].y -= evals.x; tmp[2].z -= evals.x;
384 vec_tmp[0] =
cross(tmp[0], tmp[1]);
385 vec_tmp[1] =
cross(tmp[0], tmp[2]);
386 vec_tmp[2] =
cross(tmp[1], tmp[2]);
388 len1 =
dot (vec_tmp[0], vec_tmp[0]);
389 len2 =
dot (vec_tmp[1], vec_tmp[1]);
390 len3 =
dot (vec_tmp[2], vec_tmp[2]);
393 if (len1 >= len2 && len1 >= len3)
396 evecs[0] = vec_tmp[0] * rsqrtf (len1);
397 min_el = len3 <= mmax[min_el] ? 0 : min_el;
398 max_el = len3 > mmax[max_el] ? 0 : max_el;
400 else if (len2 >= len1 && len2 >= len3)
403 evecs[0] = vec_tmp[1] * rsqrtf (len2);
404 min_el = len3 <= mmax[min_el] ? 0 : min_el;
405 max_el = len3 > mmax[max_el] ? 0 : max_el;
410 evecs[0] = vec_tmp[2] * rsqrtf (len3);
411 min_el = len3 <= mmax[min_el] ? 0 : min_el;
412 max_el = len3 > mmax[max_el] ? 0 : max_el;
415 unsigned mid_el = 3 - min_el - max_el;
416 evecs[min_el] =
normalized(
cross( evecs[(min_el+1) % 3], evecs[(min_el+2) % 3] ) );
417 evecs[mid_el] =
normalized(
cross( evecs[(mid_el+1) % 3], evecs[(mid_el+2) % 3] ) );
423 volatile float* mat_pkg;
425 __device__ __forceinline__
float m00()
const {
return mat_pkg[0]; }
426 __device__ __forceinline__
float m01()
const {
return mat_pkg[1]; }
427 __device__ __forceinline__
float m02()
const {
return mat_pkg[2]; }
428 __device__ __forceinline__
float m10()
const {
return mat_pkg[1]; }
429 __device__ __forceinline__
float m11()
const {
return mat_pkg[3]; }
430 __device__ __forceinline__
float m12()
const {
return mat_pkg[4]; }
431 __device__ __forceinline__
float m20()
const {
return mat_pkg[2]; }
432 __device__ __forceinline__
float m21()
const {
return mat_pkg[4]; }
433 __device__ __forceinline__
float m22()
const {
return mat_pkg[5]; }
435 __device__ __forceinline__ float3 row0()
const {
return make_float3( m00(), m01(), m02() ); }
436 __device__ __forceinline__ float3 row1()
const {
return make_float3( m10(), m11(), m12() ); }
437 __device__ __forceinline__ float3 row2()
const {
return make_float3( m20(), m21(), m22() ); }
439 __device__ __forceinline__
static bool isMuchSmallerThan (
float x,
float y)
443 return x * x <= prec_sqr * y * y;
__device__ __host__ __forceinline__ void swap(T &a, T &b)
__device__ __forceinline__ float3 normalized(const float3 &v)
__device__ __host__ __forceinline__ const float3 & operator[](int i) const
__device__ __host__ __forceinline__ float3 cross(const float3 &v1, const float3 &v2)
__device__ __forceinline__ void compute(Mat33 &tmp, Mat33 &vec_tmp, Mat33 &evecs, float3 &evals)
__device__ __forceinline__ float dot(const float3 &v1, const float3 &v2)
__device__ static __forceinline__ type epsilon()
static __forceinline__ __device__ float3 unitOrthogonal(const float3 &src)
__device__ __forceinline__ void computeRoots2(const float &b, const float &c, float3 &roots)
__device__ __forceinline__ Eigen33(volatile float *mat_pkg_arg)
__device__ __host__ __forceinline__ float3 & operator[](int i)
__device__ __forceinline__ void computeRoots3(float c0, float c1, float c2, float3 &roots)