A standalone, templated implementation of SVD is available in the PVFMM library (See file: include/mat_utils.txx).
The library is open source and is hosted on GitHub.
It is also available for download from the University of Texas website:
http://padas.ices.utexas.edu/software/
It is Algorithm1 in: http://www.cs.utexas.edu/users/inderjit/public_papers/HLA_SVD.pdf
(Computation of the Singular Value Decomposition, Alan Kaylor Cline, Inderjit S. Dhillon)
I implemented it for computing SVD in quadruple precision. My implementation is not very efficient since I only need it for offline pre-computation.
The function svd uses the interface for dgesvd in LAPACK, with JOBU='S' and JOBVT='S',
with the exception that the singular values are not sorted.
This code is available free without warranty of any kind.
#include
#include
#include
#include
#include
#include
#define U(i,j) U_[(i)*dim[0]+(j)]
#define S(i,j) S_[(i)*dim[1]+(j)]
#define V(i,j) V_[(i)*dim[1]+(j)]
template
void GivensL(T* S_, const size_t dim[2], size_t m, T a, T b){
T r=sqrt(a*a+b*b);
T c=a/r;
T s=-b/r;
#pragma omp parallel for
for(size_t i=0;i
void GivensR(T* S_, const size_t dim[2], size_t m, T a, T b){
T r=sqrt(a*a+b*b);
T c=a/r;
T s=-b/r;
#pragma omp parallel for
for(size_t i=0;i
void SVD(const size_t dim[2], T* U_, T* S_, T* V_, T eps=-1){
assert(dim[0]>=dim[1]);
{ // Bi-diagonalization
size_t n=std::min(dim[0],dim[1]);
std::vector house_vec(std::max(dim[0],dim[1]));
for(size_t i=0;i0) x_inv_norm=1/sqrt(x_inv_norm);
T alpha=sqrt(1+x1*x_inv_norm);
T beta=x_inv_norm/alpha;
house_vec[i]=-alpha;
for(size_t j=i+1;j=n-1) continue;
{
T x1=S(i,i+1);
if(x1<0) x1=-x1;
T x_inv_norm=0;
for(size_t j=i+1;j0) x_inv_norm=1/sqrt(x_inv_norm);
T alpha=sqrt(1+x1*x_inv_norm);
T beta=x_inv_norm/alpha;
house_vec[i+1]=-alpha;
for(size_t j=i+2;j1.0) eps*=0.5;
eps*=64.0;
}
while(k0S(i,i)?S_max:S(i,i));
while(k0eps*S_max) n++;
T alpha=0;
T beta=0;
{ // Compute mu
T C[2][2];
C[0][0]=S(n-2,n-2)*S(n-2,n-2);
if(n-k0>2) C[0][0]+=S(n-3,n-2)*S(n-3,n-2);
C[0][1]=S(n-2,n-2)*S(n-2,n-1);
C[1][0]=S(n-2,n-2)*S(n-2,n-1);
C[1][1]=S(n-1,n-1)*S(n-1,n-1)+S(n-2,n-1)*S(n-2,n-1);
T b=-(C[0][0]+C[1][1])/2;
T c= C[0][0]*C[1][1] - C[0][1]*C[1][0];
T d=0;
if(b*b-c>0) d=sqrt(b*b-c);
else{
T b=(C[0][0]-C[1][1])/2;
T c=-C[0][1]*C[1][0];
if(b*b-c>0) d=sqrt(b*b-c);
}
T lambda1=-b+d;
T lambda2=-b-d;
T d1=lambda1-C[1][1]; d1=(d1<0?-d1:d1);
T d2=lambda2-C[1][1]; d2=(d2<0?-d2:d2);
T mu=(d1i1 || i0+1i1 || i0+1
inline void svd(char *JOBU, char *JOBVT, int *M, int *N, T *A, int *LDA,
T *S, T *U, int *LDU, T *VT, int *LDVT, T *WORK, int *LWORK,
int *INFO){
assert(*JOBU=='S');
assert(*JOBVT=='S');
const size_t dim[2]={std::max(*N,*M), std::min(*N,*M)};
T* U_=new T[dim[0]*dim[0]]; memset(U_, 0, dim[0]*dim[0]*sizeof(T));
T* V_=new T[dim[1]*dim[1]]; memset(V_, 0, dim[1]*dim[1]*sizeof(T));
T* S_=new T[dim[0]*dim[1]];
const size_t lda=*LDA;
const size_t ldu=*LDU;
const size_t ldv=*LDVT;
if(dim[1]==*M){
for(size_t i=0;i(dim, U_, S_, V_, (T)-1);
for(size_t i=0;in?m:n);
int wssize1 = 5*(mwssize1?wssize:wssize1);
Real_t* wsbuf = new Real_t[wssize];
svd(&JOBU, &JOBVT, &m, &n, &M[0], &m, &tS[0], &tU[0], &m, &tVT[0], &k, wsbuf, &wssize, &INFO);
delete[] wsbuf;
}
{ // Check Error
Real_t max_err=0;
for(size_t i0=0;i0