@@ -88,7 +91,7 @@ extern int GESDD(char *jobz, int *m, int *n, LM_REAL *a, int *lda, LM_REAL *s, L
...
@@ -88,7 +91,7 @@ extern int GESDD(char *jobz, int *m, int *n, LM_REAL *a, int *lda, LM_REAL *s, L
*
*
* A is mxm, b is mx1
* A is mxm, b is mx1
*
*
* The function returns 0 in case of error, 1 if successfull
* The function returns 0 in case of error, 1 if successful
*
*
* This function is often called repetitively to solve problems of identical
* This function is often called repetitively to solve problems of identical
* dimensions. To avoid repetitive malloc's and free's, allocated memory is
* dimensions. To avoid repetitive malloc's and free's, allocated memory is
...
@@ -100,6 +103,8 @@ int AX_EQ_B_QR(LM_REAL *A, LM_REAL *B, LM_REAL *x, int m)
...
@@ -100,6 +103,8 @@ int AX_EQ_B_QR(LM_REAL *A, LM_REAL *B, LM_REAL *x, int m)
__STATIC__LM_REAL*buf=NULL;
__STATIC__LM_REAL*buf=NULL;
__STATIC__intbuf_sz=0;
__STATIC__intbuf_sz=0;
staticintnb=0;/* no __STATIC__ decl. here! */
LM_REAL*a,*qtb,*tau,*r,*work;
LM_REAL*a,*qtb,*tau,*r,*work;
inta_sz,qtb_sz,tau_sz,r_sz,tot_sz;
inta_sz,qtb_sz,tau_sz,r_sz,tot_sz;
registerinti,j;
registerinti,j;
...
@@ -123,7 +128,15 @@ register LM_REAL sum;
...
@@ -123,7 +128,15 @@ register LM_REAL sum;
qtb_sz=m;
qtb_sz=m;
tau_sz=m;
tau_sz=m;
r_sz=m*m;/* only the upper triangular part really needed */
r_sz=m*m;/* only the upper triangular part really needed */
worksz=3*m;/* this is probably too much */
if(!nb){
LM_REALtmp;
worksz=-1;// workspace query; optimal size is returned
GEQRF((int*)&m,(int*)&m,NULL,(int*)&m,NULL,(LM_
nb=((int)tmp)/m;// optimal worksize is m*nb
}
worksz=nb*m;
tot_sz=a_sz+qtb_sz+tau_sz+r_sz+worksz;
tot_sz=a_sz+qtb_sz+tau_sz+r_sz+worksz;
#ifdef LINSOLVERS_RETAIN_MEMORY
#ifdef LINSOLVERS_RETAIN_MEMORY
...
@@ -244,7 +257,7 @@ register LM_REAL sum;
...
@@ -244,7 +257,7 @@ register LM_REAL sum;
*
*
* A is mxn, b is mx1
* A is mxn, b is mx1
*
*
* The function returns 0 in case of error, 1 if successfull
* The function returns 0 in case of error, 1 if successful
*
*
* This function is often called repetitively to solve problems of identical
* This function is often called repetitively to solve problems of identical
* dimensions. To avoid repetitive malloc's and free's, allocated memory is
* dimensions. To avoid repetitive malloc's and free's, allocated memory is
...
@@ -256,6 +269,8 @@ int AX_EQ_B_QRLS(LM_REAL *A, LM_REAL *B, LM_REAL *x, int m, int n)
...
@@ -256,6 +269,8 @@ int AX_EQ_B_QRLS(LM_REAL *A, LM_REAL *B, LM_REAL *x, int m, int n)
__STATIC__LM_REAL*buf=NULL;
__STATIC__LM_REAL*buf=NULL;
__STATIC__intbuf_sz=0;
__STATIC__intbuf_sz=0;
staticintnb=0;/* no __STATIC__ decl. here! */
LM_REAL*a,*atb,*tau,*r,*work;
LM_REAL*a,*atb,*tau,*r,*work;
inta_sz,atb_sz,tau_sz,r_sz,tot_sz;
inta_sz,atb_sz,tau_sz,r_sz,tot_sz;
registerinti,j;
registerinti,j;
...
@@ -284,7 +299,14 @@ register LM_REAL sum;
...
@@ -284,7 +299,14 @@ register LM_REAL sum;
atb_sz=n;
atb_sz=n;
tau_sz=n;
tau_sz=n;
r_sz=n*n;
r_sz=n*n;
worksz=3*n;/* this is probably too much */
if(!nb){
LM_REALtmp;
worksz=-1;// workspace query; optimal size is returned
GEQRF((int*)&m,(int*)&m,NULL,(int*)&m,NULL,(LM_
nb=((int)tmp)/m;// optimal worksize is m*nb
}
worksz=nb*m;
tot_sz=a_sz+atb_sz+tau_sz+r_sz+worksz;
tot_sz=a_sz+atb_sz+tau_sz+r_sz+worksz;
#ifdef LINSOLVERS_RETAIN_MEMORY
#ifdef LINSOLVERS_RETAIN_MEMORY
...
@@ -411,7 +433,7 @@ register LM_REAL sum;
...
@@ -411,7 +433,7 @@ register LM_REAL sum;
*
*
* A is mxm, b is mx1
* A is mxm, b is mx1
*
*
* The function returns 0 in case of error, 1 if successfull
* The function returns 0 in case of error, 1 if successful
*
*
* This function is often called repetitively to solve problems of identical
* This function is often called repetitively to solve problems of identical
* dimensions. To avoid repetitive malloc's and free's, allocated memory is
* dimensions. To avoid repetitive malloc's and free's, allocated memory is
...
@@ -425,7 +447,7 @@ __STATIC__ int buf_sz=0;
...
@@ -425,7 +447,7 @@ __STATIC__ int buf_sz=0;
LM_REAL*a,*b;
LM_REAL*a,*b;
inta_sz,b_sz,tot_sz;
inta_sz,b_sz,tot_sz;
registerinti,j;
registerinti;
intinfo,nrhs=1;
intinfo,nrhs=1;
if(!A)
if(!A)
...
@@ -468,24 +490,29 @@ int info, nrhs=1;
...
@@ -468,24 +490,29 @@ int info, nrhs=1;
a=buf;
a=buf;
b=a+a_sz;
b=a+a_sz;
/* store A (column major!) into a anb B into b */
/* store A into a anb B into b. A is assumed symmetric,
for(i=0;i<m;i++){
* hence no transposition is needed
for(j=0;j<m;j++)
*/
a[i+j*m]=A[i*m+j];
for(i=0;i<m;i++){
a[i]=A[i];
b[i]=B[i];
b[i]=B[i];
}
}
for(i=m;i<m*m;i++)
a[i]=A[i];
/* Cholesky decomposition of A */
/* Cholesky decomposition of A */
POTF2("U",(int*)&m,a,(int*)&m,(int*)&info);
//POTF2("U", (int *)&m, a, (int *)&m, (int *)&info);
POTRF("U",(int*)&m,a,(int*)&m,(int*)&info);
/* error treatment */
/* error treatment */
if(info!=0){
if(info!=0){
if(info<0){
if(info<0){
fprintf(stderr,RCAT(RCAT("LAPACK error: illegal value for argument %d of ",POTF2)" in ",AX_EQ_B_CHOL)"()\n",-info);
fprintf(stderr,RCAT(RCAT(RCAT("LAPACK error: illegal value for argument %d of ",POTF2)"/",POTRF)" in ",
AX_EQ_B_CHOL)"()\n",-info);
exit(1);
exit(1);
}
}
else{
else{
fprintf(stderr,RCAT(RCAT("LAPACK error: the leading minor of order %d is not positive definite,\nthe factorization could not be completed for ",POTF2)" in ",AX_EQ_B_CHOL)"()\n",info);
fprintf(stderr,RCAT(RCAT(RCAT("LAPACK error: the leading minor of order %d is not positive definite,\nthe factorization could not be completed for ",POTF2)"/",POTRF)," in ",
AX_EQ_B_CHOL)"()\n",info);
#ifndef LINSOLVERS_RETAIN_MEMORY
#ifndef LINSOLVERS_RETAIN_MEMORY
free(buf);
free(buf);
#endif
#endif
...
@@ -494,7 +521,15 @@ int info, nrhs=1;
...
@@ -494,7 +521,15 @@ int info, nrhs=1;
}
}
}
}
/* solve the linear system U^T y = b */
/* solve using the computed Cholesky in one lapack call */