Actual source code: bvimpl.h

slepc-3.8.2 2017-12-01
Report Typos and Errors
  1: /*
  2:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  3:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  4:    Copyright (c) 2002-2017, Universitat Politecnica de Valencia, Spain

  6:    This file is part of SLEPc.
  7:    SLEPc is distributed under a 2-clause BSD license (see LICENSE).
  8:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  9: */

 11: #if !defined(_BVIMPL)
 12: #define _BVIMPL

 14: #include <slepcbv.h>
 15: #include <slepc/private/slepcimpl.h>

 17: PETSC_EXTERN PetscBool BVRegisterAllCalled;
 18: PETSC_EXTERN PetscErrorCode BVRegisterAll(void);

 20: PETSC_EXTERN PetscLogEvent BV_Create,BV_Copy,BV_Mult,BV_MultVec,BV_MultInPlace,BV_Dot,BV_DotVec,BV_Orthogonalize,BV_OrthogonalizeVec,BV_Scale,BV_Norm,BV_NormVec,BV_SetRandom,BV_MatMult,BV_MatMultVec,BV_MatProject;

 22: typedef struct _BVOps *BVOps;

 24: struct _BVOps {
 25:   PetscErrorCode (*mult)(BV,PetscScalar,PetscScalar,BV,Mat);
 26:   PetscErrorCode (*multvec)(BV,PetscScalar,PetscScalar,Vec,PetscScalar*);
 27:   PetscErrorCode (*multinplace)(BV,Mat,PetscInt,PetscInt);
 28:   PetscErrorCode (*multinplacetrans)(BV,Mat,PetscInt,PetscInt);
 29:   PetscErrorCode (*dot)(BV,BV,Mat);
 30:   PetscErrorCode (*dotvec)(BV,Vec,PetscScalar*);
 31:   PetscErrorCode (*dotvec_local)(BV,Vec,PetscScalar*);
 32:   PetscErrorCode (*dotvec_begin)(BV,Vec,PetscScalar*);
 33:   PetscErrorCode (*dotvec_end)(BV,Vec,PetscScalar*);
 34:   PetscErrorCode (*scale)(BV,PetscInt,PetscScalar);
 35:   PetscErrorCode (*norm)(BV,PetscInt,NormType,PetscReal*);
 36:   PetscErrorCode (*norm_local)(BV,PetscInt,NormType,PetscReal*);
 37:   PetscErrorCode (*norm_begin)(BV,PetscInt,NormType,PetscReal*);
 38:   PetscErrorCode (*norm_end)(BV,PetscInt,NormType,PetscReal*);
 39:   PetscErrorCode (*matmult)(BV,Mat,BV);
 40:   PetscErrorCode (*copy)(BV,BV);
 41:   PetscErrorCode (*resize)(BV,PetscInt,PetscBool);
 42:   PetscErrorCode (*getcolumn)(BV,PetscInt,Vec*);
 43:   PetscErrorCode (*restorecolumn)(BV,PetscInt,Vec*);
 44:   PetscErrorCode (*getarray)(BV,PetscScalar**);
 45:   PetscErrorCode (*restorearray)(BV,PetscScalar**);
 46:   PetscErrorCode (*getarrayread)(BV,const PetscScalar**);
 47:   PetscErrorCode (*restorearrayread)(BV,const PetscScalar**);
 48:   PetscErrorCode (*duplicate)(BV,BV*);
 49:   PetscErrorCode (*create)(BV);
 50:   PetscErrorCode (*setfromoptions)(PetscOptionItems*,BV);
 51:   PetscErrorCode (*view)(BV,PetscViewer);
 52:   PetscErrorCode (*destroy)(BV);
 53: };

 55: struct _p_BV {
 56:   PETSCHEADER(struct _BVOps);
 57:   /*------------------------- User parameters --------------------------*/
 58:   Vec                t;            /* template vector */
 59:   PetscInt           n,N;          /* dimensions of vectors (local, global) */
 60:   PetscInt           m;            /* number of vectors */
 61:   PetscInt           l;            /* number of leading columns */
 62:   PetscInt           k;            /* number of active columns */
 63:   PetscInt           nc;           /* number of constraints */
 64:   BVOrthogType       orthog_type;  /* the method of vector orthogonalization */
 65:   BVOrthogRefineType orthog_ref;   /* refinement method */
 66:   PetscReal          orthog_eta;   /* refinement threshold */
 67:   BVOrthogBlockType  orthog_block; /* the method of block orthogonalization */
 68:   Mat                matrix;       /* inner product matrix */
 69:   PetscBool          indef;        /* matrix is indefinite */
 70:   BVMatMultType      vmm;          /* version of matmult operation */

 72:   /*---------------------- Cached data and workspace -------------------*/
 73:   Vec                Bx;           /* result of matrix times a vector x */
 74:   Vec                buffer;       /* buffer vector used in orthogonalization */
 75:   PetscObjectId      xid;          /* object id of vector x */
 76:   PetscObjectState   xstate;       /* state of vector x */
 77:   Vec                cv[2];        /* column vectors obtained with BVGetColumn() */
 78:   PetscInt           ci[2];        /* column indices of obtained vectors */
 79:   PetscObjectState   st[2];        /* state of obtained vectors */
 80:   PetscObjectId      id[2];        /* object id of obtained vectors */
 81:   PetscScalar        *h,*c;        /* orthogonalization coefficients */
 82:   Vec                omega;        /* signature matrix values for indefinite case */
 83:   Mat                B,C;          /* auxiliary dense matrices for matmult operation */
 84:   PetscObjectId      Aid;          /* object id of matrix A of matmult operation */
 85:   PetscBool          defersfo;     /* deferred call to setfromoptions */
 86:   BV                 cached;       /* cached BV to store result of matrix times BV */
 87:   PetscObjectState   bvstate;      /* state of BV when BVApplyMatrixBV() was called */
 88:   PetscRandom        rand;         /* random number generator */
 89:   PetscBool          rrandom;      /* reproducible random vectors */
 90:   Mat                Acreate;      /* matrix given at BVCreateFromMat() */
 91:   PetscBool          cuda;         /* true if GPU must be used in SVEC */
 92:   PetscScalar        *work;
 93:   PetscInt           lwork;
 94:   void               *data;
 95: };

 97: /*
 98:   BV_SafeSqrt - Computes the square root of a scalar value alpha, which is
 99:   assumed to be z'*B*z. The result is
100:     if definite inner product:     res = sqrt(alpha)
101:     if indefinite inner product:   res = sgn(alpha)*sqrt(abs(alpha))
102: */
103: PETSC_STATIC_INLINE PetscErrorCode BV_SafeSqrt(BV bv,PetscScalar alpha,PetscReal *res)
104: {
106:   PetscReal      absal,realp;

109:   absal = PetscAbsScalar(alpha);
110:   realp = PetscRealPart(alpha);
111:   if (absal<PETSC_MACHINE_EPSILON) {
112:     PetscInfo(bv,"Zero norm, either the vector is zero or a semi-inner product is being used\n");
113:   }
114: #if defined(PETSC_USE_COMPLEX)
115:   if (PetscAbsReal(PetscImaginaryPart(alpha))>PETSC_MACHINE_EPSILON && PetscAbsReal(PetscImaginaryPart(alpha))/absal>100*PETSC_MACHINE_EPSILON) SETERRQ1(PetscObjectComm((PetscObject)bv),1,"The inner product is not well defined: nonzero imaginary part %g",PetscImaginaryPart(alpha));
116: #endif
117:   if (bv->indef) {
118:     *res = (realp<0.0)? -PetscSqrtReal(-realp): PetscSqrtReal(realp);
119:   } else {
120:     if (realp<-10*PETSC_MACHINE_EPSILON) SETERRQ(PetscObjectComm((PetscObject)bv),1,"The inner product is not well defined: indefinite matrix");
121:     *res = (realp<0.0)? 0.0: PetscSqrtReal(realp);
122:   }
123:   return(0);
124: }

126: /*
127:   BV_IPMatMult - Multiply a vector x by the inner-product matrix, cache the
128:   result in Bx.
129: */
130: PETSC_STATIC_INLINE PetscErrorCode BV_IPMatMult(BV bv,Vec x)
131: {

135:   if (((PetscObject)x)->id != bv->xid || ((PetscObject)x)->state != bv->xstate) {
136:     MatMult(bv->matrix,x,bv->Bx);
137:     bv->xid = ((PetscObject)x)->id;
138:     bv->xstate = ((PetscObject)x)->state;
139:   }
140:   return(0);
141: }

143: /*
144:   BV_IPMatMultBV - Multiply BV by the inner-product matrix, cache the
145:   result internally in bv->cached.
146: */
147: PETSC_STATIC_INLINE PetscErrorCode BV_IPMatMultBV(BV bv)
148: {

152:   BVGetCachedBV(bv,&bv->cached);
153:   if (((PetscObject)bv)->state != bv->bvstate || bv->l != bv->cached->l || bv->k != bv->cached->k) {
154:     BVSetActiveColumns(bv->cached,bv->l,bv->k);
155:     if (bv->matrix) {
156:       BVMatMult(bv,bv->matrix,bv->cached);
157:     } else {
158:       BVCopy(bv,bv->cached);
159:     }
160:     bv->bvstate = ((PetscObject)bv)->state;
161:   }
162:   return(0);
163: }

165: /*
166:   BV_AllocateCoeffs - Allocate orthogonalization coefficients if not done already.
167: */
168: PETSC_STATIC_INLINE PetscErrorCode BV_AllocateCoeffs(BV bv)
169: {

173:   if (!bv->h) {
174:     PetscMalloc2(bv->nc+bv->m,&bv->h,bv->nc+bv->m,&bv->c);
175:     PetscLogObjectMemory((PetscObject)bv,2*bv->m*sizeof(PetscScalar));
176:   }
177:   return(0);
178: }

180: /*
181:   BV_AllocateSignature - Allocate signature coefficients if not done already.
182: */
183: PETSC_STATIC_INLINE PetscErrorCode BV_AllocateSignature(BV bv)
184: {

188:   if (bv->indef && !bv->omega) {
189:     if (bv->cuda) {
190: #if defined(PETSC_HAVE_VECCUDA)
191:       VecCreateSeqCUDA(PETSC_COMM_SELF,bv->nc+bv->m,&bv->omega);
192: #else
193:       SETERRQ(PetscObjectComm((PetscObject)bv),1,"Something wrong happened");
194: #endif
195:     } else {
196:       VecCreateSeq(PETSC_COMM_SELF,bv->nc+bv->m,&bv->omega);
197:     }
198:     PetscLogObjectParent((PetscObject)bv,(PetscObject)bv->omega);
199:     VecSet(bv->omega,1.0);
200:   }
201:   return(0);
202: }

204: /*
205:   BV_AllocateMatMult - Allocate auxiliary matrices required for BVMatMult if not available.
206: */
207: PETSC_STATIC_INLINE PetscErrorCode BV_AllocateMatMult(BV bv,Mat A,PetscInt m)
208: {
210:   PetscObjectId  Aid;
211:   PetscBool      create=PETSC_FALSE;
212:   PetscInt       cols;

215:   if (!bv->B) create=PETSC_TRUE;
216:   else {
217:     MatGetSize(bv->B,NULL,&cols);
218:     PetscObjectGetId((PetscObject)A,&Aid);
219:     if (cols!=m || bv->Aid!=Aid) {
220:       MatDestroy(&bv->B);
221:       MatDestroy(&bv->C);
222:       create=PETSC_TRUE;
223:     }
224:   }
225:   if (create) {
226:     MatCreateDense(PetscObjectComm((PetscObject)bv),bv->n,PETSC_DECIDE,bv->N,m,NULL,&bv->B);
227:     PetscLogObjectParent((PetscObject)bv,(PetscObject)bv->B);
228:     MatAssemblyBegin(bv->B,MAT_FINAL_ASSEMBLY);
229:     MatAssemblyEnd(bv->B,MAT_FINAL_ASSEMBLY);
230:   }
231:   return(0);
232: }

234: /*
235:   BVAvailableVec: First (0) or second (1) vector available for
236:   getcolumn operation (or -1 if both vectors already fetched).
237: */
238: #define BVAvailableVec (((bv->ci[0]==-bv->nc-1)? 0: (bv->ci[1]==-bv->nc-1)? 1: -1))

240: /*
241:     Macros to test valid BV arguments
242: */
243: #if !defined(PETSC_USE_DEBUG)

245: #define BVCheckSizes(h,arg) do {} while (0)

247: #else

249: #define BVCheckSizes(h,arg) \
250:   do { \
251:     if (!h->m) SETERRQ1(PetscObjectComm((PetscObject)h),PETSC_ERR_ARG_WRONGSTATE,"BV sizes have not been defined: Parameter #%d",arg); \
252:   } while (0)

254: #endif

256: PETSC_INTERN PetscErrorCode BVView_Vecs(BV,PetscViewer);

258: PETSC_INTERN PetscErrorCode BVAllocateWork_Private(BV,PetscInt);

260: PETSC_INTERN PetscErrorCode BVMult_BLAS_Private(BV,PetscInt,PetscInt,PetscInt,PetscInt,PetscScalar,const PetscScalar*,const PetscScalar*,PetscScalar,PetscScalar*);
261: PETSC_INTERN PetscErrorCode BVMultVec_BLAS_Private(BV,PetscInt,PetscInt,PetscScalar,const PetscScalar*,const PetscScalar*,PetscScalar,PetscScalar*);
262: PETSC_INTERN PetscErrorCode BVMultInPlace_BLAS_Private(BV,PetscInt,PetscInt,PetscInt,PetscInt,PetscInt,PetscScalar*,const PetscScalar*,PetscBool);
263: PETSC_INTERN PetscErrorCode BVMultInPlace_Vecs_Private(BV,PetscInt,PetscInt,PetscInt,Vec*,const PetscScalar*,PetscBool);
264: PETSC_INTERN PetscErrorCode BVAXPY_BLAS_Private(BV,PetscInt,PetscInt,PetscScalar,const PetscScalar*,PetscScalar,PetscScalar*);
265: PETSC_INTERN PetscErrorCode BVDot_BLAS_Private(BV,PetscInt,PetscInt,PetscInt,PetscInt,const PetscScalar*,const PetscScalar*,PetscScalar*,PetscBool);
266: PETSC_INTERN PetscErrorCode BVDotVec_BLAS_Private(BV,PetscInt,PetscInt,const PetscScalar*,const PetscScalar*,PetscScalar*,PetscBool);
267: PETSC_INTERN PetscErrorCode BVScale_BLAS_Private(BV,PetscInt,PetscScalar*,PetscScalar);
268: PETSC_INTERN PetscErrorCode BVNorm_LAPACK_Private(BV,PetscInt,PetscInt,const PetscScalar*,NormType,PetscReal*,PetscBool);
269: PETSC_INTERN PetscErrorCode BVOrthogonalize_LAPACK_Private(BV,PetscInt,PetscInt,PetscScalar*,PetscScalar*);

271: #if defined(PETSC_HAVE_VECCUDA)
272: #include <petsccuda.h>
273: #include <cublas_v2.h>

275: #define WaitForGPU() PetscCUDASynchronize ? cudaThreadSynchronize() : 0

277: /* complex single */
278: #if defined(PETSC_USE_COMPLEX)
279: #if defined(PETSC_USE_REAL_SINGLE)
280: #define cublasXgemm(a,b,c,d,e,f,g,h,i,j,k,l,m,n) cublasCgemm((a),(b),(c),(d),(e),(f),(cuComplex*)(g),(cuComplex*)(h),(i),(cuComplex*)(j),(k),(cuComplex*)(l),(cuComplex*)(m),(n))
281: #define cublasXgemv(a,b,c,d,e,f,g,h,i,j,k,l) cublasCgemv((a),(b),(c),(d),(cuComplex*)(e),(cuComplex*)(f),(g),(cuComplex*)(h),(i),(cuComplex*)(j),(cuComplex*)(k),(l))
282: #define cublasXscal(a,b,c,d,e) cublasCscal(a,b,(const cuComplex*)(c),(cuComplex*)(d),e)
283: #define cublasXnrm2(a,b,c,d,e) cublasScnrm2(a,b,(const cuComplex*)(c),d,e)
284: #define cublasXaxpy(a,b,c,d,e,f,g) cublasCaxpy((a),(b),(cuComplex*)(c),(cuComplex*)(d),(e),(cuComplex*)(f),(g))
285: #define cublasXdotc(a,b,c,d,e,f,g) cublasCdotc((a),(b),(const cuComplex *)(c),(d),(const cuComplex *)(e),(f),(cuComplex *)(g))
286: #else /* complex double */
287: #define cublasXgemm(a,b,c,d,e,f,g,h,i,j,k,l,m,n) cublasZgemm((a),(b),(c),(d),(e),(f),(cuDoubleComplex*)(g),(cuDoubleComplex*)(h),(i),(cuDoubleComplex*)(j),(k),(cuDoubleComplex*)(l),(cuDoubleComplex*)(m),(n))
288: #define cublasXgemv(a,b,c,d,e,f,g,h,i,j,k,l) cublasZgemv((a),(b),(c),(d),(cuDoubleComplex*)(e),(cuDoubleComplex*)(f),(g),(cuDoubleComplex*)(h),(i),(cuDoubleComplex*)(j),(cuDoubleComplex*)(k),(l))
289: #define cublasXscal(a,b,c,d,e) cublasZscal(a,b,(const cuDoubleComplex*)(c),(cuDoubleComplex*)(d),e)
290: #define cublasXnrm2(a,b,c,d,e) cublasDznrm2(a,b,(const cuDoubleComplex*)(c),d,e)
291: #define cublasXaxpy(a,b,c,d,e,f,g) cublasZaxpy((a),(b),(cuDoubleComplex*)(c),(cuDoubleComplex*)(d),(e),(cuDoubleComplex*)(f),(g))
292: #define cublasXdotc(a,b,c,d,e,f,g) cublasZdotc((a),(b),(const cuDoubleComplex *)(c),(d),(const cuDoubleComplex *)(e),(f),(cuDoubleComplex *)(g))
293: #endif
294: #else /* real single */
295: #if defined(PETSC_USE_REAL_SINGLE)
296: #define cublasXgemm cublasSgemm
297: #define cublasXgemv cublasSgemv
298: #define cublasXscal cublasSscal
299: #define cublasXnrm2 cublasSnrm2
300: #define cublasXaxpy cublasSaxpy
301: #define cublasXdotc cublasSdot
302: #else /* real double */
303: #define cublasXgemm cublasDgemm
304: #define cublasXgemv cublasDgemv
305: #define cublasXscal cublasDscal
306: #define cublasXnrm2 cublasDnrm2
307: #define cublasXaxpy cublasDaxpy
308: #define cublasXdotc cublasDdot
309: #endif
310: #endif

312: PETSC_INTERN PetscErrorCode BV_CleanCoefficients_CUDA(BV,PetscInt,PetscScalar*);
313: PETSC_INTERN PetscErrorCode BV_AddCoefficients_CUDA(BV,PetscInt,PetscScalar*,PetscScalar*);
314: PETSC_INTERN PetscErrorCode BV_SetValue_CUDA(BV,PetscInt,PetscInt,PetscScalar*,PetscScalar);
315: PETSC_INTERN PetscErrorCode BV_SquareSum_CUDA(BV,PetscInt,PetscScalar*,PetscReal*);
316: PETSC_INTERN PetscErrorCode BV_ApplySignature_CUDA(BV,PetscInt,PetscScalar*,PetscBool);
317: PETSC_INTERN PetscErrorCode BV_SquareRoot_CUDA(BV,PetscInt,PetscScalar*,PetscReal*);
318: PETSC_INTERN PetscErrorCode BV_StoreCoefficients_CUDA(BV,PetscInt,PetscScalar*,PetscScalar*);

320: #endif /* PETSC_HAVE_VECCUDA */

322: #endif