Actual source code: epskrylov.c

slepc-3.8.2 2017-12-01
Report Typos and Errors
  1: /*
  2:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  3:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  4:    Copyright (c) 2002-2017, Universitat Politecnica de Valencia, Spain

  6:    This file is part of SLEPc.
  7:    SLEPc is distributed under a 2-clause BSD license (see LICENSE).
  8:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  9: */
 10: /*
 11:    Common subroutines for all Krylov-type solvers
 12: */

 14: #include <slepc/private/epsimpl.h>
 15: #include <slepc/private/slepcimpl.h>
 16: #include <slepcblaslapack.h>

 18: /*
 19:    EPSBasicArnoldi - Computes an m-step Arnoldi factorization. The first k
 20:    columns are assumed to be locked and therefore they are not modified. On
 21:    exit, the following relation is satisfied:

 23:                     OP * V - V * H = beta*v_m * e_m^T

 25:    where the columns of V are the Arnoldi vectors (which are B-orthonormal),
 26:    H is an upper Hessenberg matrix, e_m is the m-th vector of the canonical basis.
 27:    On exit, beta contains the B-norm of V[m] before normalization.
 28: */
 29: PetscErrorCode EPSBasicArnoldi(EPS eps,PetscBool trans,PetscScalar *H,PetscInt ldh,PetscInt k,PetscInt *M,PetscReal *beta,PetscBool *breakdown)
 30: {
 32:   PetscScalar    *a;
 33:   PetscInt       j,nc,n,m = *M;
 34:   Vec            vj,vj1,buf;

 37:   BVSetActiveColumns(eps->V,0,m);
 38:   for (j=k;j<m;j++) {
 39:     BVGetColumn(eps->V,j,&vj);
 40:     BVGetColumn(eps->V,j+1,&vj1);
 41:     if (trans) {
 42:       STApplyTranspose(eps->st,vj,vj1);
 43:     } else {
 44:       STApply(eps->st,vj,vj1);
 45:     }
 46:     BVRestoreColumn(eps->V,j,&vj);
 47:     BVRestoreColumn(eps->V,j+1,&vj1);
 48:     BVOrthonormalizeColumn(eps->V,j+1,PETSC_FALSE,beta,breakdown);
 49:     if (*breakdown) {
 50:       *M = j+1;
 51:       break;
 52:     }
 53:   }
 54:   /* extract Hessenberg matrix from the BV object */
 55:   BVGetNumConstraints(eps->V,&nc);
 56:   BVGetSizes(eps->V,NULL,NULL,&n);
 57:   BVGetBufferVec(eps->V,&buf);
 58:   VecGetArray(buf,&a);
 59:   for (j=k;j<*M;j++) {
 60:     PetscMemcpy(H+j*ldh,a+nc+(j+1)*(nc+n),(j+2)*sizeof(PetscScalar));
 61:   }
 62:   VecRestoreArray(buf,&a);
 63:   return(0);
 64: }

 66: /*
 67:    EPSDelayedArnoldi - This function is equivalent to EPSBasicArnoldi but
 68:    performs the computation in a different way. The main idea is that
 69:    reorthogonalization is delayed to the next Arnoldi step. This version is
 70:    more scalable but in some cases convergence may stagnate.
 71: */
 72: PetscErrorCode EPSDelayedArnoldi(EPS eps,PetscScalar *H,PetscInt ldh,PetscInt k,PetscInt *M,PetscReal *beta,PetscBool *breakdown)
 73: {
 75:   PetscInt       i,j,m=*M;
 76:   Vec            u,t;
 77:   PetscScalar    shh[100],*lhh,dot,dot2;
 78:   PetscReal      norm1=0.0,norm2=1.0;
 79:   Vec            vj,vj1,vj2;

 82:   if (m<=100) lhh = shh;
 83:   else {
 84:     PetscMalloc1(m,&lhh);
 85:   }
 86:   BVCreateVec(eps->V,&u);
 87:   BVCreateVec(eps->V,&t);

 89:   BVSetActiveColumns(eps->V,0,m);
 90:   for (j=k;j<m;j++) {
 91:     BVGetColumn(eps->V,j,&vj);
 92:     BVGetColumn(eps->V,j+1,&vj1);
 93:     STApply(eps->st,vj,vj1);
 94:     BVRestoreColumn(eps->V,j,&vj);
 95:     BVRestoreColumn(eps->V,j+1,&vj1);

 97:     BVDotColumnBegin(eps->V,j+1,H+ldh*j);
 98:     if (j>k) {
 99:       BVDotColumnBegin(eps->V,j,lhh);
100:       BVGetColumn(eps->V,j,&vj);
101:       VecDotBegin(vj,vj,&dot);
102:     }
103:     if (j>k+1) {
104:       BVNormVecBegin(eps->V,u,NORM_2,&norm2);
105:       BVGetColumn(eps->V,j-2,&vj2);
106:       VecDotBegin(u,vj2,&dot2);
107:     }

109:     BVDotColumnEnd(eps->V,j+1,H+ldh*j);
110:     if (j>k) {
111:       BVDotColumnEnd(eps->V,j,lhh);
112:       VecDotEnd(vj,vj,&dot);
113:       BVRestoreColumn(eps->V,j,&vj);
114:     }
115:     if (j>k+1) {
116:       BVNormVecEnd(eps->V,u,NORM_2,&norm2);
117:       VecDotEnd(u,vj2,&dot2);
118:       BVRestoreColumn(eps->V,j-2,&vj2);
119:     }

121:     if (j>k) {
122:       norm1 = PetscSqrtReal(PetscRealPart(dot));
123:       for (i=0;i<j;i++)
124:         H[ldh*j+i] = H[ldh*j+i]/norm1;
125:       H[ldh*j+j] = H[ldh*j+j]/dot;

127:       BVCopyVec(eps->V,j,t);
128:       BVScaleColumn(eps->V,j,1.0/norm1);
129:       BVScaleColumn(eps->V,j+1,1.0/norm1);
130:     }

132:     BVMultColumn(eps->V,-1.0,1.0,j+1,H+ldh*j);

134:     if (j>k) {
135:       BVSetActiveColumns(eps->V,0,j);
136:       BVMultVec(eps->V,-1.0,1.0,t,lhh);
137:       BVSetActiveColumns(eps->V,0,m);
138:       for (i=0;i<j;i++)
139:         H[ldh*(j-1)+i] += lhh[i];
140:     }

142:     if (j>k+1) {
143:       BVGetColumn(eps->V,j-1,&vj1);
144:       VecCopy(u,vj1);
145:       BVRestoreColumn(eps->V,j-1,&vj1);
146:       BVScaleColumn(eps->V,j-1,1.0/norm2);
147:       H[ldh*(j-2)+j-1] = norm2;
148:     }

150:     if (j<m-1) {
151:       VecCopy(t,u);
152:     }
153:   }

155:   BVNormVec(eps->V,t,NORM_2,&norm2);
156:   VecScale(t,1.0/norm2);
157:   BVGetColumn(eps->V,m-1,&vj1);
158:   VecCopy(t,vj1);
159:   BVRestoreColumn(eps->V,m-1,&vj1);
160:   H[ldh*(m-2)+m-1] = norm2;

162:   BVDotColumn(eps->V,m,lhh);

164:   BVMultColumn(eps->V,-1.0,1.0,m,lhh);
165:   for (i=0;i<m;i++)
166:     H[ldh*(m-1)+i] += lhh[i];

168:   BVNormColumn(eps->V,m,NORM_2,beta);
169:   BVScaleColumn(eps->V,m,1.0 / *beta);
170:   *breakdown = PETSC_FALSE;

172:   if (m>100) { PetscFree(lhh); }
173:   VecDestroy(&u);
174:   VecDestroy(&t);
175:   return(0);
176: }

178: /*
179:    EPSDelayedArnoldi1 - This function is similar to EPSDelayedArnoldi,
180:    but without reorthogonalization (only delayed normalization).
181: */
182: PetscErrorCode EPSDelayedArnoldi1(EPS eps,PetscScalar *H,PetscInt ldh,PetscInt k,PetscInt *M,PetscReal *beta,PetscBool *breakdown)
183: {
185:   PetscInt       i,j,m=*M;
186:   PetscScalar    dot;
187:   PetscReal      norm=0.0;
188:   Vec            vj,vj1;

191:   BVSetActiveColumns(eps->V,0,m);
192:   for (j=k;j<m;j++) {
193:     BVGetColumn(eps->V,j,&vj);
194:     BVGetColumn(eps->V,j+1,&vj1);
195:     STApply(eps->st,vj,vj1);
196:     BVRestoreColumn(eps->V,j+1,&vj1);
197:     BVDotColumnBegin(eps->V,j+1,H+ldh*j);
198:     if (j>k) {
199:       VecDotBegin(vj,vj,&dot);
200:     }
201:     BVDotColumnEnd(eps->V,j+1,H+ldh*j);
202:     if (j>k) {
203:       VecDotEnd(vj,vj,&dot);
204:     }
205:     BVRestoreColumn(eps->V,j,&vj);

207:     if (j>k) {
208:       norm = PetscSqrtReal(PetscRealPart(dot));
209:       BVScaleColumn(eps->V,j,1.0/norm);
210:       H[ldh*(j-1)+j] = norm;

212:       for (i=0;i<j;i++)
213:         H[ldh*j+i] = H[ldh*j+i]/norm;
214:       H[ldh*j+j] = H[ldh*j+j]/dot;
215:       BVScaleColumn(eps->V,j+1,1.0/norm);
216:       *beta = norm;
217:     }
218:     BVMultColumn(eps->V,-1.0,1.0,j+1,H+ldh*j);
219:   }

221:   *breakdown = PETSC_FALSE;
222:   return(0);
223: }

225: /*
226:    EPSKrylovConvergence_Filter - Specialized version for STFILTER.
227: */
228: PetscErrorCode EPSKrylovConvergence_Filter(EPS eps,PetscBool getall,PetscInt kini,PetscInt nits,PetscReal beta,PetscReal gamma,PetscInt *kout)
229: {
231:   PetscInt       k,ninside,nconv;
232:   PetscScalar    re,im;
233:   PetscReal      resnorm;

236:   ninside = 0;   /* count how many eigenvalues are located in the interval */
237:   for (k=kini;k<kini+nits;k++) {
238:     if (PetscRealPart(eps->eigr[k]) < gamma) break;
239:     ninside++;
240:   }
241:   eps->nev = ninside+kini;  /* adjust eigenvalue count */
242:   nconv = 0;   /* count how many eigenvalues satisfy the convergence criterion */
243:   for (k=kini;k<kini+ninside;k++) {
244:     /* eigenvalue */
245:     re = eps->eigr[k];
246:     im = eps->eigi[k];
247:     DSVectors(eps->ds,DS_MAT_X,&k,&resnorm);
248:     resnorm *= beta;
249:     /* error estimate */
250:     (*eps->converged)(eps,re,im,resnorm,&eps->errest[k],eps->convergedctx);
251:     if (eps->errest[k] < eps->tol) nconv++;
252:     else break;
253:   }
254:   *kout = kini+nconv;
255:   PetscInfo4(eps,"Found %D eigenvalue approximations inside the inverval (gamma=%g), k=%D nconv=%D\n",ninside,(double)gamma,k,nconv);
256:   return(0);
257: }

259: /*
260:    EPSKrylovConvergence - Implements the loop that checks for convergence
261:    in Krylov methods.

263:    Input Parameters:
264:      eps   - the eigensolver; some error estimates are updated in eps->errest
265:      getall - whether all residuals must be computed
266:      kini  - initial value of k (the loop variable)
267:      nits  - number of iterations of the loop
268:      V     - set of basis vectors (used only if trueresidual is activated)
269:      nv    - number of vectors to process (dimension of Q, columns of V)
270:      beta  - norm of f (the residual vector of the Arnoldi/Lanczos factorization)
271:      corrf - correction factor for residual estimates (only in harmonic KS)

273:    Output Parameters:
274:      kout  - the first index where the convergence test failed
275: */
276: PetscErrorCode EPSKrylovConvergence(EPS eps,PetscBool getall,PetscInt kini,PetscInt nits,PetscReal beta,PetscReal corrf,PetscInt *kout)
277: {
279:   PetscInt       k,newk,marker,ld,inside;
280:   PetscScalar    re,im,*Zr,*Zi,*X;
281:   PetscReal      resnorm,gamma;
282:   PetscBool      isshift,isfilter,refined,istrivial;
283:   Vec            x,y,w[3];

286:   if (eps->which == EPS_ALL) {
287:     PetscObjectTypeCompare((PetscObject)eps->st,STFILTER,&isfilter);
288:     if (isfilter) {
289:       STFilterGetThreshold(eps->st,&gamma);
290:       EPSKrylovConvergence_Filter(eps,getall,kini,nits,beta,gamma,kout);
291:       return(0);
292:     }
293:   }
294:   RGIsTrivial(eps->rg,&istrivial);
295:   if (eps->trueres) {
296:     BVCreateVec(eps->V,&x);
297:     BVCreateVec(eps->V,&y);
298:     BVCreateVec(eps->V,&w[0]);
299:     BVCreateVec(eps->V,&w[2]);
300: #if !defined(PETSC_USE_COMPLEX)
301:     BVCreateVec(eps->V,&w[1]);
302: #else
303:     w[1] = NULL;
304: #endif
305:   }
306:   DSGetLeadingDimension(eps->ds,&ld);
307:   DSGetRefined(eps->ds,&refined);
308:   PetscObjectTypeCompare((PetscObject)eps->st,STSHIFT,&isshift);
309:   marker = -1;
310:   if (eps->trackall) getall = PETSC_TRUE;
311:   for (k=kini;k<kini+nits;k++) {
312:     /* eigenvalue */
313:     re = eps->eigr[k];
314:     im = eps->eigi[k];
315:     if (!istrivial || eps->trueres || isshift || eps->conv==EPS_CONV_NORM) {
316:       STBackTransform(eps->st,1,&re,&im);
317:     }
318:     if (!istrivial) {
319:       RGCheckInside(eps->rg,1,&re,&im,&inside);
320:       if (marker==-1 && inside<0) marker = k;
321:       if (!(eps->trueres || isshift || eps->conv==EPS_CONV_NORM)) {  /* make sure eps->converged below uses the right value */
322:         re = eps->eigr[k];
323:         im = eps->eigi[k];
324:       }
325:     }
326:     newk = k;
327:     DSVectors(eps->ds,DS_MAT_X,&newk,&resnorm);
328:     if (eps->trueres) {
329:       DSGetArray(eps->ds,DS_MAT_X,&X);
330:       Zr = X+k*ld;
331:       if (newk==k+1) Zi = X+newk*ld;
332:       else Zi = NULL;
333:       EPSComputeRitzVector(eps,Zr,Zi,eps->V,x,y);
334:       DSRestoreArray(eps->ds,DS_MAT_X,&X);
335:       EPSComputeResidualNorm_Private(eps,re,im,x,y,w,&resnorm);
336:     }
337:     else if (!refined) resnorm *= beta*corrf;
338:     /* error estimate */
339:     (*eps->converged)(eps,re,im,resnorm,&eps->errest[k],eps->convergedctx);
340:     if (marker==-1 && eps->errest[k] >= eps->tol) marker = k;
341:     if (newk==k+1) {
342:       eps->errest[k+1] = eps->errest[k];
343:       k++;
344:     }
345:     if (marker!=-1 && !getall) break;
346:   }
347:   if (marker!=-1) k = marker;
348:   *kout = k;
349:   if (eps->trueres) {
350:     VecDestroy(&x);
351:     VecDestroy(&y);
352:     VecDestroy(&w[0]);
353:     VecDestroy(&w[2]);
354: #if !defined(PETSC_USE_COMPLEX)
355:     VecDestroy(&w[1]);
356: #endif
357:   }
358:   return(0);
359: }

361: /*
362:    EPSFullLanczos - Computes an m-step Lanczos factorization with full
363:    reorthogonalization.  At each Lanczos step, the corresponding Lanczos
364:    vector is orthogonalized with respect to all previous Lanczos vectors.
365:    This is equivalent to computing an m-step Arnoldi factorization and
366:    exploting symmetry of the operator.

368:    The first k columns are assumed to be locked and therefore they are
369:    not modified. On exit, the following relation is satisfied:

371:                     OP * V - V * T = beta_m*v_m * e_m^T

373:    where the columns of V are the Lanczos vectors (which are B-orthonormal),
374:    T is a real symmetric tridiagonal matrix, and e_m is the m-th vector of
375:    the canonical basis. The tridiagonal is stored as two arrays: alpha
376:    contains the diagonal elements, beta the off-diagonal. On exit, the last
377:    element of beta contains the B-norm of V[m] before normalization.
378: */
379: PetscErrorCode EPSFullLanczos(EPS eps,PetscReal *alpha,PetscReal *beta,PetscInt k,PetscInt *M,PetscBool *breakdown)
380: {
382:   PetscScalar    *a;
383:   PetscInt       j,nc,n,m = *M;
384:   Vec            vj,vj1,buf;

387:   BVSetActiveColumns(eps->V,0,m);
388:   for (j=k;j<m;j++) {
389:     BVGetColumn(eps->V,j,&vj);
390:     BVGetColumn(eps->V,j+1,&vj1);
391:     STApply(eps->st,vj,vj1);
392:     BVRestoreColumn(eps->V,j,&vj);
393:     BVRestoreColumn(eps->V,j+1,&vj1);
394:     BVOrthonormalizeColumn(eps->V,j+1,PETSC_FALSE,beta+j,breakdown);
395:     if (*breakdown) {
396:       *M = j+1;
397:       break;
398:     }
399:   }
400:   /* extract tridiagonal matrix from the BV object (only alpha, beta is already in its place) */
401:   BVGetNumConstraints(eps->V,&nc);
402:   BVGetSizes(eps->V,NULL,NULL,&n);
403:   BVGetBufferVec(eps->V,&buf);
404:   VecGetArray(buf,&a);
405:   for (j=k;j<*M;j++) alpha[j] = PetscRealPart(a[nc+j+(j+1)*(nc+n)]);
406:   VecRestoreArray(buf,&a);
407:   return(0);
408: }

410: PetscErrorCode EPSPseudoLanczos(EPS eps,PetscReal *alpha,PetscReal *beta,PetscReal *omega,PetscInt k,PetscInt *M,PetscBool *breakdown,PetscBool *symmlost,PetscReal *cos,Vec w)
411: {
413:   PetscInt       j,m = *M,i,ld,l;
414:   Vec            vj,vj1;
415:   PetscScalar    *hwork,lhwork[100];
416:   PetscReal      norm,norm1,norm2,t,*f,sym=0.0,fro=0.0;
417:   PetscBLASInt   j_,one=1;

420:   DSGetLeadingDimension(eps->ds,&ld);
421:   DSGetDimensions(eps->ds,NULL,NULL,&l,NULL,NULL);
422:   if (cos) *cos = 1.0;
423:   if (m > 100) {
424:     PetscMalloc1(m,&hwork);
425:   } else hwork = lhwork;

427:   BVSetActiveColumns(eps->V,0,m);
428:   for (j=k;j<m;j++) {
429:     BVGetColumn(eps->V,j,&vj);
430:     BVGetColumn(eps->V,j+1,&vj1);
431:     STApply(eps->st,vj,vj1);
432:     BVRestoreColumn(eps->V,j,&vj);
433:     BVRestoreColumn(eps->V,j+1,&vj1);
434:     BVOrthogonalizeColumn(eps->V,j+1,hwork,&norm,breakdown);
435:     alpha[j] = PetscRealPart(hwork[j]);
436:     beta[j] = PetscAbsReal(norm);
437:     DSGetArrayReal(eps->ds,DS_MAT_T,&f);
438:     if (j==k) {
439:       for (i=l;i<j-1;i++) hwork[i]-= f[2*ld+i];
440:       for (i=0;i<l;i++) hwork[i] = 0.0;
441:     }
442:     DSRestoreArrayReal(eps->ds,DS_MAT_T,&f);
443:     hwork[j-1] -= beta[j-1];
444:     PetscBLASIntCast(j,&j_);
445:     sym = SlepcAbs(BLASnrm2_(&j_,hwork,&one),sym);
446:     fro = SlepcAbs(fro,SlepcAbs(alpha[j],beta[j]));
447:     if (j>0) fro = SlepcAbs(fro,beta[j-1]);
448:     if (sym/fro>PetscMax(PETSC_SQRT_MACHINE_EPSILON,10*eps->tol)) { *symmlost = PETSC_TRUE; *M=j+1; break; }
449:     omega[j+1] = (norm<0.0)? -1.0: 1.0;
450:     BVScaleColumn(eps->V,j+1,1.0/norm);
451:     /* */
452:     if (cos) {
453:       BVGetColumn(eps->V,j+1,&vj1);
454:       VecNorm(vj1,NORM_2,&norm1);
455:       BVApplyMatrix(eps->V,vj1,w);
456:       BVRestoreColumn(eps->V,j+1,&vj1);
457:       VecNorm(w,NORM_2,&norm2);
458:       t = 1.0/(norm1*norm2);
459:       if (*cos>t) *cos = t;
460:     }
461:   }
462:   if (m > 100) {
463:     PetscFree(hwork);
464:   }
465:   return(0);
466: }