1 |
#define EXTERN extern |
2 |
|
3 |
// mode |
4 |
#define NONE 0 |
5 |
#define NEWTON 1 |
6 |
#define TNCG 2 |
7 |
#define DTNCG 3 |
8 |
#define AUTO 4 |
9 |
// method |
10 |
#define DIAG 5 |
11 |
#define SSOR 6 |
12 |
#define ICCG 7 |
13 |
#define BLOCK 8 |
14 |
|
15 |
|
16 |
#include "pcwin.h" |
17 |
#include "pcmod.h" |
18 |
#include "utility.h" |
19 |
|
20 |
//#define min(a,b) (((a) < (b) ? (a) : (b)) |
21 |
|
22 |
EXTERN struct t_minvar{ |
23 |
double cappa, stpmin, stpmax, angmax; |
24 |
int intmax; |
25 |
} minvar; |
26 |
|
27 |
void solve(int,int,int,int, double *,double *,double *,double *,int *, |
28 |
int *,int *,double *,int *,int *,int *, double (*)(double *,double *)); |
29 |
|
30 |
|
31 |
static void precond(int,int ,int ,double *,double *,double *,int *,int *,int *,double *); |
32 |
static void cholesky(int , double *, double *); |
33 |
static void column(int, int *, int *,int *,int *,int *,int *, int *); |
34 |
|
35 |
int stable; |
36 |
double *f, *f_diag; |
37 |
int *c_index, *c_value; |
38 |
|
39 |
void solve(int mode,int method,int negtest,int nvar,double *p,double *x, |
40 |
double *g, double *h, int *h_init, int *h_stop, int *h_index, |
41 |
double *h_diag,int *icycle,int *iter_cg,int *fg_call,double (*fgvalue) (double *,double *)) |
42 |
{ |
43 |
int i,j,k, iter,maxiter,maxhess; |
44 |
double *m, *d, *si, *r, *q; // |
45 |
double alpha,beta,delta,sigma; |
46 |
double f_sigma; |
47 |
double *x_sigma; |
48 |
double *g_sigma; |
49 |
double g_norm,g_rms,eps,converge; |
50 |
double hj,gg,dq,rr,dd,rs,rs_new,r_norm; |
51 |
int maxvar = 3*natom; |
52 |
|
53 |
if (natom < 300) |
54 |
maxhess = (3*natom*(3*natom-1))/2; |
55 |
else if (natom < 800) |
56 |
maxhess = (3*natom*(3*natom-1))/3; |
57 |
else |
58 |
maxhess = (3*natom*(3*natom-1))/20; |
59 |
|
60 |
eps = 0.0; |
61 |
maxiter = 0; |
62 |
|
63 |
m = dvector(0,maxvar); |
64 |
r = dvector(0,maxvar); |
65 |
si = dvector(0,maxvar); |
66 |
d = dvector(0,maxvar); |
67 |
q = dvector(0,maxvar); |
68 |
x_sigma = dvector(0,maxvar); |
69 |
g_sigma = dvector(0,maxvar); |
70 |
f_diag = dvector(0,maxvar); |
71 |
f = dvector(0,maxhess); |
72 |
c_index = ivector(0,maxhess); |
73 |
c_value = ivector(0,maxhess); |
74 |
|
75 |
if (mode != DTNCG && method != NONE) |
76 |
{ |
77 |
for (i=0; i < nvar; i++) |
78 |
m[i] = 1.0 / sqrt(fabs(h_diag[i])); |
79 |
|
80 |
for (i=0; i < nvar; i++) |
81 |
{ |
82 |
g[i] = g[i] * m[i]; |
83 |
h_diag[i] *= m[i] * m[i]; |
84 |
for (j = h_init[i]; j < h_stop[i]; j++) |
85 |
{ |
86 |
k = h_index[j]; |
87 |
h[j] *= m[i] * m[k]; |
88 |
} |
89 |
} |
90 |
} |
91 |
|
92 |
iter = 0; |
93 |
gg = 0.0; |
94 |
for (i=0; i < nvar; i++) |
95 |
{ |
96 |
p[i] = 0.0; |
97 |
r[i] = -g[i]; |
98 |
gg += g[i]*g[i]; |
99 |
} |
100 |
g_norm = sqrt(gg); |
101 |
precond(method, iter,nvar, si, r, h, h_init, h_stop, h_index, h_diag); |
102 |
rs = 0.0; |
103 |
for (i=0; i < nvar; i++) |
104 |
{ |
105 |
d[i] = si[i]; |
106 |
rs += r[i]*si[i]; |
107 |
} |
108 |
|
109 |
if (mode == NEWTON) |
110 |
{ |
111 |
eps = 1.0e-10; |
112 |
maxiter = nvar; |
113 |
} else if (mode == TNCG || mode == DTNCG) |
114 |
{ |
115 |
delta = 1.0; |
116 |
eps = delta/(double) *icycle; |
117 |
g_rms = g_norm/sqrt((double) nvar); |
118 |
eps = MinFun(eps,g_rms); |
119 |
converge = 1.0; |
120 |
maxiter = (int) (10.0*sqrt((double)nvar)); |
121 |
} |
122 |
iter = 1; |
123 |
|
124 |
// evaluate the matrix vector product |
125 |
while (TRUE) |
126 |
{ |
127 |
if (mode == TNCG || mode == NEWTON) |
128 |
{ |
129 |
for (i=0; i < nvar; i++) |
130 |
q[i] = 0.0; |
131 |
for (i=0; i < nvar; i++) |
132 |
{ |
133 |
q[i] += h_diag[i]*d[i]; |
134 |
for(j= h_init[i]; j < h_stop[i]; j++) |
135 |
{ |
136 |
k = h_index[j]; |
137 |
hj = h[j]; |
138 |
q[i] += hj*d[k]; |
139 |
q[k] += hj*d[i]; |
140 |
} |
141 |
} |
142 |
}else if (mode == DTNCG) |
143 |
{ |
144 |
dd = 0.0; |
145 |
for (i=0; i < nvar; i++) |
146 |
dd += d[i]*d[i]; |
147 |
|
148 |
sigma = 1.0e-7 / sqrt(dd); |
149 |
|
150 |
for (i=0; i < nvar; i++) |
151 |
x_sigma[i] = x[i] + sigma*d[i]; |
152 |
|
153 |
fg_call = fg_call + 1; |
154 |
f_sigma = fgvalue (x_sigma,g_sigma); |
155 |
|
156 |
for (i=0; i < nvar; i++) |
157 |
q[i] = (g_sigma[i]-g[i]) / sigma; |
158 |
} |
159 |
|
160 |
// test for direction of negative curvature |
161 |
dq = 0.0; |
162 |
for (i=0; i < nvar; i++) |
163 |
dq += d[i]*q[i]; |
164 |
|
165 |
if (negtest) |
166 |
{ |
167 |
if (dq <= 0.0) |
168 |
{ |
169 |
if (iter == 1) |
170 |
{ |
171 |
for (i=0; i < nvar; i++) |
172 |
p[i] = d[i]; |
173 |
} |
174 |
goto L_10; |
175 |
} |
176 |
} |
177 |
|
178 |
// test truncated Newton termination criterion |
179 |
alpha = rs / dq; |
180 |
rr = 0.0; |
181 |
for (i=0; i < nvar; i++) |
182 |
{ |
183 |
p[i] += alpha*d[i]; |
184 |
r[i] -= alpha*q[i]; |
185 |
rr += r[i]*r[i]; |
186 |
} |
187 |
r_norm = sqrt(rr); |
188 |
if (r_norm/g_norm <= eps) |
189 |
goto L_10; |
190 |
|
191 |
// precondition |
192 |
precond(method,iter,nvar, si, r,h,h_init,h_stop, |
193 |
h_index,h_diag); |
194 |
|
195 |
// update direction |
196 |
rs_new = 0.0; |
197 |
for (i=0; i < nvar; i++) |
198 |
rs_new += r[i]*si[i]; |
199 |
|
200 |
beta = rs_new/rs; |
201 |
rs = rs_new; |
202 |
|
203 |
for (i=0; i < nvar; i++) |
204 |
d[i] = si[i] + beta*d[i]; |
205 |
|
206 |
// check for overlimit next iteration |
207 |
if (iter >= maxiter) |
208 |
goto L_10; |
209 |
|
210 |
iter++; |
211 |
} |
212 |
|
213 |
// termination |
214 |
|
215 |
L_10: |
216 |
if (mode != DTNCG && method != NONE) |
217 |
{ |
218 |
for (i=0; i < nvar; i++) |
219 |
{ |
220 |
p[i] *= m[i]; |
221 |
g[i] /= m[i]; |
222 |
} |
223 |
} |
224 |
iter_cg += iter; |
225 |
free_dvector( m ,0,maxvar); |
226 |
free_dvector( r ,0,maxvar); |
227 |
free_dvector( si ,0,maxvar); |
228 |
free_dvector( d ,0,maxvar); |
229 |
free_dvector( q ,0,maxvar); |
230 |
free_dvector( x_sigma ,0,maxvar); |
231 |
free_dvector( g_sigma ,0,maxvar); |
232 |
free_dvector( f_diag ,0,maxvar); |
233 |
free_dvector( f ,0,maxhess); |
234 |
free_ivector( c_index ,0,maxhess); |
235 |
free_ivector( c_value ,0,maxhess); |
236 |
} |
237 |
//================================ |
238 |
// ============================== |
239 |
void column(int nvar, int *h_init, int *h_stop,int *h_index, |
240 |
int *c_init,int *c_stop,int *c_index, int *c_value) |
241 |
{ |
242 |
int i,j,k,m; |
243 |
|
244 |
for (i=0; i < nvar; i++) |
245 |
{ |
246 |
c_init[i] = 0; |
247 |
c_stop[i] = 0; |
248 |
} |
249 |
// count number of elements in each column |
250 |
for (i=0; i < nvar; i++) |
251 |
{ |
252 |
for (j=h_init[i]; j < h_stop[i]; j++) |
253 |
{ |
254 |
k = h_index[j]; |
255 |
c_stop[k]++; |
256 |
} |
257 |
} |
258 |
// set each beginning marker past the last element for its column |
259 |
c_init[0] = c_stop[0] + 1; |
260 |
for (i=1; i <nvar; i++) |
261 |
{ |
262 |
c_init[i] = c_init[i-1] + c_stop[i]; |
263 |
} |
264 |
// set column index scanning rows in reverse order |
265 |
for (i = nvar-1; i >= 0; i--) |
266 |
{ |
267 |
for (j=h_init[i]; j < h_stop[i]; j++) |
268 |
{ |
269 |
k = h_index[j]; |
270 |
m = c_init[k]-1; |
271 |
c_init[k] = m; |
272 |
c_index[m] = i; |
273 |
c_value[m] = j; |
274 |
} |
275 |
} |
276 |
for(i=0; i < nvar; i++) |
277 |
{ |
278 |
c_stop[i] = c_init[i] + c_stop[i]-1; |
279 |
} |
280 |
} |
281 |
// ============================== |
282 |
void precond(int method, int iter, int nvar, double *s, double *r, |
283 |
double *h, int *h_init, int *h_stop,int *h_index, |
284 |
double *h_diag) |
285 |
{ |
286 |
int i,j,k,ii,kk,iii,kkk,nblock,ix,iy,iz,icount; |
287 |
int *c_init, *c_stop; // c_init[maxvar],c_stop[maxvar]; |
288 |
double a[6],b[3]; |
289 |
double omega,factor,*diag; // diag[maxvar]; |
290 |
double maxalpha, alpha, f_i, f_k; |
291 |
int maxvar; |
292 |
|
293 |
maxvar = 3*natom; |
294 |
c_init = ivector(0,maxvar); |
295 |
c_stop = ivector(0,maxvar); |
296 |
diag = dvector(0,maxvar); |
297 |
|
298 |
|
299 |
// no preconditioning |
300 |
if (method == NONE) |
301 |
{ |
302 |
for (i=0; i < nvar; i++) |
303 |
s[i] = r[i]; |
304 |
} |
305 |
// diagonal |
306 |
if (method == DIAG) |
307 |
{ |
308 |
for (i=0; i < nvar; i++) |
309 |
s[i] = r[i]/ fabs(h_diag[i]); |
310 |
} |
311 |
// block diagonal |
312 |
if (method == BLOCK) |
313 |
{ |
314 |
nblock = 3; |
315 |
for (i=1; i <= natom; i++) |
316 |
{ |
317 |
iz = (i-1)*3+2; |
318 |
iy = (i-1)*3+1; |
319 |
ix = (i-1)*3; |
320 |
a[0] = h_diag[ix]; |
321 |
if (h_index[h_init[ix]] == iy) |
322 |
a[1] = h[h_init[ix]]; |
323 |
else |
324 |
a[1] = 0.0; |
325 |
|
326 |
if (h_index[h_init[ix]+1] == iz) |
327 |
a[2] = h[h_init[ix]+1]; |
328 |
else |
329 |
a[2] = 0.0; |
330 |
|
331 |
a[3] = h_diag[iy]; |
332 |
if (h_index[h_init[iy]] == iz) |
333 |
a[4] = h[h_init[iy]]; |
334 |
else |
335 |
a[4] = 0.0; |
336 |
|
337 |
a[5] = h_diag[iz]; |
338 |
b[0] = r[ix]; |
339 |
b[1] = r[iy]; |
340 |
b[2] = r[iz]; |
341 |
cholesky (nblock,a,b); |
342 |
s[ix] = b[0]; |
343 |
s[iy] = b[1]; |
344 |
s[iz] = b[2]; |
345 |
} |
346 |
} |
347 |
|
348 |
// SSOR |
349 |
if (method == SSOR) |
350 |
{ |
351 |
omega = 1.0; |
352 |
factor = 2.0 - omega; |
353 |
for (i=0; i < nvar; i++) |
354 |
{ |
355 |
s[i] = r[i] * factor; |
356 |
diag[i] = h_diag[i] / omega; |
357 |
} |
358 |
for (i=0; i < nvar; i++) |
359 |
{ |
360 |
s[i] = s[i] / diag[i]; |
361 |
for (j=h_init[i]; j< h_stop[i]; j++) |
362 |
{ |
363 |
k = h_index[j]; |
364 |
s[k] = s[k] - h[j]*s[i]; |
365 |
} |
366 |
} |
367 |
for (i=nvar-1; i >= 0; i--) |
368 |
{ |
369 |
s[i] = s[i] * diag[i]; |
370 |
for (j=h_init[i]; j< h_stop[i]; j++) |
371 |
{ |
372 |
k = h_index[j]; |
373 |
s[i] = s[i] - h[j]*s[k]; |
374 |
} |
375 |
s[i] = s[i] / diag[i]; |
376 |
} |
377 |
} |
378 |
// incomplete cholesky preconditioning |
379 |
if (method == ICCG && iter == 0) |
380 |
{ |
381 |
column (nvar,h_init,h_stop,h_index, |
382 |
c_init,c_stop,c_index,c_value); |
383 |
stable = TRUE; |
384 |
icount = 0; |
385 |
maxalpha = 2.1; |
386 |
alpha = -0.001; |
387 |
L_10: |
388 |
if (alpha <= 0.0) |
389 |
alpha = alpha + 0.001; |
390 |
else |
391 |
alpha = 2.0 * alpha; |
392 |
|
393 |
if (alpha > maxalpha) |
394 |
{ |
395 |
stable = FALSE; |
396 |
fprintf(pcmlogfile,"Precond - Incomplete Cholesky is Unstable\n"); |
397 |
} else |
398 |
{ |
399 |
factor = 1.0 + alpha; |
400 |
for (i=0; i < nvar; i++) |
401 |
{ |
402 |
f_diag[i] = factor * h_diag[i]; |
403 |
for(j = c_init[i]; j<= c_stop[i]; j++) |
404 |
{ |
405 |
k = c_index[j]; |
406 |
f_i = f[c_value[j]]; |
407 |
f_diag[i] = f_diag[i] - (f_i*f_i*f_diag[k]); |
408 |
icount++; |
409 |
} |
410 |
if (f_diag[i] <= 0.0) goto L_10; |
411 |
if (f_diag[i] < 1.0e-7) f_diag[i] = 1.0e-7; |
412 |
f_diag[i] = 1.0 / f_diag[i]; |
413 |
for( j = h_init[i]; j < h_stop[i]; j++) |
414 |
{ |
415 |
k = h_index[j]; |
416 |
f[j] = h[j]; |
417 |
ii = c_init[i]; |
418 |
kk = c_init[k]; |
419 |
while (ii <= c_stop[i] && kk <= c_stop[k]) |
420 |
{ |
421 |
iii = c_index[ii]; |
422 |
kkk = c_index[kk]; |
423 |
if (iii < kkk) |
424 |
ii = ii + 1; |
425 |
else if (kkk < iii) |
426 |
kk = kk + 1; |
427 |
else |
428 |
{ |
429 |
f_i = f[c_value[ii]]; |
430 |
f_k = f[c_value[kk]]; |
431 |
f[j] = f[j] - (f_i*f_k*f_diag[iii]); |
432 |
ii = ii + 1; |
433 |
kk = kk + 1; |
434 |
icount = icount + 1; |
435 |
} |
436 |
} |
437 |
} |
438 |
} |
439 |
} |
440 |
} |
441 |
if (method == ICCG ) |
442 |
{ |
443 |
if (stable) |
444 |
{ |
445 |
for (i=0; i < nvar; i++) |
446 |
s[i] = r[i]; |
447 |
|
448 |
for (i=0; i < nvar; i++) |
449 |
{ |
450 |
s[i] = s[i] * f_diag[i]; |
451 |
for( j = h_init[i]; j < h_stop[i]; j++) |
452 |
{ |
453 |
k = h_index[j]; |
454 |
s[k] = s[k] - f[j]*s[i]; |
455 |
} |
456 |
} |
457 |
for (i = nvar-1; i >= 0; i--) |
458 |
{ |
459 |
s[i] = s[i] / f_diag[i]; |
460 |
for( j = h_init[i]; j < h_stop[i]; j++) |
461 |
{ |
462 |
k = h_index[j]; |
463 |
s[i] = s[i] - f[j]*s[k]; |
464 |
} |
465 |
s[i] = s[i] * f_diag[i]; |
466 |
} |
467 |
} else |
468 |
{ |
469 |
for (i=0; i < nvar; i++) |
470 |
s[i] = r[i] / fabs(h_diag[i]); |
471 |
} |
472 |
} |
473 |
free_ivector(c_init ,0,maxvar); |
474 |
free_ivector(c_stop ,0,maxvar); |
475 |
free_dvector(diag ,0,maxvar); |
476 |
} |
477 |
|
478 |
void cholesky(int nvar, double *a, double *b) |
479 |
{ |
480 |
int i,j,k,ii,ij,ik,im,jk,jm,ki,kk; |
481 |
double r,sa,t; |
482 |
|
483 |
ii = 0; |
484 |
for (i=0; i < nvar; i++) |
485 |
{ |
486 |
im = i - 1; |
487 |
if (i != 0) |
488 |
{ |
489 |
ij = i; |
490 |
for( j = 0; j < im; j++) |
491 |
{ |
492 |
r = a[ij]; |
493 |
if (j != 0) |
494 |
{ |
495 |
ik = i; |
496 |
jk = j; |
497 |
jm = j - 1; |
498 |
for ( k = 0; k < jm; k++) |
499 |
{ |
500 |
r = r - a[ik]*a[jk]; |
501 |
ik = nvar-1 - k + ik; |
502 |
jk = nvar-1 - k + jk; |
503 |
} |
504 |
} |
505 |
a[ij] = r; |
506 |
ij = nvar-1 - j + ij; |
507 |
} |
508 |
} |
509 |
r = a[ii]; |
510 |
if (i != 0) |
511 |
{ |
512 |
kk = 0; |
513 |
ik = i; |
514 |
for (k = 0; k < im; k++) |
515 |
{ |
516 |
sa = a[ik]; |
517 |
t = sa * a[kk]; |
518 |
a[ik] = t; |
519 |
r = r - sa*t; |
520 |
ik = nvar-1 - k + ik; |
521 |
kk = nvar-1 - k +1 + kk; |
522 |
} |
523 |
} |
524 |
a[ii] = 1.0/ r; |
525 |
ii = nvar-1 - i + 1 + ii; |
526 |
} |
527 |
|
528 |
// solve linear equations; first solve Ly = b for y |
529 |
|
530 |
for (i=0; i < nvar; i++) |
531 |
{ |
532 |
if (i != 0) |
533 |
{ |
534 |
ik = i; |
535 |
im = i - 1; |
536 |
r = b[i]; |
537 |
for (k = 0; k < im; k++) |
538 |
{ |
539 |
r = r - b[k]*a[ik]; |
540 |
ik = nvar-1 - k + ik; |
541 |
} |
542 |
b[i] = r; |
543 |
} |
544 |
} |
545 |
|
546 |
// now, solve (D)(L transpose)(x) = y for x |
547 |
|
548 |
ii = (nvar*(nvar+1)/2)-1; |
549 |
for ( j = 0; j < nvar; j++) |
550 |
{ |
551 |
i = nvar-1 - j; |
552 |
r = b[i] * a[ii]; |
553 |
if (j != 0) |
554 |
{ |
555 |
im = i + 1; |
556 |
ki = ii + 1; |
557 |
for( k = im; k < nvar; k++) |
558 |
{ |
559 |
r = r - a[ki]*b[k]; |
560 |
ki++; |
561 |
} |
562 |
} |
563 |
b[i] = r; |
564 |
ii = ii - j - 2; |
565 |
} |
566 |
} |