1 |
#define EXTERN extern |
2 |
|
3 |
// mode |
4 |
#define NONE 0 |
5 |
#define NEWTON 1 |
6 |
#define TNCG 2 |
7 |
#define DTNCG 3 |
8 |
#define AUTO 4 |
9 |
// method |
10 |
#define DIAG 5 |
11 |
#define SSOR 6 |
12 |
#define ICCG 7 |
13 |
#define BLOCK 8 |
14 |
|
15 |
|
16 |
#include "pcwin.h" |
17 |
#include "pcmod.h" |
18 |
|
19 |
#include "energies.h" |
20 |
#include "derivs.h" |
21 |
#include "utility.h" |
22 |
|
23 |
//#define min(a,b) (((a) < (b) ? (a) : (b)) |
24 |
|
25 |
EXTERN struct t_minvar{ |
26 |
double cappa, stpmin, stpmax, angmax; |
27 |
int intmax; |
28 |
} minvar; |
29 |
|
30 |
void solve(int,int,int,int, double *,double *,double *,double *,int *, |
31 |
int *,int *,double *,int *,int *,int *, double (*)()); |
32 |
|
33 |
|
34 |
static void precond(int,int ,int ,double *,double *,double *,int *,int *,int *,double *); |
35 |
static void cholesky(int , double *, double *); |
36 |
static void column(int, int *, int *,int *,int *,int *,int *, int *); |
37 |
|
38 |
int stable; |
39 |
double *f, *f_diag; |
40 |
int *c_index, *c_value; |
41 |
|
42 |
void solve(int mode,int method,int negtest,int nvar,double *p,double *x, |
43 |
double *g, double *h, int *h_init, int *h_stop, int *h_index, |
44 |
double *h_diag,int *icycle,int *iter_cg,int *fg_call,double (*fgvalue) ()) |
45 |
{ |
46 |
int i,j,k, iter,maxiter,maxhess; |
47 |
double *m, *d, *si, *r, *q; // |
48 |
double alpha,beta,delta,sigma; |
49 |
double f_sigma; |
50 |
double *x_sigma; |
51 |
double *g_sigma; |
52 |
double g_norm,g_rms,eps,converge; |
53 |
double hj,gg,dq,rr,dd,rs,rs_new,r_norm; |
54 |
int maxvar = 3*natom; |
55 |
|
56 |
if (natom < 300) |
57 |
maxhess = (3*natom*(3*natom-1))/2; |
58 |
else if (natom < 800) |
59 |
maxhess = (3*natom*(3*natom-1))/3; |
60 |
else |
61 |
maxhess = (3*natom*(3*natom-1))/20; |
62 |
|
63 |
|
64 |
m = dvector(0,maxvar); |
65 |
r = dvector(0,maxvar); |
66 |
si = dvector(0,maxvar); |
67 |
d = dvector(0,maxvar); |
68 |
q = dvector(0,maxvar); |
69 |
x_sigma = dvector(0,maxvar); |
70 |
g_sigma = dvector(0,maxvar); |
71 |
f_diag = dvector(0,maxvar); |
72 |
f = dvector(0,maxhess); |
73 |
c_index = ivector(0,maxhess); |
74 |
c_value = ivector(0,maxhess); |
75 |
|
76 |
if (mode != DTNCG && method != NONE) |
77 |
{ |
78 |
for (i=0; i < nvar; i++) |
79 |
m[i] = 1.0 / sqrt(fabs(h_diag[i])); |
80 |
|
81 |
for (i=0; i < nvar; i++) |
82 |
{ |
83 |
g[i] = g[i] * m[i]; |
84 |
h_diag[i] *= m[i] * m[i]; |
85 |
for (j = h_init[i]; j < h_stop[i]; j++) |
86 |
{ |
87 |
k = h_index[j]; |
88 |
h[j] *= m[i] * m[k]; |
89 |
} |
90 |
} |
91 |
} |
92 |
|
93 |
iter = 0; |
94 |
gg = 0.0; |
95 |
for (i=0; i < nvar; i++) |
96 |
{ |
97 |
p[i] = 0.0; |
98 |
r[i] = -g[i]; |
99 |
gg += g[i]*g[i]; |
100 |
} |
101 |
g_norm = sqrt(gg); |
102 |
precond(method, iter,nvar, si, r, h, h_init, h_stop, h_index, h_diag); |
103 |
rs = 0.0; |
104 |
for (i=0; i < nvar; i++) |
105 |
{ |
106 |
d[i] = si[i]; |
107 |
rs += r[i]*si[i]; |
108 |
} |
109 |
|
110 |
if (mode == NEWTON) |
111 |
{ |
112 |
eps = 1.0e-10; |
113 |
maxiter = nvar; |
114 |
} else if (mode == TNCG || mode == DTNCG) |
115 |
{ |
116 |
delta = 1.0; |
117 |
eps = delta/(double) *icycle; |
118 |
g_rms = g_norm/sqrt((double) nvar); |
119 |
eps = MinFun(eps,g_rms); |
120 |
converge = 1.0; |
121 |
maxiter = (int) (10.0*sqrt((double)nvar)); |
122 |
} |
123 |
iter = 1; |
124 |
|
125 |
// evaluate the matrix vector product |
126 |
while (TRUE) |
127 |
{ |
128 |
if (mode == TNCG || mode == NEWTON) |
129 |
{ |
130 |
for (i=0; i < nvar; i++) |
131 |
q[i] = 0.0; |
132 |
for (i=0; i < nvar; i++) |
133 |
{ |
134 |
q[i] += h_diag[i]*d[i]; |
135 |
for(j= h_init[i]; j < h_stop[i]; j++) |
136 |
{ |
137 |
k = h_index[j]; |
138 |
hj = h[j]; |
139 |
q[i] += hj*d[k]; |
140 |
q[k] += hj*d[i]; |
141 |
} |
142 |
} |
143 |
}else if (mode == DTNCG) |
144 |
{ |
145 |
dd = 0.0; |
146 |
for (i=0; i < nvar; i++) |
147 |
dd += d[i]*d[i]; |
148 |
|
149 |
sigma = 1.0e-7 / sqrt(dd); |
150 |
|
151 |
for (i=0; i < nvar; i++) |
152 |
x_sigma[i] = x[i] + sigma*d[i]; |
153 |
|
154 |
fg_call = fg_call + 1; |
155 |
f_sigma = fgvalue (x_sigma,g_sigma); |
156 |
|
157 |
for (i=0; i < nvar; i++) |
158 |
q[i] = (g_sigma[i]-g[i]) / sigma; |
159 |
} |
160 |
|
161 |
// test for direction of negative curvature |
162 |
dq = 0.0; |
163 |
for (i=0; i < nvar; i++) |
164 |
dq += d[i]*q[i]; |
165 |
|
166 |
if (negtest) |
167 |
{ |
168 |
if (dq <= 0.0) |
169 |
{ |
170 |
if (iter == 1) |
171 |
{ |
172 |
for (i=0; i < nvar; i++) |
173 |
p[i] = d[i]; |
174 |
} |
175 |
goto L_10; |
176 |
} |
177 |
} |
178 |
|
179 |
// test truncated Newton termination criterion |
180 |
alpha = rs / dq; |
181 |
rr = 0.0; |
182 |
for (i=0; i < nvar; i++) |
183 |
{ |
184 |
p[i] += alpha*d[i]; |
185 |
r[i] -= alpha*q[i]; |
186 |
rr += r[i]*r[i]; |
187 |
} |
188 |
r_norm = sqrt(rr); |
189 |
if (r_norm/g_norm <= eps) |
190 |
goto L_10; |
191 |
|
192 |
// precondition |
193 |
precond(method,iter,nvar, si, r,h,h_init,h_stop, |
194 |
h_index,h_diag); |
195 |
|
196 |
// update direction |
197 |
rs_new = 0.0; |
198 |
for (i=0; i < nvar; i++) |
199 |
rs_new += r[i]*si[i]; |
200 |
|
201 |
beta = rs_new/rs; |
202 |
rs = rs_new; |
203 |
|
204 |
for (i=0; i < nvar; i++) |
205 |
d[i] = si[i] + beta*d[i]; |
206 |
|
207 |
// check for overlimit next iteration |
208 |
if (iter >= maxiter) |
209 |
goto L_10; |
210 |
|
211 |
iter++; |
212 |
} |
213 |
|
214 |
// termination |
215 |
|
216 |
L_10: |
217 |
if (mode != DTNCG && method != NONE) |
218 |
{ |
219 |
for (i=0; i < nvar; i++) |
220 |
{ |
221 |
p[i] *= m[i]; |
222 |
g[i] /= m[i]; |
223 |
} |
224 |
} |
225 |
iter_cg += iter; |
226 |
free_dvector( m ,0,maxvar); |
227 |
free_dvector( r ,0,maxvar); |
228 |
free_dvector( si ,0,maxvar); |
229 |
free_dvector( d ,0,maxvar); |
230 |
free_dvector( q ,0,maxvar); |
231 |
free_dvector( x_sigma ,0,maxvar); |
232 |
free_dvector( g_sigma ,0,maxvar); |
233 |
free_dvector( f_diag ,0,maxvar); |
234 |
free_dvector( f ,0,maxhess); |
235 |
free_ivector( c_index ,0,maxhess); |
236 |
free_ivector( c_value ,0,maxhess); |
237 |
} |
238 |
//================================ |
239 |
// ============================== |
240 |
void column(int nvar, int *h_init, int *h_stop,int *h_index, |
241 |
int *c_init,int *c_stop,int *c_index, int *c_value) |
242 |
{ |
243 |
int i,j,k,m; |
244 |
|
245 |
for (i=0; i < nvar; i++) |
246 |
{ |
247 |
c_init[i] = 0; |
248 |
c_stop[i] = 0; |
249 |
} |
250 |
// count number of elements in each column |
251 |
for (i=0; i < nvar; i++) |
252 |
{ |
253 |
for (j=h_init[i]; j < h_stop[i]; j++) |
254 |
{ |
255 |
k = h_index[j]; |
256 |
c_stop[k]++; |
257 |
} |
258 |
} |
259 |
// set each beginning marker past the last element for its column |
260 |
c_init[0] = c_stop[0] + 1; |
261 |
for (i=1; i <nvar; i++) |
262 |
{ |
263 |
c_init[i] = c_init[i-1] + c_stop[i]; |
264 |
} |
265 |
// set column index scanning rows in reverse order |
266 |
for (i = nvar-1; i >= 0; i--) |
267 |
{ |
268 |
for (j=h_init[i]; j < h_stop[i]; j++) |
269 |
{ |
270 |
k = h_index[j]; |
271 |
m = c_init[k]-1; |
272 |
c_init[k] = m; |
273 |
c_index[m] = i; |
274 |
c_value[m] = j; |
275 |
} |
276 |
} |
277 |
for(i=0; i < nvar; i++) |
278 |
{ |
279 |
c_stop[i] = c_init[i] + c_stop[i]-1; |
280 |
} |
281 |
} |
282 |
// ============================== |
283 |
void precond(int method, int iter, int nvar, double *s, double *r, |
284 |
double *h, int *h_init, int *h_stop,int *h_index, |
285 |
double *h_diag) |
286 |
{ |
287 |
int i,j,k,ii,kk,iii,kkk,nblock,ix,iy,iz,icount; |
288 |
int *c_init, *c_stop; // c_init[maxvar],c_stop[maxvar]; |
289 |
double a[6],b[3]; |
290 |
double omega,factor,*diag; // diag[maxvar]; |
291 |
double maxalpha, alpha, f_i, f_k; |
292 |
int maxvar; |
293 |
|
294 |
maxvar = 3*natom; |
295 |
c_init = ivector(0,maxvar); |
296 |
c_stop = ivector(0,maxvar); |
297 |
diag = dvector(0,maxvar); |
298 |
|
299 |
|
300 |
// no preconditioning |
301 |
if (method == NONE) |
302 |
{ |
303 |
for (i=0; i < nvar; i++) |
304 |
s[i] = r[i]; |
305 |
} |
306 |
// diagonal |
307 |
if (method == DIAG) |
308 |
{ |
309 |
for (i=0; i < nvar; i++) |
310 |
s[i] = r[i]/ fabs(h_diag[i]); |
311 |
} |
312 |
// block diagonal |
313 |
if (method == BLOCK) |
314 |
{ |
315 |
nblock = 3; |
316 |
for (i=1; i <= natom; i++) |
317 |
{ |
318 |
iz = (i-1)*3+2; |
319 |
iy = (i-1)*3+1; |
320 |
ix = (i-1)*3; |
321 |
a[0] = h_diag[ix]; |
322 |
if (h_index[h_init[ix]] == iy) |
323 |
a[1] = h[h_init[ix]]; |
324 |
else |
325 |
a[1] = 0.0; |
326 |
|
327 |
if (h_index[h_init[ix]+1] == iz) |
328 |
a[2] = h[h_init[ix]+1]; |
329 |
else |
330 |
a[2] = 0.0; |
331 |
|
332 |
a[3] = h_diag[iy]; |
333 |
if (h_index[h_init[iy]] == iz) |
334 |
a[4] = h[h_init[iy]]; |
335 |
else |
336 |
a[4] = 0.0; |
337 |
|
338 |
a[5] = h_diag[iz]; |
339 |
b[0] = r[ix]; |
340 |
b[1] = r[iy]; |
341 |
b[2] = r[iz]; |
342 |
cholesky (nblock,a,b); |
343 |
s[ix] = b[0]; |
344 |
s[iy] = b[1]; |
345 |
s[iz] = b[2]; |
346 |
} |
347 |
} |
348 |
|
349 |
// SSOR |
350 |
if (method == SSOR) |
351 |
{ |
352 |
omega = 1.0; |
353 |
factor = 2.0 - omega; |
354 |
for (i=0; i < nvar; i++) |
355 |
{ |
356 |
s[i] = r[i] * factor; |
357 |
diag[i] = h_diag[i] / omega; |
358 |
} |
359 |
for (i=0; i < nvar; i++) |
360 |
{ |
361 |
s[i] = s[i] / diag[i]; |
362 |
for (j=h_init[i]; j< h_stop[i]; j++) |
363 |
{ |
364 |
k = h_index[j]; |
365 |
s[k] = s[k] - h[j]*s[i]; |
366 |
} |
367 |
} |
368 |
for (i=nvar-1; i >= 0; i--) |
369 |
{ |
370 |
s[i] = s[i] * diag[i]; |
371 |
for (j=h_init[i]; j< h_stop[i]; j++) |
372 |
{ |
373 |
k = h_index[j]; |
374 |
s[i] = s[i] - h[j]*s[k]; |
375 |
} |
376 |
s[i] = s[i] / diag[i]; |
377 |
} |
378 |
} |
379 |
// incomplete cholesky preconditioning |
380 |
if (method == ICCG && iter == 0) |
381 |
{ |
382 |
column (nvar,h_init,h_stop,h_index, |
383 |
c_init,c_stop,c_index,c_value); |
384 |
stable = TRUE; |
385 |
icount = 0; |
386 |
maxalpha = 2.1; |
387 |
alpha = -0.001; |
388 |
L_10: |
389 |
if (alpha <= 0.0) |
390 |
alpha = alpha + 0.001; |
391 |
else |
392 |
alpha = 2.0 * alpha; |
393 |
|
394 |
if (alpha > maxalpha) |
395 |
{ |
396 |
stable = FALSE; |
397 |
fprintf(pcmlogfile,"Precond - Incomplete Cholesky is Unstable\n"); |
398 |
} else |
399 |
{ |
400 |
factor = 1.0 + alpha; |
401 |
for (i=0; i < nvar; i++) |
402 |
{ |
403 |
f_diag[i] = factor * h_diag[i]; |
404 |
for(j = c_init[i]; j<= c_stop[i]; j++) |
405 |
{ |
406 |
k = c_index[j]; |
407 |
f_i = f[c_value[j]]; |
408 |
f_diag[i] = f_diag[i] - (f_i*f_i*f_diag[k]); |
409 |
icount++; |
410 |
} |
411 |
if (f_diag[i] <= 0.0) goto L_10; |
412 |
if (f_diag[i] < 1.0e-7) f_diag[i] = 1.0e-7; |
413 |
f_diag[i] = 1.0 / f_diag[i]; |
414 |
for( j = h_init[i]; j < h_stop[i]; j++) |
415 |
{ |
416 |
k = h_index[j]; |
417 |
f[j] = h[j]; |
418 |
ii = c_init[i]; |
419 |
kk = c_init[k]; |
420 |
while (ii <= c_stop[i] && kk <= c_stop[k]) |
421 |
{ |
422 |
iii = c_index[ii]; |
423 |
kkk = c_index[kk]; |
424 |
if (iii < kkk) |
425 |
ii = ii + 1; |
426 |
else if (kkk < iii) |
427 |
kk = kk + 1; |
428 |
else |
429 |
{ |
430 |
f_i = f[c_value[ii]]; |
431 |
f_k = f[c_value[kk]]; |
432 |
f[j] = f[j] - (f_i*f_k*f_diag[iii]); |
433 |
ii = ii + 1; |
434 |
kk = kk + 1; |
435 |
icount = icount + 1; |
436 |
} |
437 |
} |
438 |
} |
439 |
} |
440 |
} |
441 |
} |
442 |
// incomplete cholesky preconditioning (solution phase) |
443 |
|
444 |
if (method == ICCG) |
445 |
{ |
446 |
if (stable) |
447 |
{ |
448 |
for (i=0; i < nvar; i++) |
449 |
s[i] = r[i]; |
450 |
|
451 |
for (i=0; i < nvar; i++) |
452 |
{ |
453 |
s[i] = s[i] * f_diag[i]; |
454 |
for( j = h_init[i]; j < h_stop[i]; j++) |
455 |
{ |
456 |
k = h_index[j]; |
457 |
s[k] = s[k] - f[j]*s[i]; |
458 |
} |
459 |
} |
460 |
for (i = nvar-1; i >= 0; i--) |
461 |
{ |
462 |
s[i] = s[i] / f_diag[i]; |
463 |
for( j = h_init[i]; j < h_stop[i]; j++) |
464 |
{ |
465 |
k = h_index[j]; |
466 |
s[i] = s[i] - f[j]*s[k]; |
467 |
} |
468 |
s[i] = s[i] * f_diag[i]; |
469 |
} |
470 |
} else |
471 |
{ |
472 |
for (i=0; i < nvar; i++) |
473 |
s[i] = r[i] / fabs(h_diag[i]); |
474 |
} |
475 |
} |
476 |
free_ivector(c_init ,0,maxvar); |
477 |
free_ivector(c_stop ,0,maxvar); |
478 |
free_dvector(diag ,0,maxvar); |
479 |
} |
480 |
|
481 |
void cholesky(int nvar, double *a, double *b) |
482 |
{ |
483 |
int i,j,k,ii,ij,ik,im,jk,jm,ki,kk; |
484 |
double r,sa,t; |
485 |
|
486 |
ii = 0; |
487 |
for (i=0; i < nvar; i++) |
488 |
{ |
489 |
im = i - 1; |
490 |
if (i != 0) |
491 |
{ |
492 |
ij = i; |
493 |
for( j = 0; j < im; j++) |
494 |
{ |
495 |
r = a[ij]; |
496 |
if (j != 0) |
497 |
{ |
498 |
ik = i; |
499 |
jk = j; |
500 |
jm = j - 1; |
501 |
for ( k = 0; k < jm; k++) |
502 |
{ |
503 |
r = r - a[ik]*a[jk]; |
504 |
ik = nvar-1 - k + ik; |
505 |
jk = nvar-1 - k + jk; |
506 |
} |
507 |
} |
508 |
a[ij] = r; |
509 |
ij = nvar-1 - j + ij; |
510 |
} |
511 |
} |
512 |
r = a[ii]; |
513 |
if (i != 0) |
514 |
{ |
515 |
kk = 0; |
516 |
ik = i; |
517 |
for (k = 0; k < im; k++) |
518 |
{ |
519 |
sa = a[ik]; |
520 |
t = sa * a[kk]; |
521 |
a[ik] = t; |
522 |
r = r - sa*t; |
523 |
ik = nvar-1 - k + ik; |
524 |
kk = nvar-1 - k +1 + kk; |
525 |
} |
526 |
} |
527 |
a[ii] = 1.0/ r; |
528 |
ii = nvar-1 - i + 1 + ii; |
529 |
} |
530 |
|
531 |
// solve linear equations; first solve Ly = b for y |
532 |
|
533 |
for (i=0; i < nvar; i++) |
534 |
{ |
535 |
if (i != 0) |
536 |
{ |
537 |
ik = i; |
538 |
im = i - 1; |
539 |
r = b[i]; |
540 |
for (k = 0; k < im; k++) |
541 |
{ |
542 |
r = r - b[k]*a[ik]; |
543 |
ik = nvar-1 - k + ik; |
544 |
} |
545 |
b[i] = r; |
546 |
} |
547 |
} |
548 |
|
549 |
// now, solve (D)(L transpose)(x) = y for x |
550 |
|
551 |
ii = (nvar*(nvar+1)/2)-1; |
552 |
for ( j = 0; j < nvar; j++) |
553 |
{ |
554 |
i = nvar-1 - j; |
555 |
r = b[i] * a[ii]; |
556 |
if (j != 0) |
557 |
{ |
558 |
im = i + 1; |
559 |
ki = ii + 1; |
560 |
for( k = im; k < nvar; k++) |
561 |
{ |
562 |
r = r - a[ki]*b[k]; |
563 |
ki++; |
564 |
} |
565 |
} |
566 |
b[i] = r; |
567 |
ii = ii - j - 2; |
568 |
} |
569 |
} |