ViewVC Help
View File | Revision Log | Show Annotations | View Changeset | Root Listing
root/BioCocoa/BioSwarm/trunk/Models/SubcellularElementModel.m
Revision: 350
Committed: Wed Aug 19 16:45:07 2015 UTC (4 years, 9 months ago) by schristley
File size: 219520 byte(s)
Log Message:
MPI timing
Line File contents
1 /*
2 SubcellularElementModel.m
3
4 subcellular element method for cell spatial representation.
5
6 Copyright (C) 2010-2011 Scott Christley
7
8 Author: Scott Christley <schristley@mac.com>
9 Date: 2010
10
11 This file is part of the BioSwarm Framework.
12
13 This library is free software; you can redistribute it and/or
14 modify it under the terms of the GNU Lesser General Public
15 License as published by the Free Software Foundation; either
16 version 2 of the License, or (at your option) any later version.
17
18 This library is distributed in the hope that it will be useful,
19 but WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 Library General Public License for more details.
22
23 You should have received a copy of the GNU Lesser General Public
24 License along with this library; if not, write to the Free
25 Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
26 Boston, MA 02111 USA.
27 */
28
29 #import "SubcellularElementModel.h"
30 #import "Grid2DArray.h"
31 #import "HybridModel.h"
32 #import "BiologicalModel.h"
33 #import "BioSwarmController.h"
34
35 #import "internal.h"
36
37 @implementation SubcellularElementModel
38
39 - (void)collectParameters
40 {
41 int i, j, k;
42
43 // total up the parameters
44 parameterNames = [NSMutableArray new];
45 [parameterNames addObject: @"MAX_RANGE_SQ"];
46
47 for (i = 0; i < [forces count]; ++i) {
48 NSMutableDictionary *d = [forces objectAtIndex: i];
49
50 if ([[d objectForKey: @"potential"] isEqualToString: @"D_MORSE"]
51 || [[d objectForKey: @"potential"] isEqualToString: @"PD_MORSE"]) {
52 [parameterNames addObject: [NSString stringWithFormat: @"%@_epsilon", [d objectForKey: @"name"]]];
53 [parameterNames addObject: [NSString stringWithFormat: @"%@_equilibrium", [d objectForKey: @"name"]]];
54 [parameterNames addObject: [NSString stringWithFormat: @"%@_interactionMax", [d objectForKey: @"name"]]];
55 }
56
57 if ([[d objectForKey: @"potential"] isEqualToString: @"MORSE"]
58 || [[d objectForKey: @"potential"] isEqualToString: @"PMORSE"]) {
59 [parameterNames addObject: [NSString stringWithFormat: @"%@_U0", [d objectForKey: @"name"]]];
60 [parameterNames addObject: [NSString stringWithFormat: @"%@_U1", [d objectForKey: @"name"]]];
61 [parameterNames addObject: [NSString stringWithFormat: @"%@_ETA0", [d objectForKey: @"name"]]];
62 [parameterNames addObject: [NSString stringWithFormat: @"%@_ETA1", [d objectForKey: @"name"]]];
63 }
64 }
65 numOfParameters = [parameterNames count];
66 }
67
68 - initWithModel: (NSDictionary *)aModel andKey: (NSString *)aKey andController: anObj andParent: aParent
69 {
70 NSString *param;
71 NSArray *a;
72 id pm;
73 int i, j;
74
75 [super initWithModel: aModel andKey: aKey andController: anObj andParent: aParent];
76
77 param = [model objectForKey: @"numOfCells"];
78 if (param) numOfCells = [param intValue];
79 else numOfCells = [aParent numOfModelObjects];
80 //CHECK_PARAM(param, "numOfCells");
81 //numOfCells = [param intValue];
82
83 param = [model objectForKey: @"maxCells"];
84 if (param) maxCells = [param intValue];
85 else {
86 maxCells = numOfCells;
87 printf("WARNING: maxCells not defined, using maxCells = %d\n", maxCells);
88 }
89
90 param = [model objectForKey: @"initialElements"];
91 CHECK_PARAM(param, "initialElements");
92 initialElements = [param intValue];
93
94 param = [model objectForKey: @"numOfElements"];
95 CHECK_PARAM(param, "numOfElements");
96 numElements = [param intValue];
97
98 param = [model objectForKey: @"maxElements"];
99 if (!param) maxElements = initialElements;
100 else maxElements = [param intValue];
101 if (maxElements < (initialElements * maxCells)) {
102 printf("WARNING: maxElements less than (initialElements * maxCells), increasing maxElements = %d\n", initialElements * maxCells);
103 maxElements = initialElements * maxCells;
104 }
105
106 param = [model objectForKey: @"maxElementTypes"];
107 if (!param) {
108 maxElementTypes = 32;
109 printf("WARNING: maxElementTypes not defined, using maxElementTypes = %d\n", maxElementTypes);
110 } else maxElementTypes = [param intValue];
111
112 param = [model objectForKey: @"maxForces"];
113 if (!param) {
114 maxForces = 32;
115 printf("WARNING: maxForces not defined, using maxForces = %d\n", maxForces);
116 } else maxForces = [param intValue];
117
118 param = [model objectForKey: @"maxSectors"];
119 if (!param) {
120 maxSectors = 4096;
121 printf("WARNING: maxSectors not defined, using maxSectors = %d\n", maxSectors);
122 } else maxSectors = [param intValue];
123
124 param = [model objectForKey: @"maxElementsPerSector"];
125 if (!param) {
126 maxElementsPerSector = 1024;
127 printf("WARNING: maxElementsPerSector not defined, using maxElementsPerSector = %d\n", maxSectors);
128 } else maxElementsPerSector = [param intValue];
129
130 readOnlySectorBorder = NO;
131 param = [model objectForKey: @"readOnlySectorBorder"];
132 if (param) readOnlySectorBorder = [param boolValue];
133 if ([self isDebug]) printf("readOnlySectorBorder = %d\n", readOnlySectorBorder);
134
135 sectorSize = 0;
136
137 // cell parameters
138 param = [model objectForKey: @"cellRadius"];
139 CHECK_PARAM(param, "cellRadius");
140 cellRadius = [param doubleValue];
141
142 // physical parameters
143 param = [model objectForKey: @"viscousTimescale"];
144 CHECK_PARAM(param, "viscousTimescale");
145 viscousTimescale = [param doubleValue];
146
147 param = [model objectForKey: @"elasticModulus"];
148 CHECK_PARAM(param, "elasticModulus");
149 elasticModulus = [param doubleValue];
150
151 useMesh = NO;
152 param = [model objectForKey: @"mesh"];
153 CHECK_PARAM(param, "mesh");
154 useMesh = [param boolValue];
155
156 // force parameters
157 a = [model objectForKey: @"forces"];
158 CHECK_PARAM(a, "forces");
159 forces = [NSMutableArray new];
160 for (i = 0; i < [a count]; ++i)
161 [forces addObject: [NSMutableDictionary dictionaryWithDictionary: [a objectAtIndex: i]]];
162
163 int dims = [[super spatialModel] dimensions];
164 if ((dims != 2) && (dims != 3)) {
165 printf("ERROR: Only 2d and 3d supported for SubcellularElementModel\n");
166 return nil;
167 }
168
169 // numerical constants
170 double pi=4.0*atan(1.0);
171 double ot=1.0/3.0;
172 double p3=pi/(3.0*sqrt(2.0));
173 double p2=pi/(2.0*sqrt(3.0));
174
175 // derived quantities
176 double kappa_cell=elasticModulus*cellRadius;
177 double kappa_element=kappa_cell/pow(numElements,ot);
178 double damping_cell=viscousTimescale*kappa_cell;
179 double damping_element=damping_cell/numElements;
180
181 interactionMaxSquared = 0.0;
182 numOfParameters = 1;
183 forceMap = [NSMutableDictionary new];
184 forceTypeMap = [NSMutableDictionary new];
185 for (i = 0; i < [forces count]; ++i) {
186 NSMutableDictionary *d = [forces objectAtIndex: i];
187 NSString *forceName = [d objectForKey: @"name"];
188 if (!forceName) {
189 printf("ERROR: force is missing 'name' attribute.\n");
190 return nil;
191 }
192 if ([forceMap objectForKey: forceName]) {
193 printf("ERROR: Two forces with same name: %s\n", [forceName UTF8String]);
194 return nil;
195 }
196
197 if ([[d objectForKey: @"potential"] isEqualToString: @"D_MORSE"]
198 || [[d objectForKey: @"potential"] isEqualToString: @"PD_MORSE"]) {
199 double epsilon = [[d objectForKey: @"epsilon"] doubleValue];
200 double equilibrium = [[d objectForKey: @"equilibrium"] doubleValue];
201 double interactionMax = [[d objectForKey: @"interactionMax"] doubleValue];
202
203 // look for parameter overrides
204 NSString *s = [NSString stringWithFormat: @"%@_epsilon", forceName];
205 param = [model objectForKey: s];
206 if (param) epsilon = [param doubleValue];
207
208 s = [NSString stringWithFormat: @"%@_equilibrium", forceName];
209 param = [model objectForKey: s];
210 if (param) equilibrium = [param doubleValue];
211
212 s = [NSString stringWithFormat: @"%@_interactionMax", forceName];
213 param = [model objectForKey: s];
214 if (param) interactionMax = [param doubleValue];
215
216 double r_equil, eqSquared, rho, pot_min, factor;
217 if (dims == 2) {
218 r_equil = equilibrium*cellRadius*sqrt(12*p2/(double)numElements);
219 eqSquared = r_equil*r_equil;
220 rho = -(1.0/(interactionMax*interactionMax-1.0))*log(1.0-sqrt(1.0-epsilon));
221 pot_min=(kappa_element/8)*(r_equil/rho)*(r_equil/rho);
222 factor = 4.0*pot_min*rho/(r_equil*r_equil)/damping_element;
223
224 printf("For 2D\n");
225 printf("%s rho = %lf\n", [forceName UTF8String], rho);
226 printf("%s factor = %lf\n", [forceName UTF8String], factor);
227 printf("%s equilibrium point sq = %lf\n", [forceName UTF8String], eqSquared);
228 } else {
229 r_equil = equilibrium*cellRadius*pow(p3/(double)numElements,ot);
230 eqSquared = r_equil*r_equil;
231 rho = -(1.0/(interactionMax*interactionMax-1.0))*log(1.0-sqrt(1.0-epsilon));
232 pot_min=(kappa_element/8)*(r_equil/rho)*(r_equil/rho);
233 factor = 4.0*pot_min*rho/(r_equil*r_equil)/damping_element;
234
235 printf("For 3D\n");
236 printf("%s rho = %lf\n", [forceName UTF8String], rho);
237 printf("%s factor = %lf\n", [forceName UTF8String], factor);
238 printf("%s equilibrium point sq = %lf\n", [forceName UTF8String], eqSquared);
239 }
240
241 // the smallest sector size based upon forces
242 if (i == 0) sectorSize = 1.1 * interactionMax * r_equil;
243 else if (sectorSize > (1.1 * interactionMax * r_equil))
244 sectorSize = 1.1 * interactionMax * r_equil;
245
246 double imax = interactionMax*interactionMax*r_equil*r_equil;
247 if (imax > interactionMaxSquared) interactionMaxSquared = imax;
248
249 [d setObject: [NSNumber numberWithDouble: rho] forKey: @"rho"];
250 [d setObject: [NSNumber numberWithDouble: factor] forKey: @"factor"];
251 [d setObject: [NSNumber numberWithDouble: eqSquared] forKey: @"eqSquared"];
252
253 [forceMap setObject: [NSNumber numberWithInt: numOfParameters] forKey: forceName];
254 if ([[d objectForKey: @"potential"] isEqualToString: @"D_MORSE"])
255 [forceTypeMap setObject: [NSNumber numberWithInt: 0] forKey: forceName];
256 else
257 [forceTypeMap setObject: [NSNumber numberWithInt: 1] forKey: forceName];
258 numOfParameters += 3;
259 }
260
261 if ([[d objectForKey: @"potential"] isEqualToString: @"MORSE"]
262 || [[d objectForKey: @"potential"] isEqualToString: @"PMORSE"]) {
263
264 NSString *s = [NSString stringWithFormat: @"%@_U0", forceName];
265 param = [model objectForKey: s];
266 CHECK_PARAM(param, [s UTF8String]);
267 if (param) [d setObject: param forKey: s];
268
269 s = [NSString stringWithFormat: @"%@_U1", forceName];
270 param = [model objectForKey: s];
271 CHECK_PARAM(param, [s UTF8String]);
272 if (param) [d setObject: param forKey: s];
273
274 s = [NSString stringWithFormat: @"%@_ETA0", forceName];
275 param = [model objectForKey: s];
276 CHECK_PARAM(param, [s UTF8String]);
277 if (param) [d setObject: param forKey: s];
278
279 s = [NSString stringWithFormat: @"%@_ETA1", forceName];
280 param = [model objectForKey: s];
281 CHECK_PARAM(param, [s UTF8String]);
282 if (param) [d setObject: param forKey: s];
283
284 [forceMap setObject: [NSNumber numberWithInt: numOfParameters] forKey: forceName];
285 if ([[d objectForKey: @"potential"] isEqualToString: @"MORSE"])
286 [forceTypeMap setObject: [NSNumber numberWithInt: 2] forKey: forceName];
287 else
288 [forceTypeMap setObject: [NSNumber numberWithInt: 3] forKey: forceName];
289 numOfParameters += 4;
290 }
291 }
292 printf("max range sq = %lf\n", interactionMaxSquared);
293
294 if (sectorSize == 0) {
295 printf("ERROR: sectorSize is zero.\n");
296 return nil;
297 }
298 printf("initial sectorSize = %lf\n", sectorSize);
299 double *domainSize = [[super spatialModel] domainSize];
300 sectorsInDomain = floor(domainSize[0] / sectorSize);
301 sectorSize = domainSize[0] / sectorsInDomain;
302 printf("adjusted sectorSize = %lf, %d\n", sectorSize, sectorsInDomain);
303
304 elementForceTable = malloc(sizeof(int) * maxElementTypes * maxForces);
305 int (*EFT)[maxForces][maxElementTypes] = elementForceTable;
306 elementForceTypeTable = malloc(sizeof(int) * maxElementTypes * maxForces);
307 int (*EFTT)[maxForces][maxElementTypes] = elementForceTypeTable;
308 elementTypeTable = malloc(sizeof(int) * maxElementTypes * maxForces);
309 int (*ETT)[maxForces][maxElementTypes] = elementTypeTable;
310 elementForceFlagTable = malloc(sizeof(int) * maxElementTypes * maxForces);
311 int (*EFFT)[maxForces][maxElementTypes] = elementForceFlagTable;
312
313 int lastEntry[maxElementTypes];
314 for (i = 0; i < maxForces; ++i)
315 for (j = 0; j < maxElementTypes; ++j) {
316 (*EFT)[i][j] = -1;
317 (*EFTT)[i][j] = -1;
318 (*ETT)[i][j] = -1;
319 (*EFFT)[i][j] = -1;
320 lastEntry[j] = 0;
321 }
322
323 NSArray *forceSet = [model objectForKey: @"forceSet"];
324 for (i = 0; i < [forceSet count]; ++i) {
325 NSArray *forceEntry = [forceSet objectAtIndex: i];
326 int t1 = [[forceEntry objectAtIndex: 0] intValue];
327 int t2 = [[forceEntry objectAtIndex: 1] intValue];
328 NSString *fName = [forceEntry objectAtIndex: 2];
329 NSString *fType = [forceEntry objectAtIndex: 3];
330 if (![forceMap objectForKey: fName]) {
331 printf("ERROR: forceSet references unknown force: %s\n", [fName UTF8String]);
332 return nil;
333 }
334 int fNum = [[forceMap objectForKey: fName] intValue];
335 int ftNum = [[forceTypeMap objectForKey: fName] intValue];
336
337 (*EFT)[lastEntry[t1]][t1] = fNum;
338 (*EFTT)[lastEntry[t1]][t1] = ftNum;
339 (*ETT)[lastEntry[t1]][t1] = t2;
340 if ([fType isEqualToString: @"INTRA"]) (*EFFT)[lastEntry[t1]][t1] = FORCE_TYPE_INTRA;
341 else if ([fType isEqualToString: @"INTER"]) (*EFFT)[lastEntry[t1]][t1] = FORCE_TYPE_INTER;
342 else if ([fType isEqualToString: @"COLONY"]) (*EFFT)[lastEntry[t1]][t1] = FORCE_TYPE_COLONY;
343 else if ([fType isEqualToString: @"CELL_CENTER"]) {
344 (*EFFT)[lastEntry[t1]][t1] = FORCE_TYPE_CELL_CENTER;
345 calcCellCenters = YES;
346 } else if ([fType isEqualToString: @"ELEMENT_CENTER"]) {
347 (*EFFT)[lastEntry[t1]][t1] = FORCE_TYPE_ELEMENT_CENTER;
348 calcElementCenters = YES;
349 }
350
351 ++lastEntry[t1];
352 }
353
354 if ([self isDebug]) {
355 printf("elementForceTable =\n");
356 for (i = 0; i < maxForces; ++i) {
357 for (j = 0; j < maxElementTypes; ++j) {
358 if (j) printf(" ");
359 printf("%d", (*EFT)[i][j]);
360 }
361 printf("\n");
362 }
363 printf("elementForceTypeTable =\n");
364 for (i = 0; i < maxForces; ++i) {
365 for (j = 0; j < maxElementTypes; ++j) {
366 if (j) printf(" ");
367 printf("%d", (*EFTT)[i][j]);
368 }
369 printf("\n");
370 }
371 printf("elementTypeTable =\n");
372 for (i = 0; i < maxForces; ++i) {
373 for (j = 0; j < maxElementTypes; ++j) {
374 if (j) printf(" ");
375 printf("%d", (*ETT)[i][j]);
376 }
377 printf("\n");
378 }
379 printf("elementForceFlagTable =\n");
380 for (i = 0; i < maxForces; ++i) {
381 for (j = 0; j < maxElementTypes; ++j) {
382 if (j) printf(" ");
383 printf("%d", (*EFFT)[i][j]);
384 }
385 printf("\n");
386 }
387 }
388
389 // noise/diffusion parameters
390 param = [model objectForKey: @"useDiffusion"];
391 CHECK_PARAM(param, "useDiffusion");
392 useDiffusion = [param boolValue];
393
394 param = [model objectForKey: @"diffusionCoefficient"];
395 if (useDiffusion) CHECK_PARAM(param, "diffusionCoefficient");
396 diffusionCoefficient = [param doubleValue];
397
398 [self collectParameters];
399
400 #if 0
401 rho = (double *)malloc(sizeof(double) * [forces count]);
402 factor = (double *)malloc(sizeof(double) * [forces count]);
403 eqSquared = (double *)malloc(sizeof(double) * [forces count]);
404 interactionMaxSquared = 0.0;
405 for (i = 0; i < [forces count]; ++i) {
406 NSDictionary *d = [forces objectAtIndex: i];
407 double epsilon = [[d objectForKey: @"epsilon"] doubleValue];
408 double equilibrium = [[d objectForKey: @"equilibrium"] doubleValue];
409 double interactionMax = [[d objectForKey: @"interactionMax"] doubleValue];
410
411 double r_equil = equilibrium*cellRadius*pow(p3/(double)numElements,ot);
412 eqSquared[i] = r_equil*r_equil;
413 rho[i] = -(1.0/(interactionMax*interactionMax-1.0))*log(1.0-sqrt(1.0-epsilon));
414 double pot_min=(kappa_element/8)*(r_equil/rho[i])*(r_equil/rho[i]);
415 factor[i] = 4.0*pot_min*rho[i]/(r_equil*r_equil)/damping_element;
416
417 double imax = interactionMax*interactionMax*r_equil*r_equil;
418 if (imax > interactionMaxSquared) interactionMaxSquared = imax;
419
420 printf("%s rho = %lf\n", [[d objectForKey: @"type"] UTF8String], rho[i]);
421 printf("%s factor = %lf\n", [[d objectForKey: @"type"] UTF8String], factor[i]);
422 printf("%s equilibrium point sq = %lf\n", [[d objectForKey: @"type"] UTF8String], eqSquared[i]);
423 }
424 printf("max range sq = %lf\n", interactionMaxSquared);
425 #endif
426
427 // emperical
428 double optdt = 0.01*viscousTimescale/pow(numElements,2*ot);
429 if (dt > optdt) printf("WARNING: dt is larger than optimal empirical dt (%lf > %lf), system might not be stable.\n", dt, optdt);
430
431 // initialization
432 startRange = (double *)malloc(sizeof(double) * dims);
433 range = (double *)malloc(sizeof(double) * dims);
434 offset = (double *)malloc(sizeof(double) * dims);
435 startRange[0] = 0.0;
436 range[0] = 1.5;
437 offset[0] = 3.0;
438 startRange[1] = 0.0;
439 range[1] = 1.5;
440 offset[1] = 3.0;
441 if (dims == 3) {
442 startRange[2] = 0.05;
443 range[2] = 0.05;
444 offset[2] = 0.1;
445 }
446 randMult = 0.1;
447 randOffset = 0.05;
448
449 pm = [model objectForKey: @"initialize"];
450 if (pm) {
451 a = [pm objectForKey: @"startRange"];
452 CHECK_PARAM(a, "initialize startRange");
453 if (a) for (i = 0; i < [a count]; ++i) startRange[i] = [[a objectAtIndex: i] doubleValue];
454 a = [pm objectForKey: @"range"];
455 CHECK_PARAM(a, "initialize range");
456 if (a) for (i = 0; i < [a count]; ++i) range[i] = [[a objectAtIndex: i] doubleValue];
457 a = [pm objectForKey: @"offset"];
458 CHECK_PARAM(a, "initialize offset");
459 if (a) for (i = 0; i < [a count]; ++i) offset[i] = [[a objectAtIndex: i] doubleValue];
460 param = [pm objectForKey: @"randMult"];
461 CHECK_PARAM(param, "initialize randMult");
462 if (param) randMult = [param doubleValue];
463 param = [pm objectForKey: @"randOffset"];
464 CHECK_PARAM(param, "initialize randOffset");
465 if (param) randOffset = [param doubleValue];
466 sectorBorder = 0;
467 param = [pm objectForKey: @"sectorBorder"];
468 if (param) sectorBorder = [param intValue];
469 }
470
471 return self;
472 }
473
474 - (void)dealloc
475 {
476 if (elementData) [elementData release];
477 //if (numOfElements) free(numOfElements);
478 if (dataFile) fclose(dataFile);
479 //if (rho) free(rho);
480 //if (factor) free(factor);
481 //if (eqSquared) free(eqSquared);
482 if (range) free(range);
483 if (offset) free(offset);
484 if (forces) [forces release];
485 int i;
486 for (i = 0; i < 19; ++i) if (meshNeighbors[i]) free(meshNeighbors[i]);
487
488 if (incomingSectors) free(incomingSectors);
489 if (outgoingSectors) free(outgoingSectors);
490 if (incomingElements) free(incomingElements);
491 if (outgoingElements) free(outgoingElements);
492 if (availableElements) [availableElements release];
493
494 [super dealloc];
495 }
496
497 - (int)numOfCells { return numOfCells; }
498 //- (int)maxCells { return maxCells; }
499 //- (int *)numOfElements { return numOfElements; }
500 - (int)maxElements { return maxElements; }
501 - (int)totalElements { return totalElements; }
502 - (void *)elementX { return [[[elementData gridWithName: @"X"] objectForKey: @"matrix"] getMatrix]; }
503 - (void *)elementY { return [[[elementData gridWithName: @"Y"] objectForKey: @"matrix"] getMatrix]; }
504 - (void *)elementZ { return [[[elementData gridWithName: @"Z"] objectForKey: @"matrix"] getMatrix]; }
505 - (void *)elementType { return [[[elementData gridWithName: @"type"] objectForKey: @"matrix"] getMatrix]; }
506 - (void *)elementCellNumber { return [[[elementData gridWithName: @"cell"] objectForKey: @"matrix"] getMatrix]; }
507 - (double)maxInteractionRange { return interactionMaxSquared; }
508
509 - (int)maxElementsPerSector { return maxElementsPerSector; }
510 - (int)maxSectors { return maxSectors; }
511 - (double)sectorSize { return sectorSize; }
512 - (int)numSectors { return numSectors; }
513 - (int *)sectorNeighborTable { return sectorNeighborTable; }
514 - (int *)sectorCoordinates { return sectorCoordinates; }
515
516 - (int)numOfParameters
517 {
518 // just the derived quantities
519 return numOfParameters;
520 }
521
522 - (NSString *)nameOfParameter: (int)theParam
523 {
524 return [parameterNames objectAtIndex: theParam];
525 }
526
527 - (BOOL)setValue: (double)aValue forParameter: (NSString *)pName
528 {
529 printf("perturb parameter: %s = %f\n", [pName UTF8String], aValue);
530 if ([pName isEqualToString: @"MAX_RANGE_SQ"]) {
531 interactionMaxSquared = aValue;
532 return YES;
533 }
534
535 int i;
536 for (i = 0; i < [forces count]; ++i) {
537 NSMutableDictionary *d = [forces objectAtIndex: i];
538 NSString *forceName = [d objectForKey: @"name"];
539
540 if ([[d objectForKey: @"potential"] isEqualToString: @"D_MORSE"]
541 || [[d objectForKey: @"potential"] isEqualToString: @"PD_MORSE"]) {
542 // TODO: have to recalculate
543 return NO;
544 }
545
546 if ([[d objectForKey: @"potential"] isEqualToString: @"MORSE"]
547 || [[d objectForKey: @"potential"] isEqualToString: @"PMORSE"]) {
548 NSString *s = [NSString stringWithFormat: @"%@_U0", forceName];
549 if ([pName isEqualToString: s]) {
550 [d setObject: [NSNumber numberWithDouble: aValue] forKey: s];
551 //printf("found: %s\n", [[d description] UTF8String]);
552 return YES;
553 }
554
555 s = [NSString stringWithFormat: @"%@_U1", forceName];
556 if ([pName isEqualToString: s]) {
557 [d setObject: [NSNumber numberWithDouble: aValue] forKey: s];
558 return YES;
559 }
560
561 s = [NSString stringWithFormat: @"%@_ETA0", forceName];
562 if ([pName isEqualToString: s]) {
563 [d setObject: [NSNumber numberWithDouble: aValue] forKey: s];
564 return YES;
565 }
566
567 s = [NSString stringWithFormat: @"%@_ETA1", forceName];
568 if ([pName isEqualToString: s]) {
569 [d setObject: [NSNumber numberWithDouble: aValue] forKey: s];
570 return YES;
571 }
572 }
573 }
574
575 return NO;
576 }
577
578 //
579 //
580 //
581
582 // TODO
583 #if 0
584 #define X_RANGE 1.5
585 #define X_OFFSET 3.0
586 #define Y_RANGE 1.5
587 #define Y_OFFSET 3.0
588 #define Z_RANGE 0.05
589 #define Z_OFFSET 0.1
590 //#define RAND_MULT 0.1
591 //#define RAND_OFFSET 0.05
592 #define RAND_MULT 0.1
593 #define RAND_OFFSET 0.05
594 #endif
595
596 - (void)addElementForCell: (int)cellNum
597 {
598 int i;
599
600 if (cellNum >= numOfCells) return;
601
602 Grid2DArray *X = [[elementData gridWithName: @"X"] objectForKey: @"matrix"];
603 Grid2DArray *Y = [[elementData gridWithName: @"Y"] objectForKey: @"matrix"];
604 Grid2DArray *Z = [[elementData gridWithName: @"Z"] objectForKey: @"matrix"];
605 Grid2DArray *T = [[elementData gridWithName: @"type"] objectForKey: @"matrix"];
606 Grid2DArray *C = [[elementData gridWithName: @"cell"] objectForKey: @"matrix"];
607 int (*etGrid)[1][maxElements] = [T getMatrix];
608 int (*cGrid)[1][maxElements] = [C getMatrix];
609
610 SpatialModel *spatialModel = [super spatialModel];
611 int dims = [spatialModel dimensions];
612 double *domainSize = [spatialModel domainSize];
613
614 // put new element at random offset of first element
615 if ([dataEncode isEqual: [BioSwarmModel floatEncode]]) {
616 float (*xGrid)[1][maxElements] = [X getMatrix];
617 float (*yGrid)[1][maxElements] = [Y getMatrix];
618 float (*zGrid)[1][maxElements] = [Z getMatrix];
619
620 for (i = 0; i < totalElements; ++i) {
621 if ((*cGrid)[0][i] == cellNum) {
622 (*xGrid)[0][totalElements] = (*xGrid)[0][i] + RAND_NUM * randMult - randOffset;
623 if ((*xGrid)[0][totalElements] < 0.0) (*xGrid)[0][totalElements] = 0.0;
624 if ((*xGrid)[0][totalElements] > domainSize[0]) (*xGrid)[0][totalElements] = domainSize[0];
625 (*yGrid)[0][totalElements] = (*yGrid)[0][i] + RAND_NUM * randMult - randOffset;
626 if ((*yGrid)[0][totalElements] < 0.0) (*yGrid)[0][totalElements] = 0.0;
627 if ((*yGrid)[0][totalElements] > domainSize[1]) (*yGrid)[0][totalElements] = domainSize[1];
628 if (dims == 2) (*zGrid)[0][totalElements] = 0.0;
629 else {
630 (*zGrid)[0][totalElements] = (*zGrid)[0][i] + RAND_NUM * randMult - randOffset;
631 if ((*zGrid)[0][totalElements] < 0.0) (*zGrid)[0][totalElements] = 0.0;
632 if ((*zGrid)[0][totalElements] > domainSize[2]) (*zGrid)[0][totalElements] = domainSize[2];
633 }
634 (*etGrid)[0][totalElements] = 0;
635 (*cGrid)[0][totalElements] = cellNum;
636
637 printf("add element (%d) at (%f, %f, %f)\n", cellNum,
638 (*xGrid)[0][totalElements], (*yGrid)[0][totalElements],
639 (*zGrid)[0][totalElements]);
640
641 ++totalElements;
642 break;
643 }
644 }
645
646 } else if ([dataEncode isEqual: [BioSwarmModel doubleEncode]]) {
647 double (*xGrid)[1][maxElements] = [X getMatrix];
648 double (*yGrid)[1][maxElements] = [Y getMatrix];
649 double (*zGrid)[1][maxElements] = [Z getMatrix];
650
651 for (i = 0; i < totalElements; ++i) {
652 if ((*cGrid)[0][i] == cellNum) {
653 (*xGrid)[0][totalElements] = (*xGrid)[0][i] + RAND_NUM * randMult - randOffset;
654 if ((*xGrid)[0][totalElements] < 0.0) (*xGrid)[0][totalElements] = 0.0;
655 if ((*xGrid)[0][totalElements] > domainSize[0]) (*xGrid)[0][totalElements] = domainSize[0];
656 (*yGrid)[0][totalElements] = (*yGrid)[0][i] + RAND_NUM * randMult - randOffset;
657 if ((*yGrid)[0][totalElements] < 0.0) (*yGrid)[0][totalElements] = 0.0;
658 if ((*yGrid)[0][totalElements] > domainSize[1]) (*yGrid)[0][totalElements] = domainSize[1];
659 if (dims == 2) (*zGrid)[0][totalElements] = 0.0;
660 else {
661 (*zGrid)[0][totalElements] = (*zGrid)[0][i] + RAND_NUM * randMult - randOffset;
662 if ((*zGrid)[0][totalElements] < 0.0) (*zGrid)[0][totalElements] = 0.0;
663 if ((*zGrid)[0][totalElements] > domainSize[2]) (*zGrid)[0][totalElements] = domainSize[2];
664 }
665 (*etGrid)[0][totalElements] = 0;
666 (*cGrid)[0][totalElements] = cellNum;
667
668 printf("add element (%d) at (%f, %f, %f)\n", cellNum,
669 (*xGrid)[0][totalElements], (*yGrid)[0][totalElements],
670 (*zGrid)[0][totalElements]);
671
672 ++totalElements;
673 break;
674 }
675 }
676 }
677 }
678
679 - (void)divideCell: (int)cellNum
680 {
681 int i;
682
683 if (cellNum >= numOfCells) return;
684
685 Grid2DArray *X = [[elementData gridWithName: @"X"] objectForKey: @"matrix"];
686 Grid2DArray *Y = [[elementData gridWithName: @"Y"] objectForKey: @"matrix"];
687 Grid2DArray *Z = [[elementData gridWithName: @"Z"] objectForKey: @"matrix"];
688 Grid2DArray *C = [[elementData gridWithName: @"cell"] objectForKey: @"matrix"];
689 int (*cGrid)[1][maxElements] = [C getMatrix];
690
691 if ([dataEncode isEqual: [BioSwarmModel floatEncode]]) {
692 float (*xGrid)[1][maxElements] = [X getMatrix];
693 float (*yGrid)[1][maxElements] = [Y getMatrix];
694 float (*zGrid)[1][maxElements] = [Z getMatrix];
695
696 // count the elements for the cell
697 int n = 0;
698 for (i = 0; i < totalElements; ++i) {
699 if ((*cGrid)[0][i] == cellNum) ++n;
700 }
701 int newn = n / 2;
702 if (newn == 0) return;
703
704 // cleave cell in random angle of XY plane
705 // sort elements by translation of cleavage plane
706 sort_helper *sortHelper = (sort_helper *)malloc(sizeof(sort_helper) * n);
707 double theta = RAND_NUM * M_PI;
708 double costheta = cos(theta);
709 double sintheta = sin(theta);
710 int sidx = 0;
711 for (i = 0; i < totalElements; ++i) {
712 if ((*cGrid)[0][i] == cellNum) {
713 sortHelper[sidx].fValue = costheta * (*xGrid)[0][i] + sintheta * (*yGrid)[0][i];
714 sortHelper[sidx].idx = i;
715 ++sidx;
716 }
717 }
718 qsort(sortHelper, n, sizeof(sort_helper), &float_cmp_sort_helper);
719 for (i = 0; i < n; ++i) {
720 printf("%d: %f\n", sortHelper[i].idx, sortHelper[i].fValue);
721 }
722 printf("n = %d\n", n);
723
724 // assign elements to new cell
725 for (i = 0; i < newn; ++i) {
726 (*cGrid)[0][sortHelper[i].idx] = numOfCells;
727 }
728
729 for (i = 0; i < totalElements; ++i) {
730 if ((*cGrid)[0][i] == cellNum) {
731 printf("element (%d, %d) at (%f, %f, %f)\n", (*cGrid)[0][i], i,
732 (*xGrid)[0][i], (*yGrid)[0][i],
733 (*zGrid)[0][i]);
734 }
735 }
736
737 for (i = 0; i < totalElements; ++i) {
738 if ((*cGrid)[0][i] == numOfCells) {
739 printf("element (%d, %d) at (%f, %f, %f)\n", (*cGrid)[0][i], i,
740 (*xGrid)[0][i], (*yGrid)[0][i],
741 (*zGrid)[0][i]);
742 }
743 }
744
745 ++numOfCells;
746 free(sortHelper);
747
748 } else if ([dataEncode isEqual: [BioSwarmModel doubleEncode]]) {
749 printf("ERROR: double encoding not supported.\n");
750 }
751 }
752
753 //
754 // Simulation routines
755 //
756
757 - (void)setupDataFilesWithState: (int)aState
758 {
759 // data files
760 dataFileState = aState;
761
762 if (dataFile) {
763 fclose(dataFile);
764 dataFile = NULL;
765 }
766
767 if (meshFile) {
768 fclose(meshFile);
769 meshFile = NULL;
770 }
771
772 NSString *fileName, *meshFileName = nil;
773 if ((dataFileState == BCMODEL_READ_STATE) && ([modelController useInitializeFile])) {
774 fileName = [NSString stringWithFormat: @"%@_%@_cells.dat", [modelController initializeFile], name];
775 meshFileName = [NSString stringWithFormat: @"%@_%@_mesh.dat", [modelController initializeFile], name];
776 } else {
777 NSString *md = [modelController modelDirectory];
778 if (md) fileName = [NSString stringWithFormat: @"%@/%@_cells_%@.dat", md, name, outputSuffix];
779 else fileName = [NSString stringWithFormat: @"%@_cells_%@.dat", name, outputSuffix];
780 }
781 //printf("fileName = %s\n", [fileName UTF8String]);
782
783 switch (aState) {
784 case BCMODEL_NOFILE_STATE:
785 // no data files setup
786 break;
787 case BCMODEL_WRITE_STATE:
788 {
789 // data files for writing
790 dataFile = fopen([fileName UTF8String], "wb+");
791 if (dataFile == NULL) {
792 NSLog(@"Cannot open file: %@", fileName);
793 exit(0);
794 }
795 break;
796 }
797 case BCMODEL_READ_STATE:
798 {
799 // data files for reading
800 dataFile = fopen([fileName UTF8String], "rb+");
801 if (dataFile == NULL) {
802 NSLog(@"Cannot open file: %@", fileName);
803 exit(0);
804 }
805 if ((useMesh) & (meshFileName != nil)) {
806 meshFile = fopen([meshFileName UTF8String], "rb+");
807 if (meshFile == NULL) {
808 NSLog(@"Cannot open file: %@", meshFileName);
809 exit(0);
810 }
811 }
812 break;
813 }
814 }
815 }
816
817 - (void)allocateSimulationWithEncode: (NSString *)anEncode
818 {
819 //dataEncode = anEncode;
820
821 printf("SEM: allocateSimulationWithEncode:\n");
822 int i;
823 int dims = [[super spatialModel] dimensions];
824
825 if (elementData) {
826 [elementData release];
827 elementData = nil;
828 }
829
830 printf("childModels = %ld\n", [childModels count]);
831 printf("multiplicity = %d\n", multiplicity);
832
833 // use grid to store element data
834 elementData = [[MultiScale2DGrid alloc] initWithWidth: maxElements andHeight: 1];
835 [elementData newGridWithName: @"X" atScale: 1 withEncode: dataEncode];
836 [elementData newGridWithName: @"Y" atScale: 1 withEncode: dataEncode];
837 [elementData newGridWithName: @"Z" atScale: 1 withEncode: dataEncode];
838 [elementData newGridWithName: @"type" atScale: 1 withEncode: [BioSwarmModel intEncode]];
839 [elementData newGridWithName: @"cell" atScale: 1 withEncode: [BioSwarmModel intEncode]];
840 [elementData newGridWithName: @"sector" atScale: 1 withEncode: [BioSwarmModel intEncode]];
841
842 if (useMesh) {
843 for (i = 0; i < 19; ++i) meshNeighbors[i] = malloc(sizeof(int) * maxElements);
844 //printf("meshNeighbors[%d] = %p\n", i, meshNeighbors[i]);
845 }
846 //numOfElements = malloc(maxCells * sizeof(int));
847
848 // sector tables
849 if (sectorCoordinates) free(sectorCoordinates);
850 sectorCoordinates = malloc(maxSectors * 3 * sizeof(int));
851 for (i = 0; i < (maxSectors * 3); ++i) sectorCoordinates[i] = 0;
852
853 if (sectorReadOnlyFlag) free(sectorReadOnlyFlag);
854 sectorReadOnlyFlag = malloc(maxSectors * sizeof(int));
855 for (i = 0; i < maxSectors; ++i) sectorReadOnlyFlag[i] = 0;
856
857 if (sectorElementTable) free(sectorElementTable);
858 sectorElementTable = malloc(maxElementsPerSector * maxSectors * sizeof(int));
859 for (i = 0; i < (maxElementsPerSector * maxSectors); ++i) sectorElementTable[i] = -1;
860
861 if (sectorNeighborTable) free(sectorNeighborTable);
862 int nSize;
863 if (dims == 2) nSize = S_XY_TOT;
864 else nSize = S_XYZ_TOT;
865 sectorNeighborTable = malloc(maxSectors * nSize * sizeof(int));
866 for (i = 0; i < (maxSectors * nSize); ++i) sectorNeighborTable[i] = -1;
867 #if 0
868 // element neighbor table
869 if (elementNeighborTable) free(elementNeighborTable);
870 elementNeighborTable = malloc(maxElements * nSize * maxElementsPerSector * sizeof(int));
871 for (i = 0; i < (maxElements * nSize * maxElementsPerSector); ++i) elementNeighborTable[i] = -1;
872 #endif
873 }
874
875 - (BOOL)writeDataWithCheck: (BOOL)aFlag
876 {
877 if (dataFileState != BCMODEL_WRITE_STATE) return NO;
878
879 int i;
880 Grid2DArray *X = [[elementData gridWithName: @"X"] objectForKey: @"matrix"];
881 Grid2DArray *Y = [[elementData gridWithName: @"Y"] objectForKey: @"matrix"];
882 Grid2DArray *Z = [[elementData gridWithName: @"Z"] objectForKey: @"matrix"];
883 Grid2DArray *T = [[elementData gridWithName: @"type"] objectForKey: @"matrix"];
884 Grid2DArray *C = [[elementData gridWithName: @"cell"] objectForKey: @"matrix"];
885 //Grid2DArray *S = [[elementData gridWithName: @"sector"] objectForKey: @"matrix"];
886 int (*etGrid)[1][maxElements] = [T getMatrix];
887 int (*cGrid)[1][maxElements] = [C getMatrix];
888 //int (*sGrid)[maxElements] = [S getMatrix];
889
890 SpatialModel *spatialModel = [modelController spatialModel];
891 int dims = [spatialModel dimensions];
892 double *domainSize = [spatialModel domainSize];
893 double ct = [modelController currentTime];
894 if (isBinaryFile) {
895 // write data in binary file
896 if ([dataEncode isEqual: [BioSwarmModel floatEncode]]) {
897 float (*xGrid)[1][maxElements] = [X getMatrix];
898 float (*yGrid)[1][maxElements] = [Y getMatrix];
899 float (*zGrid)[1][maxElements] = [Z getMatrix];
900 float coords[3];
901
902 // only write out elements within domain
903 int countElements = 0;
904 for (i = 0; i < totalElements; ++i) {
905 coords[0] = (*xGrid)[0][i]; coords[1] = (*yGrid)[0][i]; coords[2] = (*zGrid)[0][i];
906 if ((coords[0] < 0) || (coords[0] >= domainSize[0])) continue;
907 if ((coords[1] < 0) || (coords[1] >= domainSize[1])) continue;
908 if ((dims == 3) && ((coords[2] < 0) || (coords[2] >= domainSize[2]))) continue;
909 ++countElements;
910 }
911
912 fwrite(&(countElements), sizeof(int), 1, dataFile);
913 for (i = 0; i < totalElements; ++i) {
914 coords[0] = (*xGrid)[0][i]; coords[1] = (*yGrid)[0][i]; coords[2] = (*zGrid)[0][i];
915 if ((coords[0] < 0) || (coords[0] >= domainSize[0])) continue;
916 if ((coords[1] < 0) || (coords[1] >= domainSize[1])) continue;
917 if ((dims == 3) && ((coords[2] < 0) || (coords[2] >= domainSize[2]))) continue;
918
919 if ([spatialModel isSubspace]) [spatialModel translateFloatCoordinatesToWorld: coords forModel: self];
920
921 fwrite(coords, sizeof(float), 3, dataFile);
922 fwrite(&((*etGrid)[0][i]), sizeof(int), 1, dataFile);
923 fwrite(&((*cGrid)[0][i]), sizeof(int), 1, dataFile);
924 //fwrite(&((*sGrid)[i]), sizeof(int), 1, dataFile);
925 }
926 } else if ([dataEncode isEqual: [BioSwarmModel doubleEncode]]) {
927 double (*xGrid)[1][maxElements] = [X getMatrix];
928 double (*yGrid)[1][maxElements] = [Y getMatrix];
929 double (*zGrid)[1][maxElements] = [Z getMatrix];
930 double coords[3];
931
932 // only write out elements within domain
933 int countElements = 0;
934 for (i = 0; i < totalElements; ++i) {
935 coords[0] = (*xGrid)[0][i]; coords[1] = (*yGrid)[0][i]; coords[2] = (*zGrid)[0][i];
936 if ((coords[0] < 0) || (coords[0] >= domainSize[0])) continue;
937 if ((coords[1] < 0) || (coords[1] >= domainSize[1])) continue;
938 if ((dims == 3) && ((coords[2] < 0) || (coords[2] >= domainSize[2]))) continue;
939 ++countElements;
940 }
941
942 fwrite(&(countElements), sizeof(int), 1, dataFile);
943 for (i = 0; i < totalElements; ++i) {
944 coords[0] = (*xGrid)[0][i]; coords[1] = (*yGrid)[0][i]; coords[2] = (*zGrid)[0][i];
945 if ((coords[0] < 0) || (coords[0] >= domainSize[0])) continue;
946 if ((coords[1] < 0) || (coords[1] >= domainSize[1])) continue;
947 if ((dims == 3) && ((coords[2] < 0) || (coords[2] >= domainSize[2]))) continue;
948
949 if ([spatialModel isSubspace]) [spatialModel translateDoubleCoordinatesToWorld: coords forModel: self];
950
951 fwrite(coords, sizeof(double), 3, dataFile);
952 //fwrite(&((*yGrid)[0][i]), sizeof(double), 1, dataFile);
953 //fwrite(&((*zGrid)[0][i]), sizeof(double), 1, dataFile);
954 fwrite(&((*etGrid)[0][i]), sizeof(int), 1, dataFile);
955 fwrite(&((*cGrid)[0][i]), sizeof(int), 1, dataFile);
956 //fwrite(&((*sGrid)[i]), sizeof(int), 1, dataFile);
957 }
958 }
959 fflush(dataFile);
960
961 } else {
962 // write data in text file
963 if ([dataEncode isEqual: [BioSwarmModel floatEncode]]) {
964 float (*xGrid)[1][maxElements] = [X getMatrix];
965 float (*yGrid)[1][maxElements] = [Y getMatrix];
966 float (*zGrid)[1][maxElements] = [Z getMatrix];
967 float coords[3];
968
969 // only write out elements within domain
970 int countElements = 0;
971 for (i = 0; i < totalElements; ++i) {
972 coords[0] = (*xGrid)[0][i]; coords[1] = (*yGrid)[0][i]; coords[2] = (*zGrid)[0][i];
973 if ((coords[0] < 0) || (coords[0] >= domainSize[0])) continue;
974 if ((coords[1] < 0) || (coords[1] >= domainSize[1])) continue;
975 if ((dims == 3) && ((coords[2] < 0) || (coords[2] >= domainSize[2]))) continue;
976 ++countElements;
977 }
978
979 //fprintf(dataFile, "%f %d", ct, totalElements);
980 fprintf(dataFile, "%f %d", ct, countElements);
981 for (i = 0; i < totalElements; ++i) {
982 coords[0] = (*xGrid)[0][i]; coords[1] = (*yGrid)[0][i]; coords[2] = (*zGrid)[0][i];
983 if ((coords[0] < 0) || (coords[0] >= domainSize[0])) continue;
984 if ((coords[1] < 0) || (coords[1] >= domainSize[1])) continue;
985 if ((dims == 3) && ((coords[2] < 0) || (coords[2] >= domainSize[2]))) continue;
986
987 if ([spatialModel isSubspace]) [spatialModel translateFloatCoordinatesToWorld: coords forModel: self];
988
989 //fprintf(dataFile, " %f %f %f %d %d %d", coords[0], coords[1], coords[2], (*etGrid)[0][i], (*cGrid)[0][i], (*sGrid)[i]);
990 fprintf(dataFile, " %f %f %f %d %d", coords[0], coords[1], coords[2], (*etGrid)[0][i], (*cGrid)[0][i]);
991 }
992 fprintf(dataFile, "\n");
993 } else if ([dataEncode isEqual: [BioSwarmModel doubleEncode]]) {
994 double (*xGrid)[1][maxElements] = [X getMatrix];
995 double (*yGrid)[1][maxElements] = [Y getMatrix];
996 double (*zGrid)[1][maxElements] = [Z getMatrix];
997 double coords[3];
998
999 // only write out elements within domain
1000 int countElements = 0;
1001 for (i = 0; i < totalElements; ++i) {
1002 coords[0] = (*xGrid)[0][i]; coords[1] = (*yGrid)[0][i]; coords[2] = (*zGrid)[0][i];
1003 if ((coords[0] < 0) || (coords[0] >= domainSize[0])) continue;
1004 if ((coords[1] < 0) || (coords[1] >= domainSize[1])) continue;
1005 if ((dims == 3) && ((coords[2] < 0) || (coords[2] >= domainSize[2]))) continue;
1006 ++countElements;
1007 }
1008
1009 fprintf(dataFile, "%f %d", ct, totalElements);
1010 for (i = 0; i < totalElements; ++i) {
1011 coords[0] = (*xGrid)[0][i]; coords[1] = (*yGrid)[0][i]; coords[2] = (*zGrid)[0][i];
1012 if ((coords[0] < 0) || (coords[0] >= domainSize[0])) continue;
1013 if ((coords[1] < 0) || (coords[1] >= domainSize[1])) continue;
1014 if ((dims == 3) && ((coords[2] < 0) || (coords[2] >= domainSize[2]))) continue;
1015
1016 if ([spatialModel isSubspace]) [spatialModel translateDoubleCoordinatesToWorld: coords forModel: self];
1017
1018 //fprintf(dataFile, " %f %f %f %d %d %d", coords[0], coords[1], coords[2], (*etGrid)[0][i], (*cGrid)[0][i], (*sGrid)[i]);
1019 fprintf(dataFile, " %f %f %f %d %d", coords[0], coords[1], coords[2], (*etGrid)[0][i], (*cGrid)[0][i]);
1020 }
1021 fprintf(dataFile, "\n");
1022 }
1023 fflush(dataFile);
1024 }
1025
1026 return YES;
1027 }
1028
1029 - (BOOL)readData
1030 {
1031 if (dataFileState != BCMODEL_READ_STATE) return NO;
1032
1033 int dims = [[super spatialModel] dimensions];
1034
1035 int i, m;
1036 Grid2DArray *X = [[elementData gridWithName: @"X"] objectForKey: @"matrix"];
1037 Grid2DArray *Y = [[elementData gridWithName: @"Y"] objectForKey: @"matrix"];
1038 Grid2DArray *Z = [[elementData gridWithName: @"Z"] objectForKey: @"matrix"];
1039 Grid2DArray *T = [[elementData gridWithName: @"type"] objectForKey: @"matrix"];
1040 Grid2DArray *C = [[elementData gridWithName: @"cell"] objectForKey: @"matrix"];
1041 //Grid2DArray *S = [[elementData gridWithName: @"sector"] objectForKey: @"matrix"];
1042 int (*etGrid)[1][maxElements] = [T getMatrix];
1043 int (*cGrid)[1][maxElements] = [C getMatrix];
1044 //int (*sGrid)[maxElements] = [S getMatrix];
1045 SpatialModel *spatialModel = [modelController spatialModel];
1046 double *domainSize = [spatialModel domainSize];
1047 int currentElement = 0;
1048
1049 double lowerBound, upperBound[3];
1050 lowerBound = 0 - (double)sectorBorder * sectorSize;
1051 upperBound[0] = domainSize[0] + (double)sectorBorder * sectorSize;
1052 upperBound[1] = domainSize[1] + (double)sectorBorder * sectorSize;
1053 upperBound[2] = domainSize[2] + (double)sectorBorder * sectorSize;
1054
1055 if (isBinaryFile) {
1056 // read data from binary file
1057 if ([dataEncode isEqual: [BioSwarmModel floatEncode]]) {
1058 float (*xGrid)[1][maxElements] = [X getMatrix];
1059 float (*yGrid)[1][maxElements] = [Y getMatrix];
1060 float (*zGrid)[1][maxElements] = [Z getMatrix];
1061 float coords[3];
1062 currentElement = 0;
1063 fread(&(totalElements), sizeof(int), 1, dataFile);
1064 for (i = 0; i < totalElements; ++i) {
1065 fread(coords, sizeof(float), 3, dataFile);
1066 //fread(&((*yGrid)[0][i]), sizeof(float), 1, dataFile);
1067 //fread(&((*zGrid)[0][i]), sizeof(float), 1, dataFile);
1068 fread(&((*etGrid)[0][currentElement]), sizeof(int), 1, dataFile);
1069 fread(&((*cGrid)[0][currentElement]), sizeof(int), 1, dataFile);
1070 //fread(&((*sGrid)[currentElement]), sizeof(int), 1, dataFile);
1071
1072 if ([spatialModel isSubspace]) {
1073 [spatialModel translateFloatCoordinatesToSubspace: coords forModel: self];
1074 if ((coords[0] < lowerBound) || (coords[0] >= upperBound[0])) continue;
1075 if ((coords[1] < lowerBound) || (coords[1] >= upperBound[1])) continue;
1076 if ((dims == 3) && ((coords[2] < lowerBound) || (coords[2] >= upperBound[2]))) continue;
1077 }
1078 (*xGrid)[0][currentElement] = coords[0]; (*yGrid)[0][currentElement] = coords[1]; (*zGrid)[0][currentElement] = coords[2];
1079 ++currentElement;
1080 }
1081 } else if ([dataEncode isEqual: [BioSwarmModel doubleEncode]]) {
1082 double (*xGrid)[1][maxElements] = [X getMatrix];
1083 double (*yGrid)[1][maxElements] = [Y getMatrix];
1084 double (*zGrid)[1][maxElements] = [Z getMatrix];
1085 double coords[3];
1086 currentElement = 0;
1087 fread(&(totalElements), sizeof(int), 1, dataFile);
1088 for (i = 0; i < totalElements; ++i) {
1089 fread(coords, sizeof(double), 3, dataFile);
1090 //fread(&((*yGrid)[0][i]), sizeof(double), 1, dataFile);
1091 //fread(&((*zGrid)[0][i]), sizeof(double), 1, dataFile);
1092 fread(&((*etGrid)[0][currentElement]), sizeof(int), 1, dataFile);
1093 fread(&((*cGrid)[0][currentElement]), sizeof(int), 1, dataFile);
1094 //fread(&((*sGrid)[currentElement]), sizeof(int), 1, dataFile);
1095
1096 if ([spatialModel isSubspace]) {
1097 [spatialModel translateDoubleCoordinatesToSubspace: coords forModel: self];
1098 if ((coords[0] < lowerBound) || (coords[0] >= upperBound[0])) continue;
1099 if ((coords[1] < lowerBound) || (coords[1] >= upperBound[1])) continue;
1100 if ((dims == 3) && ((coords[2] < lowerBound) || (coords[2] >= upperBound[2]))) continue;
1101 }
1102 (*xGrid)[0][currentElement] = coords[0]; (*yGrid)[0][currentElement] = coords[1]; (*zGrid)[0][currentElement] = coords[2];
1103 ++currentElement;
1104 }
1105 }
1106 printf("totalElements = %d, currentElement = %d\n", totalElements, currentElement);
1107 totalElements = currentElement;
1108
1109 // mesh neighbors
1110 if ((useMesh) & (meshFile != NULL)) {
1111 if (dims == 2) {
1112 for (i = 0; i < totalElements; ++i) {
1113 for (m = MESH_CURRENT; m < MESH_XY_TOT; ++m) {
1114 int *meshData = meshNeighbors[m];
1115 fread(&(meshData[i]), sizeof(int), 1, meshFile);
1116 }
1117 }
1118 } else {
1119 for (i = 0; i < totalElements; ++i) {
1120 for (m = MESH_CURRENT; m < MESH_XYZ_TOT; ++m) {
1121 int *meshData = meshNeighbors[m];
1122 fread(&(meshData[i]), sizeof(int), 1, meshFile);
1123 }
1124 }
1125 }
1126 }
1127
1128 numOfCells = 0;
1129 for (i = 0; i < totalElements; ++i) {
1130 if ((*cGrid)[0][i] > numOfCells) numOfCells = (*cGrid)[0][i];
1131 }
1132 ++numOfCells;
1133
1134 } else {
1135 // read data in text file
1136 if ([dataEncode isEqual: [BioSwarmModel floatEncode]]) {
1137 float ct;
1138 float (*xGrid)[1][maxElements] = [X getMatrix];
1139 float (*yGrid)[1][maxElements] = [Y getMatrix];
1140 float (*zGrid)[1][maxElements] = [Z getMatrix];
1141 float coords[3];
1142 currentElement = 0;
1143 fscanf(dataFile, "%f %d", &ct, &totalElements);
1144 for (i = 0; i < totalElements; ++i) {
1145 fscanf(dataFile, "%f", &(coords[0]));
1146 fscanf(dataFile, "%f", &(coords[1]));
1147 fscanf(dataFile, "%f", &(coords[2]));
1148 fscanf(dataFile, "%d", &((*etGrid)[0][currentElement]));
1149 fscanf(dataFile, "%d", &((*cGrid)[0][currentElement]));
1150 //fscanf(dataFile, "%d", &((*sGrid)[currentElement]));
1151
1152 if ([spatialModel isSubspace]) {
1153 [spatialModel translateFloatCoordinatesToSubspace: coords forModel: self];
1154 if ((coords[0] < lowerBound) || (coords[0] >= upperBound[0])) continue;
1155 if ((coords[1] < lowerBound) || (coords[1] >= upperBound[1])) continue;
1156 if ((dims == 3) && ((coords[2] < lowerBound) || (coords[2] >= upperBound[2]))) continue;
1157 }
1158 (*xGrid)[0][currentElement] = coords[0]; (*yGrid)[0][currentElement] = coords[1]; (*zGrid)[0][currentElement] = coords[2];
1159 //printf("(%d) %f %f %f\n", currentElement, coords[0], coords[1], coords[2]);
1160 ++currentElement;
1161 }
1162 } else if ([dataEncode isEqual: [BioSwarmModel doubleEncode]]) {
1163 double ct;
1164 double (*xGrid)[1][maxElements] = [X getMatrix];
1165 double (*yGrid)[1][maxElements] = [Y getMatrix];
1166 double (*zGrid)[1][maxElements] = [Z getMatrix];
1167 double coords[3];
1168 currentElement = 0;
1169 fscanf(dataFile, "%lf %d", &ct, &totalElements);
1170 for (i = 0; i < totalElements; ++i) {
1171 fscanf(dataFile, "%lf", &(coords[0]));
1172 fscanf(dataFile, "%lf", &(coords[1]));
1173 fscanf(dataFile, "%lf", &(coords[2]));
1174 fscanf(dataFile, "%d", &((*etGrid)[0][currentElement]));
1175 fscanf(dataFile, "%d", &((*cGrid)[0][currentElement]));
1176 //fscanf(dataFile, "%d", &((*sGrid)[currentElement]));
1177
1178 if ([spatialModel isSubspace]) {
1179 [spatialModel translateDoubleCoordinatesToSubspace: coords forModel: self];
1180 if ((coords[0] < lowerBound) || (coords[0] >= upperBound[0])) continue;
1181 if ((coords[1] < lowerBound) || (coords[1] >= upperBound[1])) continue;
1182 if ((dims == 3) && ((coords[2] < lowerBound) || (coords[2] >= upperBound[2]))) continue;
1183 }
1184 (*xGrid)[0][currentElement] = coords[0]; (*yGrid)[0][currentElement] = coords[1]; (*zGrid)[0][currentElement] = coords[2];
1185 ++currentElement;
1186 }
1187 }
1188 printf("totalElements = %d, currentElement = %d\n", totalElements, currentElement);
1189 totalElements = currentElement;
1190
1191 // mesh neighbors
1192 if ((useMesh) & (meshFile != NULL)) {
1193 if (dims == 2) {
1194 for (i = 0; i < totalElements; ++i) {
1195 for (m = MESH_CURRENT; m < MESH_XY_TOT; ++m) {
1196 int *meshData = meshNeighbors[m];
1197 fscanf(meshFile, "%d", &(meshData[i]));
1198 }
1199 }
1200 } else {
1201 for (i = 0; i < totalElements; ++i) {
1202 for (m = MESH_CURRENT; m < MESH_XYZ_TOT; ++m) {
1203 int *meshData = meshNeighbors[m];
1204 fscanf(meshFile, "%d", &(meshData[i]));
1205 }
1206 }
1207 }
1208 }
1209
1210 numOfCells = 0;
1211 for (i = 0; i < totalElements; ++i) {
1212 if ((*cGrid)[0][i] > numOfCells) numOfCells = (*cGrid)[0][i];
1213 }
1214 ++numOfCells;
1215 }
1216
1217 if (totalElements > maxElements) {
1218 printf("ERROR: exceeded maximum number of elements: %d > %d\n", totalElements, maxElements);
1219 abort();
1220 }
1221
1222 // check end of file
1223 if (feof(dataFile)) return NO;
1224
1225 return YES;
1226 }
1227
1228 - (void)rewindFiles
1229 {
1230 if (dataFileState != BCMODEL_READ_STATE) return;
1231 rewind(dataFile);
1232 }
1233
1234 - (void)generateSectors
1235 {
1236 int dims = [[super spatialModel] dimensions];
1237 int i;
1238
1239 printf("generateSectors\n");
1240
1241 // sector coordinates
1242 numSectors = 0;
1243 int (*sCoordinates)[3][maxSectors] = (void *)sectorCoordinates;
1244
1245 // create read only sector border if needed
1246 if (readOnlySectorBorder) {
1247 if (dims == 2) {
1248 // walk around the outside border
1249 int xc, yc;
1250 for (xc = -1; xc < sectorsInDomain; ++xc) {
1251 (*sCoordinates)[0][numSectors] = xc;
1252 (*sCoordinates)[1][numSectors] = -1;
1253 (*sCoordinates)[2][numSectors] = 0;
1254 sectorReadOnlyFlag[numSectors] = 1;
1255 if ([self isDebug]) printf("read only border sector %d (%d, %d, %d).\n", numSectors,
1256 (*sCoordinates)[0][numSectors], (*sCoordinates)[1][numSectors], (*sCoordinates)[2][numSectors]);
1257 ++numSectors;
1258 }
1259 for (yc = -1; yc < sectorsInDomain; ++yc) {
1260 (*sCoordinates)[0][numSectors] = sectorsInDomain;
1261 (*sCoordinates)[1][numSectors] = yc;
1262 (*sCoordinates)[2][numSectors] = 0;
1263 sectorReadOnlyFlag[numSectors] = 1;
1264 if ([self isDebug]) printf("read only border sector %d (%d, %d, %d).\n", numSectors,
1265 (*sCoordinates)[0][numSectors], (*sCoordinates)[1][numSectors], (*sCoordinates)[2][numSectors]);
1266 ++numSectors;
1267 }
1268 for (xc = sectorsInDomain; xc > -1; --xc) {
1269 (*sCoordinates)[0][numSectors] = xc;
1270 (*sCoordinates)[1][numSectors] = sectorsInDomain;
1271 (*sCoordinates)[2][numSectors] = 0;
1272 sectorReadOnlyFlag[numSectors] = 1;
1273 if ([self isDebug]) printf("read only border sector %d (%d, %d, %d).\n", numSectors,
1274 (*sCoordinates)[0][numSectors], (*sCoordinates)[1][numSectors], (*sCoordinates)[2][numSectors]);
1275 ++numSectors;
1276 }
1277 for (yc = sectorsInDomain; yc > -1; --yc) {
1278 (*sCoordinates)[0][numSectors] = -1;
1279 (*sCoordinates)[1][numSectors] = yc;
1280 (*sCoordinates)[2][numSectors] = 0;
1281 sectorReadOnlyFlag[numSectors] = 1;
1282 if ([self isDebug]) printf("read only border sector %d (%d, %d, %d).\n", numSectors,
1283 (*sCoordinates)[0][numSectors], (*sCoordinates)[1][numSectors], (*sCoordinates)[2][numSectors]);
1284 ++numSectors;
1285 }
1286
1287 // walk around inside border
1288 for (xc = 0; xc < sectorsInDomain - 1; ++xc) {
1289 (*sCoordinates)[0][numSectors] = xc;
1290 (*sCoordinates)[1][numSectors] = 0;
1291 (*sCoordinates)[2][numSectors] = 0;
1292 if ([self isDebug]) printf("inside border sector %d (%d, %d, %d).\n", numSectors,
1293 (*sCoordinates)[0][numSectors], (*sCoordinates)[1][numSectors], (*sCoordinates)[2][numSectors]);
1294 ++numSectors;
1295 }
1296 for (yc = 0; yc < sectorsInDomain - 1; ++yc) {
1297 (*sCoordinates)[0][numSectors] = sectorsInDomain - 1;
1298 (*sCoordinates)[1][numSectors] = yc;
1299 (*sCoordinates)[2][numSectors] = 0;
1300 if ([self isDebug]) printf("inside border sector %d (%d, %d, %d).\n", numSectors,
1301 (*sCoordinates)[0][numSectors], (*sCoordinates)[1][numSectors], (*sCoordinates)[2][numSectors]);
1302 ++numSectors;
1303 }
1304 for (xc = sectorsInDomain - 1; xc > 0; --xc) {
1305 (*sCoordinates)[0][numSectors] = xc;
1306 (*sCoordinates)[1][numSectors] = sectorsInDomain - 1;
1307 (*sCoordinates)[2][numSectors] = 0;
1308 if ([self isDebug]) printf("inside border sector %d (%d, %d, %d).\n", numSectors,
1309 (*sCoordinates)[0][numSectors], (*sCoordinates)[1][numSectors], (*sCoordinates)[2][numSectors]);
1310 ++numSectors;
1311 }
1312 for (yc = sectorsInDomain - 1; yc > 0; --yc) {
1313 (*sCoordinates)[0][numSectors] = 0;
1314 (*sCoordinates)[1][numSectors] = yc;
1315 (*sCoordinates)[2][numSectors] = 0;
1316 if ([self isDebug]) printf("inside border sector %d (%d, %d, %d).\n", numSectors,
1317 (*sCoordinates)[0][numSectors], (*sCoordinates)[1][numSectors], (*sCoordinates)[2][numSectors]);
1318 ++numSectors;
1319 }
1320
1321 } else {
1322 // 3D
1323
1324 // walk around the outside border
1325 int xc, yc, zc;
1326 for (xc = -1; xc < sectorsInDomain; ++xc) {
1327 for (yc = -1; yc < sectorsInDomain; ++yc) {
1328 (*sCoordinates)[0][numSectors] = xc;
1329 (*sCoordinates)[1][numSectors] = yc;
1330 (*sCoordinates)[2][numSectors] = -1;
1331 sectorReadOnlyFlag[numSectors] = 1;
1332 if ([self isDebug]) printf("read only border sector %d (%d, %d, %d).\n", numSectors,
1333 (*sCoordinates)[0][numSectors], (*sCoordinates)[1][numSectors], (*sCoordinates)[2][numSectors]);
1334 ++numSectors;
1335 }
1336 }
1337 for (yc = -1; yc < sectorsInDomain; ++yc) {
1338 for (zc = -1; zc < sectorsInDomain; ++zc) {
1339 (*sCoordinates)[0][numSectors] = sectorsInDomain;
1340 (*sCoordinates)[1][numSectors] = yc;
1341 (*sCoordinates)[2][numSectors] = zc;
1342 sectorReadOnlyFlag[numSectors] = 1;
1343 if ([self isDebug]) printf("read only border sector %d (%d, %d, %d).\n", numSectors,
1344 (*sCoordinates)[0][numSectors], (*sCoordinates)[1][numSectors], (*sCoordinates)[2][numSectors]);
1345 ++numSectors;
1346 }
1347 }
1348 for (xc = sectorsInDomain; xc > -1; --xc) {
1349 for (yc = -1; yc < sectorsInDomain; ++yc) {
1350 (*sCoordinates)[0][numSectors] = xc;
1351 (*sCoordinates)[1][numSectors] = yc;
1352 (*sCoordinates)[2][numSectors] = sectorsInDomain;
1353 sectorReadOnlyFlag[numSectors] = 1;
1354 if ([self isDebug]) printf("read only border sector %d (%d, %d, %d).\n", numSectors,
1355 (*sCoordinates)[0][numSectors], (*sCoordinates)[1][numSectors], (*sCoordinates)[2][numSectors]);
1356 ++numSectors;
1357 }
1358 }
1359 for (yc = -1; yc < sectorsInDomain; ++yc) {
1360 for (zc = sectorsInDomain; zc > -1; --zc) {
1361 (*sCoordinates)[0][numSectors] = -1;
1362 (*sCoordinates)[1][numSectors] = yc;
1363 (*sCoordinates)[2][numSectors] = zc;
1364 sectorReadOnlyFlag[numSectors] = 1;
1365 if ([self isDebug]) printf("read only border sector %d (%d, %d, %d).\n", numSectors,
1366 (*sCoordinates)[0][numSectors], (*sCoordinates)[1][numSectors], (*sCoordinates)[2][numSectors]);
1367 ++numSectors;
1368 }
1369 }
1370 // fill top and bottom
1371 for (xc = -1; xc <= sectorsInDomain; ++xc) {
1372 for (zc = -1; zc <= sectorsInDomain; ++zc) {
1373 (*sCoordinates)[0][numSectors] = xc;
1374 (*sCoordinates)[1][numSectors] = sectorsInDomain;
1375 (*sCoordinates)[2][numSectors] = zc;
1376 sectorReadOnlyFlag[numSectors] = 1;
1377 if ([self isDebug]) printf("read only border sector %d (%d, %d, %d).\n", numSectors,
1378 (*sCoordinates)[0][numSectors], (*sCoordinates)[1][numSectors], (*sCoordinates)[2][numSectors]);
1379 ++numSectors;
1380 }
1381 }
1382 for (xc = 0; xc < sectorsInDomain; ++xc) {
1383 for (zc = 0; zc < sectorsInDomain; ++zc) {
1384 (*sCoordinates)[0][numSectors] = xc;
1385 (*sCoordinates)[1][numSectors] = -1;
1386 (*sCoordinates)[2][numSectors] = zc;
1387 sectorReadOnlyFlag[numSectors] = 1;
1388 if ([self isDebug]) printf("read only border sector %d (%d, %d, %d).\n", numSectors,
1389 (*sCoordinates)[0][numSectors], (*sCoordinates)[1][numSectors], (*sCoordinates)[2][numSectors]);
1390 ++numSectors;
1391 }
1392 }
1393
1394 // walk around inside border
1395 for (xc = 0; xc < sectorsInDomain - 1; ++xc) {
1396 for (yc = 0; yc < sectorsInDomain - 1; ++yc) {
1397 (*sCoordinates)[0][numSectors] = xc;
1398 (*sCoordinates)[1][numSectors] = yc;
1399 (*sCoordinates)[2][numSectors] = 0;
1400 if ([self isDebug]) printf("inside border sector %d (%d, %d, %d).\n", numSectors,
1401 (*sCoordinates)[0][numSectors], (*sCoordinates)[1][numSectors], (*sCoordinates)[2][numSectors]);
1402 ++numSectors;
1403 }
1404 }
1405 for (yc = 0; yc < sectorsInDomain - 1; ++yc) {
1406 for (zc = 0; zc < sectorsInDomain - 1; ++zc) {
1407 (*sCoordinates)[0][numSectors] = sectorsInDomain - 1;
1408 (*sCoordinates)[1][numSectors] = yc;
1409 (*sCoordinates)[2][numSectors] = zc;
1410 if ([self isDebug]) printf("inside border sector %d (%d, %d, %d).\n", numSectors,
1411 (*sCoordinates)[0][numSectors], (*sCoordinates)[1][numSectors], (*sCoordinates)[2][numSectors]);
1412 ++numSectors;
1413 }
1414 }
1415 for (xc = sectorsInDomain - 1; xc > 0; --xc) {
1416 for (yc = 0; yc < sectorsInDomain - 1; ++yc) {
1417 (*sCoordinates)[0][numSectors] = xc;
1418 (*sCoordinates)[1][numSectors] = yc;
1419 (*sCoordinates)[2][numSectors] = sectorsInDomain - 1;
1420 if ([self isDebug]) printf("inside border sector %d (%d, %d, %d).\n", numSectors,
1421 (*sCoordinates)[0][numSectors], (*sCoordinates)[1][numSectors], (*sCoordinates)[2][numSectors]);
1422 ++numSectors;
1423 }
1424 }
1425 for (yc = 0; yc < sectorsInDomain - 1; ++yc) {
1426 for (zc = sectorsInDomain - 1; zc > 0; --zc) {
1427 (*sCoordinates)[0][numSectors] = 0;
1428 (*sCoordinates)[1][numSectors] = yc;
1429 (*sCoordinates)[2][numSectors] = zc;
1430 if ([self isDebug]) printf("inside border sector %d (%d, %d, %d).\n", numSectors,
1431 (*sCoordinates)[0][numSectors], (*sCoordinates)[1][numSectors], (*sCoordinates)[2][numSectors]);
1432 ++numSectors;
1433 }
1434 }
1435 // fill top and bottom
1436 for (xc = 0; xc < sectorsInDomain; ++xc) {
1437 for (zc = 0; zc < sectorsInDomain; ++zc) {
1438 (*sCoordinates)[0][numSectors] = xc;
1439 (*sCoordinates)[1][numSectors] = sectorsInDomain - 1;
1440 (*sCoordinates)[2][numSectors] = zc;
1441 sectorReadOnlyFlag[numSectors] = 1;
1442 if ([self isDebug]) printf("inside border sector %d (%d, %d, %d).\n", numSectors,
1443 (*sCoordinates)[0][numSectors], (*sCoordinates)[1][numSectors], (*sCoordinates)[2][numSectors]);
1444 ++numSectors;
1445 }
1446 }
1447 for (xc = 1; xc < sectorsInDomain - 1; ++xc) {
1448 for (zc = 1; zc < sectorsInDomain - 1; ++zc) {
1449 (*sCoordinates)[0][numSectors] = xc;
1450 (*sCoordinates)[1][numSectors] = 0;
1451 (*sCoordinates)[2][numSectors] = zc;
1452 sectorReadOnlyFlag[numSectors] = 1;
1453 if ([self isDebug]) printf("inside border sector %d (%d, %d, %d).\n", numSectors,
1454 (*sCoordinates)[0][numSectors], (*sCoordinates)[1][numSectors], (*sCoordinates)[2][numSectors]);
1455 ++numSectors;
1456 }
1457 }
1458
1459 }
1460 }
1461 }
1462
1463 - (void)assignElementsToSectors
1464 {
1465 int dims = [[super spatialModel] dimensions];
1466 int i, j;
1467 Grid2DArray *X = [[elementData gridWithName: @"X"] objectForKey: @"matrix"];
1468 Grid2DArray *Y = [[elementData gridWithName: @"Y"] objectForKey: @"matrix"];
1469 Grid2DArray *Z = [[elementData gridWithName: @"Z"] objectForKey: @"matrix"];
1470 Grid2DArray *S = [[elementData gridWithName: @"sector"] objectForKey: @"matrix"];
1471 int (*sGrid)[maxElements] = [S getMatrix];
1472 int (*sCoordinates)[3][maxSectors] = (void *)sectorCoordinates;
1473
1474 if ([dataEncode isEqual: [BioSwarmModel floatEncode]]) {
1475 float (*xGrid)[maxElements] = [X getMatrix];
1476 float (*yGrid)[maxElements] = [Y getMatrix];
1477 float (*zGrid)[maxElements] = [Z getMatrix];
1478
1479 // assign each element to its sector, create new sectors if needed
1480 for (i = 0; i < totalElements; ++i) {
1481 int xc = floor((*xGrid)[i] / sectorSize);
1482 int yc = floor((*yGrid)[i] / sectorSize);
1483 int zc = floor((*zGrid)[i] / sectorSize);
1484 BOOL found = NO;
1485 for (j = 0; j < numSectors; ++j) {
1486 if (dims == 2) {
1487 if (((*sCoordinates)[0][j] == xc) && ((*sCoordinates)[1][j] == yc)) { found = YES; break; }
1488 } else {
1489 if (((*sCoordinates)[0][j] == xc) && ((*sCoordinates)[1][j] == yc) && ((*sCoordinates)[2][j] == zc)) { found = YES; break; }
1490 }
1491 }
1492
1493 // sector numbers are sequential
1494 // so need to save sector coordinates
1495 if (found) (*sGrid)[i] = j;
1496 else {
1497 if (numSectors == maxSectors) { printf("ERROR: exceeded max number of sectors = %d\n", numSectors); abort(); }
1498
1499 (*sCoordinates)[0][numSectors] = xc;
1500 (*sCoordinates)[1][numSectors] = yc;
1501 if (dims == 2) (*sCoordinates)[2][numSectors] = 0;
1502 else (*sCoordinates)[2][numSectors] = zc;
1503 (*sGrid)[i] = numSectors;
1504
1505 ++numSectors;
1506 }
1507 if ([self isDebug]) printf("element %d (%f, %f %f) is in sector %d (%d, %d, %d), (%d, %d, %d).\n", i, (*xGrid)[i], (*yGrid)[i], (*zGrid)[i], (*sGrid)[i], (*sCoordinates)[0][numSectors-1], (*sCoordinates)[1][numSectors-1], (*sCoordinates)[2][numSectors-1], xc, yc, zc);
1508 //if ([modelController rank] == 2) printf("element %d (%f, %f %f) is in sector %d (%d, %d, %d).\n", i, (*xGrid)[i], (*yGrid)[i], (*zGrid)[i], (*sGrid)[i], xc, yc, zc);
1509 }
1510
1511 } else if ([dataEncode isEqual: [BioSwarmModel doubleEncode]]) {
1512 printf("ERROR: double encoding not supported.\n");
1513 }
1514 }
1515
1516 - (void)initializeNeighborTables
1517 {
1518 int dims = [[super spatialModel] dimensions];
1519 int i, j;
1520 Grid2DArray *S = [[elementData gridWithName: @"sector"] objectForKey: @"matrix"];
1521 int (*sGrid)[maxElements] = [S getMatrix];
1522 int (*sCoordinates)[3][maxSectors] = (void *)sectorCoordinates;
1523
1524 // sector neighbor table
1525 int nSize;
1526 if (dims == 2) nSize = S_XY_TOT;
1527 else nSize = S_XYZ_TOT;
1528 int (*snTable)[nSize][maxSectors] = (void *)sectorNeighborTable;
1529 for (i = 0; i < numSectors; ++i) {
1530 int nx, ny, nz;
1531 int direction;
1532 (*snTable)[S_CURRENT][i] = i;
1533 for (direction = S_START; direction < nSize; ++direction) {
1534 bc_sector_coordinate_shift((*sCoordinates)[0][i], (*sCoordinates)[1][i], (*sCoordinates)[2][i], direction, &nx, &ny, &nz);
1535
1536 BOOL found = NO;
1537 for (j = 0; j < numSectors; ++j) {
1538 if (dims == 2) {
1539 if (((*sCoordinates)[0][j] == nx) && ((*sCoordinates)[1][j] == ny)) { found = YES; break; }
1540 } else {
1541 if (((*sCoordinates)[0][j] == nx) && ((*sCoordinates)[1][j] == ny) && ((*sCoordinates)[2][j] == nz)) { found = YES; break; }
1542 }
1543 }
1544
1545 if (found) (*snTable)[direction][i] = j;
1546 else (*snTable)[direction][i] = -1;
1547
1548 if ([self isDebug]) if ((*snTable)[direction][i] >= 0) printf("sector (%d) has neighbor (%d) in direction (%s).\n", i, (*snTable)[direction][i], bc_sector_direction_string(direction));
1549 }
1550 }
1551
1552 // sector element table
1553 int (*seTable)[maxElementsPerSector][maxSectors] = (void *)sectorElementTable;
1554 for (i = 0; i < (maxElementsPerSector * maxSectors); ++i) sectorElementTable[i] = -1;
1555 for (i = 0; i < numSectors; ++i) {
1556 int sIdx = 0;
1557 for (j = 0; j < totalElements; ++j) {
1558 if ((*sGrid)[j] == i) {
1559
1560 if (sIdx >= maxElementsPerSector) {
1561 printf("ERROR: maxElementsPerSector exceeded!\n");
1562 abort();
1563 }
1564
1565 (*seTable)[sIdx][i] = j;
1566 ++sIdx;
1567 }
1568 }
1569 }
1570
1571 #if 0
1572 // element neighbor table
1573 int (*enTable)[nSize * maxElementsPerSector][maxElements] = (void *)elementNeighborTable;
1574 for (i = 0; i < (maxElements * nSize * maxElementsPerSector); ++i) elementNeighborTable[i] = -1;
1575 for (i = 0; i < totalElements; ++i) {
1576 int mySector = (*sGrid)[i];
1577 int direction;
1578 for (direction = S_CURRENT; direction < nSize; ++direction) {
1579 int currentSector = (*snTable)[direction][mySector];
1580 //printf("element (%d) in sector (%d) direction (%s, %d).\n", i, mySector, bc_sector_direction_string(direction), currentSector);
1581 if (currentSector == S_NONE) continue;
1582
1583 int sIdx = 0;
1584 for (j = 0; j < totalElements; ++j) {
1585 if ((*sGrid)[j] == currentSector) {
1586 if (sIdx >= maxElementsPerSector) {
1587 printf("ERROR: maxElementsPerSector exceeded!\n");
1588 abort();
1589 }
1590 (*enTable)[(direction * maxElementsPerSector) + sIdx][i] = j;
1591 //if (i == 0) printf("init en = (%d, %d, %d)\n", i, sIdx, j);
1592 ++sIdx;
1593 }
1594 }
1595 if ([self isDebug]) printf("element (%d) in sector (%d) has %d elements in direction (%s, %d).\n", i, mySector, sIdx, bc_sector_direction_string(direction), currentSector);
1596 }
1597 }
1598 #endif
1599 }
1600
1601 - (void)initializeSimulation
1602 {
1603 int dims = [[super spatialModel] dimensions];
1604 int i, j;
1605 Grid2DArray *X = [[elementData gridWithName: @"X"] objectForKey: @"matrix"];
1606 Grid2DArray *Y = [[elementData gridWithName: @"Y"] objectForKey: @"matrix"];
1607 Grid2DArray *Z = [[elementData gridWithName: @"Z"] objectForKey: @"matrix"];
1608 Grid2DArray *T = [[elementData gridWithName: @"type"] objectForKey: @"matrix"];
1609 Grid2DArray *C = [[elementData gridWithName: @"cell"] objectForKey: @"matrix"];
1610 Grid2DArray *S = [[elementData gridWithName: @"sector"] objectForKey: @"matrix"];
1611 int (*etGrid)[1][maxElements] = [T getMatrix];
1612 int (*cGrid)[1][maxElements] = [C getMatrix];
1613 int (*sGrid)[maxElements] = [S getMatrix];
1614
1615 SpatialModel *spatialModel = [super spatialModel];
1616 double *domainSize = [spatialModel domainSize];
1617
1618 // what type of initialization?
1619 int initType = 0;
1620 id pm = [model objectForKey: @"initialize"];
1621 if (pm) {
1622 NSString *s = [pm objectForKey: @"type"];
1623 if ([s isEqualToString: @"random"]) initType = 0;
1624 if ([s isEqualToString: @"grid"]) initType = 1;
1625 if ([s isEqualToString: @"hex"]) initType = 2;
1626 }
1627
1628 if ([dataEncode isEqual: [BioSwarmModel floatEncode]]) {
1629 float (*xGrid)[1][maxElements] = [X getMatrix];
1630 float (*yGrid)[1][maxElements] = [Y getMatrix];
1631 float (*zGrid)[1][maxElements] = [Z getMatrix];
1632
1633 switch (initType) {
1634 case 0: {
1635 // initial random positions for elements
1636 int elemNum = 0;
1637 for (j = 0; j < numOfCells; ++j) {
1638 //numOfElements[j] = initialElements;
1639 (*etGrid)[0][elemNum] = 0;
1640 (*cGrid)[0][elemNum] = j;
1641
1642 (*xGrid)[0][elemNum] = range[0] * RAND_NUM + offset[0];
1643 if ((*xGrid)[0][elemNum] < 0.0) (*xGrid)[0][elemNum] = 0.0;
1644 if ((*xGrid)[0][elemNum] > domainSize[0]) (*xGrid)[0][elemNum] = domainSize[0];
1645
1646 (*yGrid)[0][elemNum] = range[1] * RAND_NUM + offset[1];
1647 if ((*yGrid)[0][elemNum] < 0.0) (*yGrid)[0][elemNum] = 0.0;
1648 if ((*yGrid)[0][elemNum] > domainSize[1]) (*yGrid)[0][elemNum] = domainSize[1];
1649
1650 if (dims == 2) (*zGrid)[0][elemNum] = 0.0;
1651 else {
1652 (*zGrid)[0][elemNum] = range[2] * RAND_NUM + offset[2];
1653 if ((*zGrid)[0][elemNum] < 0.0) (*zGrid)[0][elemNum] = 0.0;
1654 if ((*zGrid)[0][elemNum] > domainSize[2]) (*zGrid)[0][elemNum] = domainSize[2];
1655 }
1656
1657 for (i = 1; i < initialElements; ++i) {
1658 (*xGrid)[0][elemNum+i] = (*xGrid)[0][elemNum] + RAND_NUM * randMult - randOffset;
1659 if ((*xGrid)[0][elemNum+i] < 0.0) (*xGrid)[0][elemNum+i] = 0.0;
1660 if ((*xGrid)[0][elemNum+i] > domainSize[0]) (*xGrid)[0][elemNum+i] = domainSize[0];
1661 (*yGrid)[0][elemNum+i] = (*yGrid)[0][elemNum] + RAND_NUM * randMult - randOffset;
1662 if ((*yGrid)[0][elemNum+i] < 0.0) (*yGrid)[0][elemNum+i] = 0.0;
1663 if ((*yGrid)[0][elemNum+i] > domainSize[1]) (*yGrid)[0][elemNum+i] = domainSize[1];
1664 if (dims == 2) (*zGrid)[0][elemNum+i] = 0.0;
1665 else {
1666 (*zGrid)[0][elemNum+i] = (*zGrid)[0][elemNum] + RAND_NUM * randMult - randOffset;
1667 if ((*zGrid)[0][elemNum+i] < 0.0) (*zGrid)[0][elemNum+i] = 0.0;
1668 if ((*zGrid)[0][elemNum+i] > domainSize[2]) (*zGrid)[0][elemNum+i] = domainSize[2];
1669 }
1670 (*etGrid)[0][elemNum+i] = 0;
1671 (*cGrid)[0][elemNum+i] = j;
1672 }
1673 elemNum += initialElements;
1674 }
1675 totalElements = elemNum;
1676 break;
1677 }
1678
1679 case 1: {
1680 // uniform grid positions for cells/elements
1681 int elemNum = 0;
1682 float xpos = startRange[0], ypos = startRange[1], zpos;
1683 if (dims == 3) zpos = startRange[2];
1684
1685 for (j = 0; j < numOfCells; ++j) {
1686 //numOfElements[j] = initialElements;
1687 (*etGrid)[0][elemNum] = 0;
1688 (*cGrid)[0][elemNum] = j;
1689
1690 (*xGrid)[0][elemNum] = xpos + RAND_NUM * randMult - randOffset;
1691 if ((*xGrid)[0][elemNum] < 0.0) (*xGrid)[0][elemNum] = 0.0;
1692 if ((*xGrid)[0][elemNum] > domainSize[0]) (*xGrid)[0][elemNum] = domainSize[0];
1693
1694 (*yGrid)[0][elemNum] = ypos + RAND_NUM * randMult - randOffset;
1695 if ((*yGrid)[0][elemNum] < 0.0) (*yGrid)[0][elemNum] = 0.0;
1696 if ((*yGrid)[0][elemNum] > domainSize[1]) (*yGrid)[0][elemNum] = domainSize[1];
1697
1698 if (dims == 2) (*zGrid)[0][elemNum] = 0.0;
1699 else {
1700 (*zGrid)[0][elemNum] = zpos + RAND_NUM * randMult - randOffset;
1701 if ((*zGrid)[0][elemNum] < 0.0) (*zGrid)[0][elemNum] = 0.0;
1702 if ((*zGrid)[0][elemNum] > domainSize[2]) (*zGrid)[0][elemNum] = domainSize[2];
1703 }
1704
1705 for (i = 1; i < initialElements; ++i) {
1706 (*xGrid)[0][elemNum+i] = xpos + RAND_NUM * randMult - randOffset;
1707 if ((*xGrid)[0][elemNum+i] < 0.0) (*xGrid)[0][elemNum+i] = 0.0;
1708 if ((*xGrid)[0][elemNum+i] > domainSize[0]) (*xGrid)[0][elemNum+i] = domainSize[0];
1709 (*yGrid)[0][elemNum+i] = ypos + RAND_NUM * randMult - randOffset;
1710 if ((*yGrid)[0][elemNum+i] < 0.0) (*yGrid)[0][elemNum+i] = 0.0;
1711 if ((*yGrid)[0][elemNum+i] > domainSize[1]) (*yGrid)[0][elemNum+i] = domainSize[1];
1712 if (dims == 2) (*zGrid)[0][elemNum+i] = 0.0;
1713 else {
1714 (*zGrid)[0][elemNum+i] = zpos + RAND_NUM * randMult - randOffset;
1715 if ((*zGrid)[0][elemNum+i] < 0.0) (*zGrid)[0][elemNum+i] = 0.0;
1716 if ((*zGrid)[0][elemNum+i] > domainSize[2]) (*zGrid)[0][elemNum+i] = domainSize[2];
1717 }
1718 (*etGrid)[0][elemNum+i] = 0;
1719 (*cGrid)[0][elemNum+i] = j;
1720 }
1721 elemNum += initialElements;
1722
1723 xpos += offset[0];
1724 if (xpos > range[0]) {
1725 xpos = startRange[0];
1726 ypos += offset[1];
1727 }
1728 }
1729 totalElements = elemNum;
1730 break;
1731 }
1732
1733 case 2: {
1734 // uniform hex positions for cells
1735 BOOL full = NO;
1736 BOOL flip = NO;
1737 int elemNum = 0;
1738 float xpos = startRange[0], ypos = startRange[1], zpos;
1739 if (dims == 3) zpos = startRange[2];
1740
1741 for (j = 0; j < numOfCells; ++j) {
1742
1743 // determine next good position
1744 BOOL good = NO;
1745 while (!good) {
1746 good = YES;
1747
1748 if (sectorBorder > 0) {
1749 if (xpos < (sectorBorder * sectorSize + randMult)) good = NO;
1750 if (xpos > (domainSize[0] - sectorBorder * sectorSize - randMult)) good = NO;
1751
1752 if (ypos < (sectorBorder * sectorSize + randMult)) good = NO;
1753 if (ypos > (domainSize[1] - sectorBorder * sectorSize - randMult)) good = NO;
1754
1755 if (dims == 3) {
1756 if (zpos < (sectorBorder * sectorSize + randMult)) good = NO;
1757 if (zpos > (domainSize[2] - sectorBorder * sectorSize - randMult)) good = NO;
1758 }
1759 }
1760
1761 if (!good) {
1762 xpos += offset[0];
1763 if (xpos > range[0]) {
1764 if (flip) xpos = startRange[0];
1765 else xpos = startRange[0] + offset[0] / 2.0;
1766 flip = !flip;
1767 ypos += offset[1];
1768 }
1769 }
1770
1771 if ((xpos > range[0]) || (ypos > range[1])) { full = YES; break; }
1772 }
1773
1774 if (full) {
1775 printf("WARNING: Not enough room for %d cells, placed %d.\n", numOfCells, j);
1776 numOfCells = j;
1777 break;
1778 }
1779
1780 //numOfElements[j] = initialElements;
1781 (*etGrid)[0][elemNum] = 0;
1782 (*cGrid)[0][elemNum] = j;
1783
1784 good = NO;
1785 while (!good) {
1786 good = YES;
1787
1788 (*xGrid)[0][elemNum] = xpos + RAND_NUM * randMult - randOffset;
1789 if ((*xGrid)[0][elemNum] < 0.0) (*xGrid)[0][elemNum] = 0.0;
1790 if ((*xGrid)[0][elemNum] > domainSize[0]) (*xGrid)[0][elemNum] = domainSize[0];
1791 if (sectorBorder > 0) {
1792 if ((*xGrid)[0][elemNum] < (sectorBorder * sectorSize)) good = NO;
1793 if ((*xGrid)[0][elemNum] > (domainSize[0] - sectorBorder * sectorSize)) good = NO;
1794 }
1795
1796 (*yGrid)[0][elemNum] = ypos + RAND_NUM * randMult - randOffset;
1797 if ((*yGrid)[0][elemNum] < 0.0) (*yGrid)[0][elemNum] = 0.0;
1798 if ((*yGrid)[0][elemNum] > domainSize[1]) (*yGrid)[0][elemNum] = domainSize[1];
1799 if (sectorBorder > 0) {
1800 if ((*yGrid)[0][elemNum] < (sectorBorder * sectorSize)) good = NO;
1801 if ((*yGrid)[0][elemNum] > (domainSize[1] - sectorBorder * sectorSize)) good = NO;
1802 }
1803
1804 if (dims == 2) (*zGrid)[0][elemNum] = 0.0;
1805 else {
1806 (*zGrid)[0][elemNum] = zpos + RAND_NUM * randMult - randOffset;
1807 if ((*zGrid)[0][elemNum] < 0.0) (*zGrid)[0][elemNum] = 0.0;
1808 if ((*zGrid)[0][elemNum] > domainSize[2]) (*zGrid)[0][elemNum] = domainSize[2];
1809 if (sectorBorder > 0) {
1810 if ((*zGrid)[0][elemNum] < (sectorBorder * sectorSize)) good = NO;
1811 if ((*zGrid)[0][elemNum] > (domainSize[2] - sectorBorder * sectorSize)) good = NO;
1812 }
1813 }
1814 }
1815
1816 for (i = 1; i < initialElements; ++i) {
1817 good = NO;
1818 while (!good) {
1819 good = YES;
1820
1821 (*xGrid)[0][elemNum+i] = xpos + RAND_NUM * randMult - randOffset;
1822 if ((*xGrid)[0][elemNum+i] < 0.0) (*xGrid)[0][elemNum+i] = 0.0;
1823 if ((*xGrid)[0][elemNum+i] > domainSize[0]) (*xGrid)[0][elemNum+i] = domainSize[0];
1824 if (sectorBorder > 0) {
1825 if ((*xGrid)[0][elemNum+i] < (sectorBorder * sectorSize)) good = NO;
1826 if ((*xGrid)[0][elemNum+i] > (domainSize[0] - sectorBorder * sectorSize)) good = NO;
1827 }
1828
1829 (*yGrid)[0][elemNum+i] = ypos + RAND_NUM * randMult - randOffset;
1830 if ((*yGrid)[0][elemNum+i] < 0.0) (*yGrid)[0][elemNum+i] = 0.0;
1831 if ((*yGrid)[0][elemNum+i] > domainSize[1]) (*yGrid)[0][elemNum+i] = domainSize[1];
1832 if (sectorBorder > 0) {
1833 if ((*yGrid)[0][elemNum+i] < (sectorBorder * sectorSize)) good = NO;
1834 if ((*yGrid)[0][elemNum+i] > (domainSize[1] - sectorBorder * sectorSize)) good = NO;
1835 }
1836
1837 if (dims == 2) (*zGrid)[0][elemNum+i] = 0.0;
1838 else {
1839 (*zGrid)[0][elemNum+i] = zpos + RAND_NUM * randMult - randOffset;
1840 if ((*zGrid)[0][elemNum+i] < 0.0) (*zGrid)[0][elemNum+i] = 0.0;
1841 if ((*zGrid)[0][elemNum+i] > domainSize[2]) (*zGrid)[0][elemNum+i] = domainSize[2];
1842 if (sectorBorder > 0) {
1843 if ((*zGrid)[0][elemNum+i] < (sectorBorder * sectorSize)) good = NO;
1844 if ((*zGrid)[0][elemNum+i] > (domainSize[2] - sectorBorder * sectorSize)) good = NO;
1845 }
1846 }
1847 }
1848
1849 (*etGrid)[0][elemNum+i] = 0;
1850 (*cGrid)[0][elemNum+i] = j;
1851 }
1852 elemNum += initialElements;
1853
1854 xpos += offset[0];
1855 if (xpos > range[0]) {
1856 if (flip) xpos = startRange[0];
1857 else xpos = startRange[0] + offset[0] / 2.0;
1858 flip = !flip;
1859 ypos += offset[1];
1860 }
1861 }
1862 totalElements = elemNum;
1863 break;
1864 }
1865 }
1866
1867 } else if ([dataEncode isEqual: [BioSwarmModel doubleEncode]]) {
1868 printf("ERROR: double encoding not supported.\n");
1869 #if 0
1870 double (*xGrid)[maxElements][maxCells] = [X getMatrix];
1871 double (*yGrid)[maxElements][maxCells] = [Y getMatrix];
1872 double (*zGrid)[maxElements][maxCells] = [Z getMatrix];
1873
1874 // initial random positions for elements
1875 for (j = 0; j < numOfCells; ++j) {
1876 numOfElements[j] = initialElements;
1877
1878 //(*xGrid)[0][j] = BOUNDARY_X * RAND_NUM;
1879 (*xGrid)[0][j] = range[0] * RAND_NUM + offset[0];
1880 if ((*xGrid)[0][j] < 0.0) (*xGrid)[0][j] = 0.0;
1881 if ((*xGrid)[0][j] > domainSize[0]) (*xGrid)[0][j] = domainSize[0];
1882
1883 //(*yGrid)[0][j] = BOUNDARY_Y * RAND_NUM;
1884 (*yGrid)[0][j] = range[1] * RAND_NUM + offset[1];
1885 if ((*yGrid)[0][j] < 0.0) (*yGrid)[0][j] = 0.0;
1886 if ((*yGrid)[0][j] > domainSize[1]) (*yGrid)[0][j] = domainSize[1];
1887
1888 //(*zGrid)[0][j] = BOUNDARY_Z * RAND_NUM;
1889 if (dims == 2) (*zGrid)[0][j] = 0.0;
1890 else {
1891 (*zGrid)[0][j] = range[2] * RAND_NUM + offset[2];
1892 if ((*zGrid)[0][j] < 0.0) (*zGrid)[0][j] = 0.0;
1893 if ((*zGrid)[0][j] > domainSize[2]) (*zGrid)[0][j] = domainSize[2];
1894 }
1895 //(*zGrid)[0][j] = 2.0;
1896 //(*zGrid)[0][j] = RAND_NUM * 100;
1897
1898 for (i = 1; i < numOfElements[j]; ++i) {
1899 //if (i < 4) (*etGrid)[i][j] = 1;
1900 (*xGrid)[i][j] = (*xGrid)[0][j] + RAND_NUM * randMult - randOffset;
1901 if ((*xGrid)[i][j] < 0.0) (*xGrid)[i][j] = 0.0;
1902 if ((*xGrid)[i][j] > domainSize[0]) (*xGrid)[i][j] = domainSize[0];
1903 (*yGrid)[i][j] = (*yGrid)[0][j] + RAND_NUM * randMult - randOffset;
1904 if ((*yGrid)[i][j] < 0.0) (*yGrid)[i][j] = 0.0;
1905 if ((*yGrid)[i][j] > domainSize[1]) (*yGrid)[i][j] = domainSize[1];
1906 if (dims == 2) (*zGrid)[i][j] = 0.0;
1907 else {
1908 (*zGrid)[i][j] = (*zGrid)[0][j] + RAND_NUM * randMult - randOffset;
1909 if ((*zGrid)[i][j] < 0.0) (*zGrid)[i][j] = 0.0;
1910 if ((*zGrid)[i][j] > domainSize[2]) (*zGrid)[i][j] = domainSize[2];
1911 }
1912 //(*zGrid)[i][j] = 1.0;
1913 //(*zGrid)[i][j] = (*zGrid)[0][j] + normal_dist_rand(0, 0.1);
1914 (*etGrid)[i][j] = 0;
1915 }
1916 }
1917 #endif
1918 }
1919
1920 if (totalElements > maxElements) {
1921 printf("ERROR: exceeded maximum number of elements %d > %d\n", totalElements, maxElements);
1922 abort();
1923 }
1924
1925 // create sectors
1926 [self generateSectors];
1927
1928 // place elements in sectors
1929 [self assignElementsToSectors];
1930
1931 // neighbor tables are calculated from element and sector coordinates
1932 [self initializeNeighborTables];
1933 }
1934
1935 - (void)initializeFileCompleted
1936 {
1937 // create sectors
1938 [self generateSectors];
1939
1940 // place elements in sectors
1941 [self assignElementsToSectors];
1942
1943 // neighbor tables are calculated from element and sector coordinates
1944 [self initializeNeighborTables];
1945 }
1946
1947 - (void)cleanupSimulation
1948 {
1949 }
1950
1951 @end
1952
1953 //
1954 // GPU
1955 //
1956
1957 @implementation SubcellularElementModel (GPU)
1958
1959 - (NSMutableString *)definitionsGPUCode: (NSString *)prefixName
1960 {
1961 int dims = [[super spatialModel] dimensions];
1962 printf("definitionsGPUCode: %d\n", dims);
1963
1964 NSMutableString *GPUCode = [NSMutableString new];
1965
1966 [GPUCode appendFormat: @"__constant__ float %@_sem_parameters[%d];\n\n", prefixName, [self numOfParameters]];
1967
1968 [GPUCode appendFormat: @"\n__constant__ BC_SEM_GPUgrids %@_sem_grids[1];\n", prefixName];
1969 [GPUCode appendString: @"\n"];
1970
1971 return GPUCode;
1972 }
1973
1974 - (NSDictionary *)controllerGPUCode: (NSString *)prefixName
1975 {
1976 int dims = [[super spatialModel] dimensions];
1977
1978 NSMutableDictionary *GPUDictionary = [NSMutableDictionary new];
1979 NSMutableString *GPUCode;
1980
1981 //
1982 // Headers and defines
1983 //
1984
1985 GPUCode = [NSMutableString new];
1986 [GPUCode appendString: @"#include <stdio.h>\n"];
1987 [GPUCode appendString: @"#include <unistd.h>\n\n"];
1988 [GPUCode appendString: @"extern \"C\" {\n"];
1989 [GPUCode appendString: @"#include <BioSwarm/GPUDefines.h>\n"];
1990 //[GPUCode appendString: @"#include <BioSwarm/GPU_sem.h>\n"];
1991 //[GPUCode appendString: @"#include \"GPU_sem.h\"\n"];
1992 [GPUDictionary setObject: GPUCode forKey: @"header"];
1993
1994 GPUCode = [NSMutableString new];
1995 [GPUCode appendString: @"#include <BioSwarm/GPU_sem.h>\n"];
1996 [GPUCode appendFormat: @"#include \"%@_defines.cu\"\n\n", prefixName];
1997 [GPUCode appendFormat: @"#include \"%@_kernel.cu\"\n\n", prefixName];
1998 [GPUDictionary setObject: GPUCode forKey: @"defines"];
1999
2000 //
2001 // Allocation: generic
2002 //
2003
2004 //
2005 // Data Transfer
2006 //
2007 GPUCode = [NSMutableString new];
2008 [GPUCode appendFormat: @"%@_transferGPUKernel(void *model, void *g, int aFlag, float *hostParameters, int *cellNumber, int numOfCells, int totalElements, void *hostX, void *hostY, void *hostZ, void *hostType, void *hostSector, void *hostElementNeighborTable, void *EFT, void *EFTT, void *ETT, void *EFFT, void *hostData, void *meshElements, void *hostSectorCoordinates, void *hostNumSectors, void *hostSectorSize, void *hostSectorElementTable, void *hostSectorNeighborTable, void *hostReadOnlySector)", prefixName];
2009 [GPUDictionary setObject: GPUCode forKey: @"transfer function"];
2010
2011 GPUCode = [NSMutableString new];
2012 [GPUCode appendString: @"\n{\n"];
2013 [GPUCode appendString: @"\n BC_SEM_transferGPUKernel(model, g, aFlag, hostParameters, cellNumber, numOfCells, totalElements, hostX, hostY, hostZ, hostType, hostSector, hostElementNeighborTable, EFT, EFTT, ETT, EFFT, hostData, meshElements, hostSectorCoordinates, hostNumSectors, hostSectorSize, hostSectorElementTable, hostSectorNeighborTable, hostReadOnlySector);\n"];
2014 [GPUCode appendString: @"\n if (aFlag) {\n"];
2015 [GPUCode appendString: @" // Copy data structure\n"];
2016 [GPUCode appendFormat: @" cudaMemcpyToSymbol(%@_sem_grids, g, sizeof(BC_SEM_GPUgrids), 0, cudaMemcpyHostToDevice);\n", prefixName];
2017
2018 [GPUCode appendString: @" }\n"];
2019 [GPUCode appendString: @"}\n"];
2020 [GPUDictionary setObject: GPUCode forKey: @"transfer code"];
2021
2022 //
2023 // Execution
2024 //
2025
2026 GPUCode = [NSMutableString new];
2027 [GPUCode appendFormat: @"%@_invokeGPUKernel(void *model, void *%@_data, double startTime, double nextTime)", prefixName, prefixName];
2028 [GPUDictionary setObject: GPUCode forKey: @"execution function"];
2029
2030 GPUCode = [NSMutableString new];
2031 [GPUCode appendFormat: @" BC_SEM_GPUgrids *%@_ptrs = (BC_SEM_GPUgrids *)%@_data;\n", prefixName, prefixName];
2032 [GPUDictionary setObject: GPUCode forKey: @"structure"];
2033
2034 GPUCode = [NSMutableString new];
2035 #if 0
2036 [GPUCode appendFormat: @" int %@_x = 256;\n", prefixName];
2037 [GPUCode appendFormat: @" int %@_y = 1;\n", prefixName];
2038 //[GPUCode appendFormat: @" int %@_threadsPerBlock1D = %@_xx;\n", prefixName, prefixName];
2039 //[GPUCode appendFormat: @" int %@_blocksPerGrid1D = (%@_ptrs->numOfCells + %@_threadsPerBlock1D - 1) / %@_threadsPerBlock1D;\n", prefixName, prefixName, prefixName, prefixName];
2040 [GPUCode appendFormat: @" if (%@_x > %@_ptrs->totalElements) %@_x = %@_ptrs->totalElements;\n", prefixName, prefixName, prefixName, prefixName];
2041 [GPUCode appendFormat: @" if (%@_y > %@_ptrs->numOfModels) %@_y = %@_ptrs->numOfModels;\n", prefixName, prefixName, prefixName, prefixName];
2042 [GPUCode appendFormat: @" dim3 %@_threadsPerBlock(%@_x, %@_y);\n", prefixName, prefixName, prefixName];
2043 [GPUCode appendFormat: @" dim3 %@_threadsPerGrid((%@_ptrs->totalElements + %@_threadsPerBlock.x - 1) / %@_threadsPerBlock.x, (%@_ptrs->numOfModels + %@_threadsPerBlock.y - 1) / %@_threadsPerBlock.y);\n", prefixName, prefixName, prefixName, prefixName, prefixName, prefixName, prefixName];
2044 [GPUCode appendString: @"\n"];
2045
2046 [GPUCode appendFormat: @" int %@_xx = 256;\n", prefixName];
2047 [GPUCode appendFormat: @" if (%@_xx > %@_ptrs->numOfCells) %@_xx = %@_ptrs->numOfCells;\n", prefixName, prefixName, prefixName, prefixName];
2048 [GPUCode appendFormat: @" dim3 %@_cell_threadsPerBlock(%@_xx, %@_y);\n", prefixName, prefixName, prefixName];
2049 [GPUCode appendFormat: @" dim3 %@_cell_blocksPerGrid((%@_ptrs->numOfCells + %@_cell_threadsPerBlock.x - 1) / %@_cell_threadsPerBlock.x, (%@_ptrs->numOfModels + %@_cell_threadsPerBlock.y - 1) / %@_cell_threadsPerBlock.y);\n", prefixName, prefixName, prefixName, prefixName, prefixName, prefixName, prefixName];
2050 [GPUCode appendString: @"\n"];
2051 #else
2052 [GPUCode appendFormat: @" int %@_x = 256;\n", prefixName];
2053 [GPUCode appendFormat: @" int %@_y = 1;\n", prefixName];
2054 [GPUCode appendFormat: @" if (%@_x > %@_ptrs->totalElements) %@_x = %@_ptrs->totalElements;\n", prefixName, prefixName, prefixName, prefixName];
2055 [GPUCode appendFormat: @" if (%@_y > %@_ptrs->numOfModels) %@_y = %@_ptrs->numOfModels;\n", prefixName, prefixName, prefixName, prefixName];
2056 [GPUCode appendFormat: @" dim3 %@_threadsPerBlock(%@_x, %@_y);\n", prefixName, prefixName, prefixName];
2057 [GPUCode appendFormat: @" dim3 %@_threadsPerGrid((%@_ptrs->totalElements + %@_threadsPerBlock.x - 1) / %@_threadsPerBlock.x, (%@_ptrs->numOfModels + %@_threadsPerBlock.y - 1) / %@_threadsPerBlock.y);\n", prefixName, prefixName, prefixName, prefixName, prefixName, prefixName, prefixName];
2058 [GPUCode appendString: @"\n"];
2059
2060 [GPUCode appendFormat: @" int %@_xx = 256;\n", prefixName];
2061 [GPUCode appendFormat: @" if (%@_xx > %@_ptrs->numOfCells) %@_xx = %@_ptrs->numOfCells;\n", prefixName, prefixName, prefixName, prefixName];
2062 [GPUCode appendFormat: @" dim3 %@_cell_threadsPerBlock(%@_xx, %@_y);\n", prefixName, prefixName, prefixName];
2063 [GPUCode appendFormat: @" dim3 %@_cell_blocksPerGrid((%@_ptrs->numOfCells + %@_cell_threadsPerBlock.x - 1) / %@_cell_threadsPerBlock.x, (%@_ptrs->numOfModels + %@_cell_threadsPerBlock.y - 1) / %@_cell_threadsPerBlock.y);\n", prefixName, prefixName, prefixName, prefixName, prefixName, prefixName, prefixName];
2064 [GPUCode appendString: @"\n"];
2065
2066 [GPUCode appendFormat: @" int %@_nx = 256;\n", prefixName];
2067 [GPUCode appendFormat: @" int %@_ny = 1;\n", prefixName];
2068 [GPUCode appendFormat: @" int %@_maxy = %@_ptrs->numOfModels * %@_ptrs->sectorNeighborSize * %@_ptrs->maxElementsPerSector;\n", prefixName, prefixName, prefixName, prefixName];
2069 [GPUCode appendFormat: @" if (%@_nx > %@_ptrs->totalElements) %@_nx = %@_ptrs->totalElements;\n", prefixName, prefixName, prefixName, prefixName];
2070 [GPUCode appendFormat: @" if (%@_ny > %@_maxy) %@_ny = %@_maxy;\n", prefixName, prefixName, prefixName, prefixName];
2071 [GPUCode appendFormat: @" dim3 %@n_threadsPerBlock(%@_nx, %@_ny);\n", prefixName, prefixName, prefixName];
2072 [GPUCode appendFormat: @" dim3 %@n_threadsPerGrid((%@_ptrs->totalElements + %@n_threadsPerBlock.x - 1) / %@n_threadsPerBlock.x, (%@_maxy + %@n_threadsPerBlock.y - 1) / %@n_threadsPerBlock.y);\n", prefixName, prefixName, prefixName, prefixName, prefixName, prefixName, prefixName];
2073 [GPUCode appendString: @"\n"];
2074
2075 [GPUCode appendFormat: @" int %@_sx = 256;\n", prefixName];
2076 [GPUCode appendFormat: @" int %@_sy = 1;\n", prefixName];
2077 [GPUCode appendFormat: @" if (%@_sx > %@_ptrs->maxSectors) %@_sx = %@_ptrs->maxSectors;\n", prefixName, prefixName, prefixName, prefixName];
2078 [GPUCode appendFormat: @" if (%@_sy > %@_ptrs->numOfModels) %@_sy = %@_ptrs->numOfModels;\n", prefixName, prefixName, prefixName, prefixName];
2079 [GPUCode appendFormat: @" dim3 %@s_threadsPerBlock(%@_sx, %@_sy);\n", prefixName, prefixName, prefixName];
2080 [GPUCode appendFormat: @" dim3 %@s_threadsPerGrid((%@_ptrs->maxSectors + %@s_threadsPerBlock.x - 1) / %@s_threadsPerBlock.x, (%@_ptrs->numOfModels + %@s_threadsPerBlock.y - 1) / %@s_threadsPerBlock.y);\n", prefixName, prefixName, prefixName, prefixName, prefixName, prefixName, prefixName];
2081 [GPUCode appendString: @"\n"];
2082
2083 [GPUCode appendFormat: @" int %@_mx = 256;\n", prefixName];
2084 [GPUCode appendFormat: @" int %@_my = 1;\n", prefixName];
2085 [GPUCode appendFormat: @" if (%@_mx > %@_ptrs->numOfModels) %@_mx = %@_ptrs->numOfModels;\n", prefixName, prefixName, prefixName, prefixName];
2086 [GPUCode appendFormat: @" dim3 %@m_threadsPerBlock(%@_mx, %@_my);\n", prefixName, prefixName, prefixName];
2087 [GPUCode appendFormat: @" dim3 %@m_threadsPerGrid((%@_ptrs->numOfModels + %@m_threadsPerBlock.x - 1) / %@m_threadsPerBlock.x, 1);\n", prefixName, prefixName, prefixName, prefixName];
2088 [GPUCode appendString: @"\n"];
2089 #endif
2090
2091 [GPUDictionary setObject: GPUCode forKey: @"execution variables"];
2092
2093 GPUCode = [NSMutableString new];
2094 [GPUCode appendFormat: @" if (%@_ptrs->calcCellCenters) %@_SEM_center_kernel<<< %@_cell_blocksPerGrid, %@_cell_threadsPerBlock >>>();\n", prefixName, prefixName, prefixName, prefixName];
2095 if ([self isDebug]) [GPUCode appendString: @" cudaPrintfDisplay(stdout, true);\n"];
2096
2097 [GPUCode appendFormat: @" if (%@_ptrs->calcElementCenters) %@_SEM_element_center_kernel<<< %@_cell_blocksPerGrid, %@_cell_threadsPerBlock >>>();\n", prefixName, prefixName, prefixName, prefixName];
2098 if ([self isDebug]) [GPUCode appendString: @" cudaPrintfDisplay(stdout, true);\n"];
2099
2100 #if 0
2101 [GPUCode appendFormat: @" %@_SEM_kernel1_2rk<<< %@_threadsPerGrid, %@_threadsPerBlock >>>(currentTime, nextTime);\n", prefixName, prefixName, prefixName];
2102 if ([self isDebug]) [GPUCode appendString: @" cudaPrintfDisplay(stdout, true);\n"];
2103 [GPUCode appendFormat: @" %@_SEM_kernel2_2rk<<< %@_threadsPerGrid, %@_threadsPerBlock >>>(currentTime, nextTime);\n", prefixName, prefixName, prefixName];
2104 if ([self isDebug]) [GPUCode appendString: @" cudaPrintfDisplay(stdout, true);\n"];
2105 #endif
2106 #if 0
2107 [GPUCode appendFormat: @" cudaMemset2D (%@_ptrs->X_tmp, %@_ptrs->pitch, 0, %@_ptrs->maxElements * sizeof(float), %@_ptrs->numOfModels);\n", prefixName, prefixName, prefixName, prefixName];
2108 [GPUCode appendFormat: @" cudaMemset2D (%@_ptrs->Y_tmp, %@_ptrs->pitch, 0, %@_ptrs->maxElements * sizeof(float), %@_ptrs->numOfModels);\n", prefixName, prefixName, prefixName, prefixName];
2109 [GPUCode appendFormat: @" cudaMemset2D (%@_ptrs->Z_tmp, %@_ptrs->pitch, 0, %@_ptrs->maxElements * sizeof(float), %@_ptrs->numOfModels);\n", prefixName, prefixName, prefixName, prefixName];
2110 [GPUCode appendString: @"\n"];
2111
2112 [GPUCode appendFormat: @" %@_SEM_neighbor_kernel1_2rk<<< %@n_threadsPerGrid, %@n_threadsPerBlock >>>(currentTime, nextTime);\n", prefixName, prefixName, prefixName];
2113 [GPUCode appendFormat: @" cudacheck(\"invoke %@_SEM_neighbor_kernel1_2rk\");\n\n", prefixName];
2114
2115 [GPUCode appendFormat: @" %@_SEM_neighbor_sum_kernel1_2rk<<< %@_threadsPerGrid, %@_threadsPerBlock >>>(currentTime, nextTime);\n", prefixName, prefixName, prefixName];
2116 [GPUCode appendFormat: @" cudacheck(\"invoke %@_SEM_neighbor_sum_kernel1_2rk\");\n\n", prefixName];
2117
2118 [GPUCode appendFormat: @" %@_SEM_neighbor_kernel2_2rk<<< %@n_threadsPerGrid, %@n_threadsPerBlock >>>(currentTime, nextTime);\n", prefixName, prefixName, prefixName];
2119 [GPUCode appendFormat: @" cudacheck(\"invoke %@_SEM_neighbor_kernel2_2rk\");\n\n", prefixName];
2120
2121 [GPUCode appendFormat: @" %@_SEM_neighbor_update_kernel_2rk<<< %@_threadsPerGrid, %@_threadsPerBlock >>>(currentTime, nextTime);\n", prefixName, prefixName, prefixName];
2122 [GPUCode appendFormat: @" cudacheck(\"invoke %@_SEM_neighbor_update_kernel_2rk\");\n\n", prefixName];
2123
2124 [GPUCode appendFormat: @" %@_SEM_sector_count_kernel<<< %@s_threadsPerGrid, %@s_threadsPerBlock >>>(currentTime, nextTime);\n", prefixName, prefixName, prefixName];
2125 [GPUCode appendFormat: @" cudacheck(\"invoke %@_SEM_sector_count_kernel\");\n\n", prefixName];
2126
2127 [GPUCode appendFormat: @" %@_SEM_create_sectors_kernel<<< %@m_threadsPerGrid, %@m_threadsPerBlock >>>(currentTime, nextTime);\n", prefixName, prefixName, prefixName];
2128 [GPUCode appendFormat: @" cudacheck(\"invoke %@_SEM_create_sectors_kernel\");\n\n", prefixName];
2129
2130 [GPUCode appendFormat: @" %@_SEM_build_sector_neighbors_kernel<<< %@s_threadsPerGrid, %@s_threadsPerBlock >>>(currentTime, nextTime);\n", prefixName, prefixName, prefixName];
2131 [GPUCode appendFormat: @" cudacheck(\"invoke %@_SEM_build_sector_neighbors_kernel\");\n\n", prefixName];
2132
2133 [GPUCode appendFormat: @" %@_SEM_move_elements_kernel<<< %@s_threadsPerGrid, %@s_threadsPerBlock >>>(currentTime, nextTime);\n", prefixName, prefixName, prefixName];
2134 [GPUCode appendFormat: @" cudacheck(\"invoke %@_SEM_move_elements_kernel\");\n\n", prefixName];
2135
2136 [GPUCode appendFormat: @" %@_SEM_update_element_sector_kernel<<< %@s_threadsPerGrid, %@s_threadsPerBlock >>>(currentTime, nextTime);\n", prefixName, prefixName, prefixName];
2137 [GPUCode appendFormat: @" cudacheck(\"invoke %@_SEM_update_element_sector_kernel\");\n\n", prefixName];
2138
2139 [GPUCode appendFormat: @" %@_SEM_update_element_neighbor_kernel<<< %@n_threadsPerGrid, %@n_threadsPerBlock >>>(currentTime, nextTime);\n", prefixName, prefixName, prefixName];
2140 [GPUCode appendFormat: @" cudacheck(\"invoke %@_SEM_update_element_neighbor_kernel\");\n\n", prefixName];
2141 #endif
2142 //[GPUCode appendFormat: @" cudaMemset2D (%@_ptrs->X_tmp, %@_ptrs->pitch, 0, %@_ptrs->maxElements * sizeof(float), %@_ptrs->numOfModels);\n", prefixName, prefixName, prefixName, prefixName];
2143 //[GPUCode appendFormat: @" cudaMemset2D (%@_ptrs->Y_tmp, %@_ptrs->pitch, 0, %@_ptrs->maxElements * sizeof(float), %@_ptrs->numOfModels);\n", prefixName, prefixName, prefixName, prefixName];
2144 //[GPUCode appendFormat: @" cudaMemset2D (%@_ptrs->Z_tmp, %@_ptrs->pitch, 0, %@_ptrs->maxElements * sizeof(float), %@_ptrs->numOfModels);\n", prefixName, prefixName, prefixName, prefixName];
2145 //[GPUCode appendString: @"\n"];
2146
2147 [GPUCode appendFormat: @" %@_SEM_pairwise_kernel1_2rk<<< %@_threadsPerGrid, %@_threadsPerBlock >>>(currentTime, nextTime);\n", prefixName, prefixName, prefixName];
2148 [GPUCode appendFormat: @" cudacheck(\"invoke %@_SEM_pairwise_kernel1_2rk\");\n\n", prefixName];
2149
2150 [GPUCode appendFormat: @" %@_SEM_pairwise_kernel2_2rk<<< %@_threadsPerGrid, %@_threadsPerBlock >>>(currentTime, nextTime);\n", prefixName, prefixName, prefixName];
2151 [GPUCode appendFormat: @" cudacheck(\"invoke %@_SEM_pairwise_kernel2_2rk\");\n\n", prefixName];
2152
2153 [GPUCode appendFormat: @" %@_SEM_sector_count_kernel<<< %@s_threadsPerGrid, %@s_threadsPerBlock >>>(currentTime, nextTime);\n", prefixName, prefixName, prefixName];
2154 [GPUCode appendFormat: @" cudacheck(\"invoke %@_SEM_sector_count_kernel\");\n\n", prefixName];
2155
2156 [GPUCode appendFormat: @" %@_SEM_create_sectors_kernel<<< %@m_threadsPerGrid, %@m_threadsPerBlock >>>(currentTime, nextTime);\n", prefixName, prefixName, prefixName];
2157 [GPUCode appendFormat: @" cudacheck(\"invoke %@_SEM_create_sectors_kernel\");\n\n", prefixName];
2158
2159 [GPUCode appendFormat: @" %@_SEM_build_sector_neighbors_kernel<<< %@s_threadsPerGrid, %@s_threadsPerBlock >>>(currentTime, nextTime);\n", prefixName, prefixName, prefixName];
2160 [GPUCode appendFormat: @" cudacheck(\"invoke %@_SEM_build_sector_neighbors_kernel\");\n\n", prefixName];
2161
2162 [GPUCode appendFormat: @" %@_SEM_move_elements_kernel<<< %@s_threadsPerGrid, %@s_threadsPerBlock >>>(currentTime, nextTime);\n", prefixName, prefixName, prefixName];
2163 [GPUCode appendFormat: @" cudacheck(\"invoke %@_SEM_move_elements_kernel\");\n\n", prefixName];
2164
2165 [GPUCode appendFormat: @" %@_SEM_update_element_sector_kernel<<< %@s_threadsPerGrid, %@s_threadsPerBlock >>>(currentTime, nextTime);\n", prefixName, prefixName, prefixName];
2166 [GPUCode appendFormat: @" cudacheck(\"invoke %@_SEM_update_element_sector_kernel\");\n\n", prefixName];
2167
2168 [GPUDictionary setObject: GPUCode forKey: @"execution code"];
2169
2170 //
2171 // Release: generic
2172 //
2173
2174 //
2175 // Footer
2176 //
2177 GPUCode = [NSMutableString new];
2178 [GPUCode appendString: @"\n// GPU function pointers\n"];
2179 //[GPUCode appendFormat: @"SEMfunctions %@_gpuFunctions = {%@_allocGPUKernel, %@_transferGPUKernel, %@_invokeGPUKernel, %@_releaseGPUKernel};\n", prefixName, prefixName, prefixName, prefixName, prefixName];
2180 [GPUCode appendFormat: @"SEMfunctions %@_gpuFunctions = {BC_SEM_allocGPUKernel, %@_transferGPUKernel, %@_invokeGPUKernel, BC_SEM_releaseGPUKernel, NULL, NULL, NULL, NULL};\n", prefixName, prefixName, prefixName];
2181 [GPUDictionary setObject: GPUCode forKey: @"footer"];
2182
2183 return GPUDictionary;
2184 }
2185
2186 - (NSString *)centerKernelGPUCode: (NSString *)prefixName
2187 {
2188 NSMutableString *GPUCode;
2189 int dims = [[super spatialModel] dimensions];
2190
2191 GPUCode = [NSMutableString new];
2192
2193 //
2194 // Center calculation for all elements in each cell
2195 //
2196 [GPUCode appendString: @"\n// Calculate cell center\n"];
2197 [GPUCode appendString: @"__global__ void\n"];
2198 [GPUCode appendFormat: @"%@_SEM_center_kernel()\n", prefixName];
2199 [GPUCode appendString: @"{\n"];
2200
2201 [GPUCode appendString: @" int cellNum = blockIdx.x * blockDim.x + threadIdx.x;\n"];
2202 [GPUCode appendString: @" int modelNum = blockIdx.y * blockDim.y + threadIdx.y;\n"];
2203 [GPUCode appendString: @"\n"];
2204 [GPUCode appendFormat: @" if (cellNum >= %@_sem_grids->numOfCells) return;\n", prefixName];
2205 [GPUCode appendFormat: @" if (modelNum >= %@_sem_grids->numOfModels) return;\n", prefixName];
2206 [GPUCode appendString: @"\n"];
2207
2208 [GPUCode appendString: @" int k;\n"];
2209 [GPUCode appendString: @" float cX = 0.0;\n"];
2210 [GPUCode appendString: @" float cY = 0.0;\n"];
2211 if (dims == 3) [GPUCode appendString: @" float cZ = 0.0;\n"];
2212 [GPUCode appendFormat: @" int cidx = modelNum*%@_sem_grids->idx_cPitch+cellNum;\n", prefixName];
2213 //[GPUCode appendString: @" int first = 1;\n"];
2214 [GPUCode appendString: @" float numOfElements = 0.0;\n"];
2215 [GPUCode appendString: @"\n"];
2216
2217 [GPUCode appendFormat: @" for (k = 0; k < %@_sem_grids->totalElements; ++k) {\n", prefixName];
2218 [GPUCode appendFormat: @" int idx = modelNum*%@_sem_grids->idx_pitch+k;\n", prefixName];
2219 [GPUCode appendFormat: @" if (%@_sem_grids->cellNumber[idx] != cellNum) continue;\n", prefixName];
2220 [GPUCode appendString: @"\n"];
2221
2222 [GPUCode appendFormat: @" cX += %@_sem_grids->X[idx];\n", prefixName];
2223 [GPUCode appendFormat: @" cY += %@_sem_grids->Y[idx];\n", prefixName];
2224 if (dims == 3) [GPUCode appendFormat: @" cZ += %@_sem_grids->Z[idx];\n", prefixName];
2225 [GPUCode appendString: @" numOfElements += 1.0f;\n"];
2226 [GPUCode appendString: @" }\n"];
2227 [GPUCode appendString: @"\n"];
2228
2229 [GPUCode appendString: @" cX = cX / numOfElements;\n"];
2230 [GPUCode appendString: @" cY = cY / numOfElements;\n"];
2231 if (dims == 3) [GPUCode appendString: @" cZ = cZ / numOfElements;\n"];
2232
2233 [GPUCode appendFormat: @" %@_sem_grids->cellCenterX[cidx] = cX;\n", prefixName];
2234 [GPUCode appendFormat: @" %@_sem_grids->cellCenterY[cidx] = cY;\n", prefixName];
2235 if (dims == 3) [GPUCode appendFormat: @" %@_sem_grids->cellCenterZ[cidx] = cZ;\n", prefixName];
2236
2237 [GPUCode appendString: @"}\n"];
2238
2239 //
2240 // Center calculation for each element type in each cell
2241 // To avoid using a 3D array, the element types and models are combined in the rows
2242 //
2243 [GPUCode appendString: @"\n// Calculate element centers\n"];
2244 [GPUCode appendString: @"__global__ void\n"];
2245 [GPUCode appendFormat: @"%@_SEM_element_center_kernel()\n", prefixName];
2246 [GPUCode appendString: @"{\n"];
2247
2248 [GPUCode appendString: @" int cellNum = blockIdx.x * blockDim.x + threadIdx.x;\n"];
2249 [GPUCode appendString: @" int modelNum = blockIdx.y * blockDim.y + threadIdx.y;\n"];
2250 [GPUCode appendString: @"\n"];
2251 [GPUCode appendFormat: @" if (cellNum >= %@_sem_grids->numOfCells) return;\n", prefixName];
2252 [GPUCode appendFormat: @" if (modelNum >= %@_sem_grids->numOfModels) return;\n", prefixName];
2253 [GPUCode appendString: @"\n"];
2254
2255 [GPUCode appendString: @" int k;\n"];
2256
2257 [GPUCode appendString: @"\n // zero out element centers\n"];
2258 [GPUCode appendFormat: @" for (k = 0; k < %@_sem_grids->maxElementTypes; ++k) {\n", prefixName];
2259 [GPUCode appendFormat: @" int cidx = ((modelNum*%@_sem_grids->maxElementTypes)+k)*%@_sem_grids->idx_cPitch+cellNum;\n", prefixName, prefixName];
2260 [GPUCode appendFormat: @" %@_sem_grids->elementCenterX[cidx] = 0;\n", prefixName];
2261 [GPUCode appendFormat: @" %@_sem_grids->elementCenterY[cidx] = 0;\n", prefixName];
2262 if (dims == 3) [GPUCode appendFormat: @" %@_sem_grids->elementCenterZ[cidx] = 0;\n", prefixName];
2263 [GPUCode appendFormat: @" %@_sem_grids->elementCenterCount[cidx] = 0;\n", prefixName];
2264 [GPUCode appendString: @" }\n"];
2265
2266 [GPUCode appendString: @"\n // calc element centers\n"];
2267 [GPUCode appendFormat: @" for (k = 0; k < %@_sem_grids->totalElements; ++k) {\n", prefixName];
2268 [GPUCode appendFormat: @" int idx = modelNum*%@_sem_grids->idx_pitch+k;\n", prefixName];
2269 [GPUCode appendFormat: @" if (%@_sem_grids->cellNumber[idx] != cellNum) continue;\n", prefixName];
2270 [GPUCode appendFormat: @" int cidx = ((modelNum*%@_sem_grids->maxElementTypes)+%@_sem_grids->elementType[idx])*%@_sem_grids->idx_cPitch+cellNum;\n", prefixName, prefixName, prefixName];
2271 [GPUCode appendString: @"\n"];
2272
2273 [GPUCode appendFormat: @" %@_sem_grids->elementCenterX[cidx] += %@_sem_grids->X[idx];\n", prefixName, prefixName];
2274 [GPUCode appendFormat: @" %@_sem_grids->elementCenterY[cidx] += %@_sem_grids->Y[idx];\n", prefixName, prefixName];
2275 if (dims == 3) [GPUCode appendFormat: @" %@_sem_grids->elementCenterZ[cidx] += %@_sem_grids->Z[idx];\n", prefixName, prefixName];
2276 [GPUCode appendFormat: @" %@_sem_grids->elementCenterCount[cidx] += 1.0f;\n", prefixName];
2277
2278 [GPUCode appendString: @" }\n"];
2279
2280 [GPUCode appendFormat: @" for (k = 0; k < %@_sem_grids->maxElementTypes; ++k) {\n", prefixName];
2281 [GPUCode appendFormat: @" int cidx = ((modelNum*%@_sem_grids->maxElementTypes)+k)*%@_sem_grids->idx_cPitch+cellNum;\n", prefixName, prefixName];
2282 [GPUCode appendFormat: @" if (%@_sem_grids->elementCenterCount[cidx] != 0) {\n", prefixName];
2283 [GPUCode appendFormat: @" %@_sem_grids->elementCenterX[cidx] /= %@_sem_grids->elementCenterCount[cidx];\n", prefixName, prefixName];
2284 [GPUCode appendFormat: @" %@_sem_grids->elementCenterY[cidx] /= %@_sem_grids->elementCenterCount[cidx];\n", prefixName, prefixName];
2285 if (dims == 3) [GPUCode appendFormat: @" %@_sem_grids->elementCenterZ[cidx] /= %@_sem_grids->elementCenterCount[cidx];\n", prefixName, prefixName];
2286 [GPUCode appendString: @" }\n"];
2287 [GPUCode appendString: @" }\n"];
2288
2289 [GPUCode appendString: @"}\n"];
2290
2291 return GPUCode;
2292 }
2293
2294 - (NSString *)functionKernelGPUCode: (NSString *)prefixName
2295 {
2296 int i;
2297 NSMutableString *GPUCode;
2298 int dims = [[super spatialModel] dimensions];
2299
2300 GPUCode = [NSMutableString new];
2301
2302 [GPUCode appendString: @"\n// SEM movement function used by Runge-Kutta solvers\n"];
2303 [GPUCode appendString: @"__device__ void\n"];
2304 if (dims == 2) [GPUCode appendFormat: @"%@_SEM_kernel_function(int modelNum, int elemNum, float *X_in, float *Y_in, float *X_out, float *Y_out, float mh, float mf)\n", prefixName];
2305 else [GPUCode appendFormat: @"%@_SEM_kernel_function(int modelNum, int elemNum, float *X_in, float *Y_in, float *Z_in, float *X_out, float *Y_out, float *Z_out, float mh, float mf)\n", prefixName];
2306 [GPUCode appendString: @"{\n"];
2307 [GPUCode appendFormat: @" int idx = modelNum*%@_sem_grids->idx_pitch+elemNum;\n", prefixName];
2308 [GPUCode appendString: @" int j, k, fIndex, fNum;\n"];
2309 [GPUCode appendString: @" float F_X1, F_X2;\n"];
2310 [GPUCode appendString: @" float F_Y1, F_Y2;\n"];
2311 if (dims == 3) [GPUCode appendString: @" float F_Z1, F_Z2;\n"];
2312 [GPUCode appendString: @" float r, V;\n"];
2313 [GPUCode appendString: @" float forceX = 0.0;\n"];
2314 [GPUCode appendString: @" float forceY = 0.0;\n"];
2315 if (dims == 3) [GPUCode appendString: @" float forceZ = 0.0;\n"];
2316
2317 [GPUCode appendString: @"\n // element position\n"];
2318 [GPUCode appendFormat: @" F_X1 = %@_sem_grids->X[idx] + mf * X_in[idx];\n", prefixName];
2319 [GPUCode appendFormat: @" F_Y1 = %@_sem_grids->Y[idx] + mf * Y_in[idx];\n", prefixName];
2320 if (dims == 3) [GPUCode appendFormat: @" F_Z1 = %@_sem_grids->Z[idx] + mf * Z_in[idx];\n", prefixName];
2321 [GPUCode appendFormat: @" int eType = %@_sem_grids->elementType[idx];\n", prefixName];
2322
2323 [GPUCode appendString: @"\n // forces between elements\n"];
2324 [GPUCode appendFormat: @" for (k = 0; k < %@_sem_grids->totalElements; ++k) {\n", prefixName];
2325 [GPUCode appendString: @" if (k == elemNum) continue;\n"];
2326 [GPUCode appendString: @"\n"];
2327
2328 [GPUCode appendString: @" fIndex = 0;\n"];
2329 [GPUCode appendFormat: @" fNum = %@_sem_grids->elementForceTable[FINDEX];\n", prefixName];
2330 [GPUCode appendString: @" while (fNum >= 0) {\n"];
2331 [GPUCode appendFormat: @" switch (%@_sem_grids->elementForceFlagTable[FINDEX]) {\n", prefixName];
2332 [GPUCode appendString: @" case FORCE_TYPE_INTRA: {\n"];
2333 [GPUCode appendString: @" // elements of same cell\n"];
2334 [GPUCode appendFormat: @" int oidx = modelNum*%@_sem_grids->idx_pitch+k;\n", prefixName];
2335 [GPUCode appendFormat: @" int oeType = %@_sem_grids->elementType[oidx];\n", prefixName];
2336 [GPUCode appendString: @"\n"];
2337
2338 [GPUCode appendFormat: @" if (oeType == %@_sem_grids->elementTypeTable[FINDEX]) {\n", prefixName];
2339 [GPUCode appendFormat: @" F_X2 = %@_sem_grids->X[oidx] + mf * X_in[oidx];\n", prefixName];
2340 [GPUCode appendFormat: @" F_Y2 = %@_sem_grids->Y[oidx] + mf * Y_in[oidx];\n", prefixName];
2341 if (dims == 3) [GPUCode appendFormat: @" F_Z2 = %@_sem_grids->Z[oidx] + mf * Z_in[oidx];\n", prefixName];
2342 [GPUCode appendString: @"\n"];
2343
2344 [GPUCode appendString: @" V = 0;\n"];
2345 [GPUCode appendFormat: @" switch (%@_sem_grids->elementForceTypeTable[FINDEX]) {\n", prefixName];
2346 [GPUCode appendString: @" case 0:\n"];
2347 [GPUCode appendString: @" // D_MORSE\n"];
2348 if (dims == 3) [GPUCode appendString: @" r = dist2(F_X1, F_Y1, F_Z1, F_X2, F_Y2, F_Z2);\n"];
2349 if (dims == 2) [GPUCode appendString: @" r = dist2(F_X1, F_Y1, 0.0, F_X2, F_Y2, 0.0);\n"];
2350 [GPUCode appendString: @" V = D_MORSE(r, PARAM_RHO, PARAM_EQUIL_SQ, PARAM_FACTOR);\n"];
2351 [GPUCode appendString: @" break;\n"];
2352 [GPUCode appendString: @"\n"];
2353
2354 [GPUCode appendString: @" case 1:\n"];
2355 [GPUCode appendString: @" // PD_MORSE\n"];
2356 if (dims == 3) [GPUCode appendString: @" r = dist2(F_X1, F_Y1, F_Z1, F_X2, F_Y2, F_Z2);\n"];
2357 if (dims == 2) [GPUCode appendString: @" r = dist2(F_X1, F_Y1, 0.0, F_X2, F_Y2, 0.0);\n"];
2358 [GPUCode appendString: @" V = PD_MORSE(r, PARAM_RHO, PARAM_EQUIL_SQ, PARAM_FACTOR);\n"];
2359 [GPUCode appendString: @" break;\n"];
2360 [GPUCode appendString: @"\n"];
2361
2362 [GPUCode appendString: @" case 2:\n"];
2363 [GPUCode appendString: @" // MORSE\n"];
2364 if (dims == 3) [GPUCode appendString: @" r = dist2(F_X1, F_Y1, F_Z1, F_X2, F_Y2, F_Z2);\n"];
2365 if (dims == 2) [GPUCode appendString: @" r = dist2(F_X1, F_Y1, 0.0, F_X2, F_Y2, 0.0);\n"];
2366 [GPUCode appendString: @" V = MORSE(r, PARAM_U0, PARAM_ETA0, PARAM_U1, PARAM_ETA1);\n"];
2367 [GPUCode appendString: @" break;\n"];
2368 [GPUCode appendString: @"\n"];
2369
2370 [GPUCode appendString: @" case 3:\n"];
2371 [GPUCode appendString: @" // PMORSE\n"];
2372 if (dims == 3) [GPUCode appendString: @" r = dist2(F_X1, F_Y1, F_Z1, F_X2, F_Y2, F_Z2);\n"];
2373 if (dims == 2) [GPUCode appendString: @" r = dist2(F_X1, F_Y1, 0.0, F_X2, F_Y2, 0.0);\n"];
2374 [GPUCode appendString: @" V = PMORSE(r, PARAM_U0, PARAM_ETA0, PARAM_U1, PARAM_ETA1);\n"];
2375 [GPUCode appendString: @" break;\n"];
2376 [GPUCode appendString: @" }\n"];
2377 [GPUCode appendString: @" forceX += V * (F_X1 - F_X2);\n"];
2378 [GPUCode appendString: @" forceY += V * (F_Y1 - F_Y2);\n"];
2379 if (dims == 3) [GPUCode appendString: @" forceZ += V * (F_Z1 - F_Z2);\n"];
2380
2381 [GPUCode appendString: @" }\n"];
2382 [GPUCode appendString: @" break;\n"];
2383 [GPUCode appendString: @" }\n"];
2384 [GPUCode appendString: @"\n"];
2385
2386 [GPUCode appendString: @" case FORCE_TYPE_INTER: {\n"];
2387 [GPUCode appendString: @" // elements of different cells\n"];
2388 [GPUCode appendString: @" break;\n"];
2389 [GPUCode appendString: @" }\n"];
2390 [GPUCode appendString: @"\n"];
2391
2392 [GPUCode appendString: @" case FORCE_TYPE_COLONY: {\n"];
2393 [GPUCode appendString: @" // elements of different colonies\n"];
2394 [GPUCode appendString: @" break;\n"];
2395 [GPUCode appendString: @" }\n"];
2396 [GPUCode appendString: @"\n"];
2397
2398 [GPUCode appendString: @" }\n"];
2399 [GPUCode appendString: @"\n"];
2400
2401 [GPUCode appendString: @"\n // next force\n"];
2402 [GPUCode appendString: @" ++fIndex;\n"];
2403 [GPUCode appendFormat: @" if (fIndex > %@_sem_grids->maxForces) fNum = -1;\n", prefixName];
2404 [GPUCode appendFormat: @" else fNum = %@_sem_grids->elementForceTable[FINDEX];\n", prefixName];
2405
2406 [GPUCode appendString: @" }\n"];
2407 [GPUCode appendString: @" }\n"];
2408
2409 [GPUCode appendString: @"\n // check singular forces\n"];
2410 [GPUCode appendString: @" fIndex = 0;\n"];
2411 [GPUCode appendFormat: @" fNum = %@_sem_grids->elementForceTable[FINDEX];\n", prefixName];
2412 [GPUCode appendString: @" while (fNum >= 0) {\n"];
2413 if (dims == 3) [GPUCode appendString: @" F_X2 = 0; F_Y2 = 0; F_Z2 = 0;\n"];
2414 if (dims == 2) [GPUCode appendString: @" F_X2 = 0; F_Y2 = 0;\n"];
2415 [GPUCode appendFormat: @" switch (%@_sem_grids->elementForceFlagTable[FINDEX]) {\n", prefixName];
2416 [GPUCode appendString: @" case FORCE_TYPE_CELL_CENTER: {\n"];
2417 [GPUCode appendString: @" // cell center\n"];
2418 [GPUCode appendFormat: @" int cidx = modelNum*%@_sem_grids->idx_cPitch+%@_sem_grids->cellNumber[idx];\n", prefixName, prefixName];
2419 [GPUCode appendFormat: @" F_X2 = %@_sem_grids->cellCenterX[cidx];\n", prefixName];
2420 [GPUCode appendFormat: @" F_Y2 = %@_sem_grids->cellCenterY[cidx];\n", prefixName];
2421 if (dims == 3) [GPUCode appendFormat: @" F_Z2 = %@_sem_grids->cellCenterZ[cidx];\n", prefixName];
2422 [GPUCode appendString: @" break;\n"];
2423 [GPUCode appendString: @" }\n"];
2424 [GPUCode appendString: @"\n"];
2425
2426 [GPUCode appendString: @" case FORCE_TYPE_ELEMENT_CENTER: {\n"];
2427 [GPUCode appendString: @" // element center\n"];
2428 [GPUCode appendFormat: @" int oeType = %@_sem_grids->elementTypeTable[FINDEX];\n", prefixName];
2429 [GPUCode appendFormat: @" int cidx = ((modelNum*%@_sem_grids->maxElementTypes)+oeType)*%@_sem_grids->idx_cPitch+%@_sem_grids->cellNumber[idx];\n", prefixName, prefixName, prefixName];
2430 [GPUCode appendFormat: @" F_X2 = %@_sem_grids->elementCenterX[cidx];\n", prefixName];
2431 [GPUCode appendFormat: @" F_Y2 = %@_sem_grids->elementCenterY[cidx];\n", prefixName];
2432 if (dims == 3) [GPUCode appendFormat: @" F_Z2 = %@_sem_grids->elementCenterZ[cidx];\n", prefixName];
2433 [GPUCode appendString: @" break;\n"];
2434 [GPUCode appendString: @" }\n"];
2435 [GPUCode appendString: @"\n"];
2436
2437 [GPUCode appendString: @" }\n"];
2438
2439 [GPUCode appendString: @" V = 0;\n"];
2440 [GPUCode appendFormat: @" switch (%@_sem_grids->elementForceTypeTable[FINDEX]) {\n", prefixName];
2441 [GPUCode appendString: @" case 0:\n"];
2442 [GPUCode appendString: @" // D_MORSE\n"];
2443 if (dims == 3) [GPUCode appendString: @" r = dist2(F_X1, F_Y1, F_Z1, F_X2, F_Y2, F_Z2);\n"];
2444 if (dims == 2) [GPUCode appendString: @" r = dist2(F_X1, F_Y1, 0.0, F_X2, F_Y2, 0.0);\n"];
2445 [GPUCode appendString: @" V = D_MORSE(r, PARAM_RHO, PARAM_EQUIL_SQ, PARAM_FACTOR);\n"];
2446 [GPUCode appendString: @" break;\n"];
2447 [GPUCode appendString: @"\n"];
2448
2449 [GPUCode appendString: @" case 1:\n"];
2450 [GPUCode appendString: @" // PD_MORSE\n"];
2451 if (dims == 3) [GPUCode appendString: @" r = dist2(F_X1, F_Y1, F_Z1, F_X2, F_Y2, F_Z2);\n"];
2452 if (dims == 2) [GPUCode appendString: @" r = dist2(F_X1, F_Y1, 0.0, F_X2, F_Y2, 0.0);\n"];
2453 [GPUCode appendString: @" V = PD_MORSE(r, PARAM_RHO, PARAM_EQUIL_SQ, PARAM_FACTOR);\n"];
2454 [GPUCode appendString: @" break;\n"];
2455 [GPUCode appendString: @"\n"];
2456
2457 [GPUCode appendString: @" case 2:\n"];
2458 [GPUCode appendString: @" // MORSE\n"];
2459 if (dims == 3) [GPUCode appendString: @" r = dist2(F_X1, F_Y1, F_Z1, F_X2, F_Y2, F_Z2);\n"];
2460 if (dims == 2) [GPUCode appendString: @" r = dist2(F_X1, F_Y1, 0.0, F_X2, F_Y2, 0.0);\n"];
2461 [GPUCode appendString: @" V = MORSE(r, PARAM_U0, PARAM_ETA0, PARAM_U1, PARAM_ETA1);\n"];
2462 [GPUCode appendString: @" break;\n"];
2463 [GPUCode appendString: @"\n"];
2464
2465 [GPUCode appendString: @" case 3:\n"];
2466 [GPUCode appendString: @" // PMORSE\n"];
2467 if (dims == 3) [GPUCode appendString: @" r = dist2(F_X1, F_Y1, F_Z1, F_X2, F_Y2, F_Z2);\n"];
2468 if (dims == 2) [GPUCode appendString: @" r = dist2(F_X1, F_Y1, 0.0, F_X2, F_Y2, 0.0);\n"];
2469 [GPUCode appendString: @" V = PMORSE(r, PARAM_U0, PARAM_ETA0, PARAM_U1, PARAM_ETA1);\n"];
2470 [GPUCode appendString: @" break;\n"];
2471 [GPUCode appendString: @" }\n"];
2472 [GPUCode appendString: @"\n"];
2473
2474 [GPUCode appendString: @" forceX += V * (F_X1 - F_X2);\n"];
2475 [GPUCode appendString: @" forceY += V * (F_Y1 - F_Y2);\n"];
2476 if (dims == 3) [GPUCode appendString: @" forceZ += V * (F_Z1 - F_Z2);\n"];
2477
2478
2479 [GPUCode appendString: @"\n // next force\n"];
2480 [GPUCode appendString: @" ++fIndex;\n"];
2481 [GPUCode appendFormat: @" if (fIndex > %@_sem_grids->maxForces) fNum = -1;\n", prefixName];
2482 [GPUCode appendFormat: @" else fNum = %@_sem_grids->elementForceTable[FINDEX];\n", prefixName];
2483 [GPUCode appendString: @" }\n"];
2484
2485
2486 #if 0
2487 // loop through all the forces
2488 for (i = 0; i < [forces count]; ++i) {
2489 NSDictionary *d = [forces objectAtIndex: i];
2490
2491 //
2492 // INTRACELLULAR force, between elements within cell
2493 //
2494 if ([[d objectForKey: @"type"] isEqualToString: @"INTRA"]) {
2495 [GPUCode appendString: @"\n // intracellular\n"];
2496 [GPUCode appendFormat: @" for (k = 0; k < %@_sem_grids->numOfElements[cellNum]; ++k) {\n", prefixName];
2497 [GPUCode appendString: @" if (k == elemNum) continue;\n"];
2498 [GPUCode appendFormat: @" int oidx = k*%@_sem_grids->idx_pitch+cellNum;\n", prefixName];
2499 [GPUCode appendFormat: @" F_X2 = %@_sem_grids->X[oidx] + mf * X_in[oidx];\n", prefixName];
2500 [GPUCode appendFormat: @" F_Y2 = %@_sem_grids->Y[oidx] + mf * Y_in[oidx];\n", prefixName];
2501 if (dims == 3) [GPUCode appendFormat: @" F_Z2 = %@_sem_grids->Z[oidx] + mf * Z_in[oidx];\n", prefixName];
2502 [GPUCode appendString: @"\n"];
2503
2504 if (dims == 2) [GPUCode appendString: @" r = dist2d(F_X1, F_Y1, F_X2, F_Y2);\n"];
2505 else [GPUCode appendString: @" r = dist2(F_X1, F_Y1, F_Z1, F_X2, F_Y2, F_Z2);\n"];
2506
2507 if ([[d objectForKey: @"potential"] isEqualToString: @"D_MORSE"]
2508 || [[d objectForKey: @"potential"] isEqualToString: @"PD_MORSE"]) {
2509 [GPUCode appendFormat: @" V = %@(r, %@_%@_RHO, %@_%@_EQUIL_SQ, %@_%@_FACTOR);\n",
2510 [d objectForKey: @"potential"], prefixName, [d objectForKey: @"type"], prefixName,
2511 [d objectForKey: @"type"], prefixName, [d objectForKey: @"type"]];
2512 }
2513
2514 if ([[d objectForKey: @"potential"] isEqualToString: @"MORSE"]
2515 || [[d objectForKey: @"potential"] isEqualToString: @"PMORSE"]) {
2516 [GPUCode appendFormat: @" V = %@(r, %@_%@_U0, %@_%@_ETA0, %@_%@_U1, %@_%@_ETA1);\n",
2517 [d objectForKey: @"potential"], prefixName, [d objectForKey: @"type"], prefixName,
2518 [d objectForKey: @"type"], prefixName, [d objectForKey: @"type"], prefixName, [d objectForKey: @"type"]];
2519 }
2520
2521 [GPUCode appendString: @"\n"];
2522 [GPUCode appendString: @" forceX += V * (F_X1 - F_X2);\n"];
2523 [GPUCode appendString: @" forceY += V * (F_Y1 - F_Y2);\n"];
2524 if (dims == 3) [GPUCode appendString: @" forceZ += V * (F_Z1 - F_Z2);\n"];
2525
2526 // TODO: periodic boundary conditions
2527
2528 [GPUCode appendString: @" }\n"];
2529
2530 continue;
2531 }
2532
2533 //
2534 // INTERCELLULAR force, between elements of another cell
2535 //
2536 if ([[d objectForKey: @"type"] isEqualToString: @"INTER"]) {
2537 [GPUCode appendString: @"\n // intercellular\n"];
2538 [GPUCode appendFormat: @" for (j = 0; j < %@_sem_grids->numOfCells; ++j) {\n", prefixName];
2539 [GPUCode appendString: @" if (j == cellNum) continue;\n"];
2540
2541 // TODO: optimize
2542
2543 [GPUCode appendFormat: @" for (k = 0; k < %@_sem_grids->numOfElements[j]; ++k) {\n", prefixName];
2544 [GPUCode appendFormat: @" int oidx = k*%@_sem_grids->idx_pitch+j;\n", prefixName];
2545 [GPUCode appendFormat: @" F_X2 = %@_sem_grids->X[oidx] + mf * X_in[oidx];\n", prefixName];
2546 [GPUCode appendFormat: @" F_Y2 = %@_sem_grids->Y[oidx] + mf * Y_in[oidx];\n", prefixName];
2547 if (dims == 3) [GPUCode appendFormat: @" F_Z2 = %@_sem_grids->Z[oidx] + mf * Z_in[oidx];\n", prefixName];
2548 [GPUCode appendString: @"\n"];
2549
2550 if (dims == 2) [GPUCode appendString: @" r = dist2d(F_X1, F_Y1, F_X2, F_Y2);\n"];
2551 else [GPUCode appendString: @" r = dist2(F_X1, F_Y1, F_Z1, F_X2, F_Y2, F_Z2);\n"];
2552
2553 if ([[d objectForKey: @"potential"] isEqualToString: @"D_MORSE"]
2554 || [[d objectForKey: @"potential"] isEqualToString: @"PD_MORSE"]) {
2555 [GPUCode appendFormat: @" V = %@(r, %@_%@_RHO, %@_%@_EQUIL_SQ, %@_%@_FACTOR);\n",
2556 [d objectForKey: @"potential"], prefixName, [d objectForKey: @"type"], prefixName,
2557 [d objectForKey: @"type"], prefixName, [d objectForKey: @"type"]];
2558 }
2559
2560 if ([[d objectForKey: @"potential"] isEqualToString: @"MORSE"]
2561 || [[d objectForKey: @"potential"] isEqualToString: @"PMORSE"]) {
2562 [GPUCode appendFormat: @" V = %@(r, %@_%@_U0, %@_%@_ETA0, %@_%@_U1, %@_%@_ETA1);\n",
2563 [d objectForKey: @"potential"], prefixName, [d objectForKey: @"type"], prefixName,
2564 [d objectForKey: @"type"], prefixName, [d objectForKey: @"type"], prefixName, [d objectForKey: @"type"]];
2565 }
2566
2567 // TODO: repulse only flag
2568
2569 [GPUCode appendString: @"\n"];
2570 [GPUCode appendString: @" forceX += V * (F_X1 - F_X2);\n"];
2571 [GPUCode appendString: @" forceY += V * (F_Y1 - F_Y2);\n"];
2572 if (dims == 3) [GPUCode appendString: @" forceZ += V * (F_Z1 - F_Z2);\n"];
2573
2574 // TODO: periodic boundary conditions
2575
2576 [GPUCode appendString: @" }\n"];
2577 [GPUCode appendString: @" }\n"];
2578
2579 continue;
2580 }
2581
2582 #if 0
2583 //
2584 // BOUNDARY force, between elements and some boundary
2585 //
2586 // TODO: generalize
2587 if ([[d objectForKey: @"type"] isEqualToString: @"BOUNDARY"]) {
2588 //NSString *bm = [d objectForKey: @"boundary"];
2589 [GPUCode appendString: @"\n// boundary\n"];
2590 id et = [d objectForKey: @"element"];
2591 if (et) [GPUCode appendFormat: @"if (%@_sem_grids->elementType[idx] == %d) {\n", prefixName, [et intValue]];
2592
2593 if ([[d objectForKey: @"potential"] isEqualToString: @"D_MORSE"]
2594 || [[d objectForKey: @"potential"] isEqualToString: @"PD_MORSE"]) {
2595 [GPUCode appendFormat: @" r = %@_sem_grids->Z[idx] * %@_sem_grids->Z[idx];\n", prefixName, prefixName];
2596 [GPUCode appendFormat: @" V = %@(r, %@_%@_RHO, %@_%@_EQUIL_SQ, %@_%@_FACTOR);\n",
2597 [d objectForKey: @"potential"], prefixName, [d objectForKey: @"type"], prefixName,
2598 [d objectForKey: @"type"], prefixName, [d objectForKey: @"type"]];
2599 }
2600
2601 if ([[d objectForKey: @"potential"] isEqualToString: @"MORSE"]
2602 || [[d objectForKey: @"potential"] isEqualToString: @"PMORSE"]) {
2603 [GPUCode appendFormat: @" r = %@_sem_grids->Z[idx];\n", prefixName];
2604 [GPUCode appendFormat: @" V = %@(r, %@_%@_U0, %@_%@_ETA0, %@_%@_U1, %@_%@_ETA1);\n",
2605 [d objectForKey: @"potential"], prefixName, [d objectForKey: @"type"], prefixName,
2606 [d objectForKey: @"type"], prefixName, [d objectForKey: @"type"], prefixName, [d objectForKey: @"type"]];
2607 }
2608
2609 [GPUCode appendFormat: @" forceZ += V * %@_sem_grids->Z[idx];\n", prefixName, prefixName];
2610 if (et) [GPUCode appendString: @"}\n"];
2611 continue;
2612 }
2613 #endif
2614
2615 }
2616 #endif
2617
2618 [GPUCode appendString: @"\n // result\n"];
2619 [GPUCode appendString: @" X_out[idx] = mh * forceX;\n"];
2620 [GPUCode appendString: @" Y_out[idx] = mh * forceY;\n"];
2621 if (dims == 3) [GPUCode appendString: @" Z_out[idx] = mh * forceZ;\n"];
2622
2623 // TODO: boundary conditions
2624 #if 0
2625 #if PERIODIC_BOUNDARY
2626 if (X_F1[elemNum*pitch+cellNum] < 0.0) X_F1[elemNum*pitch+cellNum] = X_F1[elemNum*pitch+cellNum] + BOUNDARY_X;
2627 if (X_F1[elemNum*pitch+cellNum] > BOUNDARY_X) X_F1[elemNum*pitch+cellNum] = X_F1[elemNum*pitch+cellNum] - BOUNDARY_X;
2628 if (Y_F1[elemNum*pitch+cellNum] < 0.0) Y_F1[elemNum*pitch+cellNum] = Y_F1[elemNum*pitch+cellNum] + BOUNDARY_Y;
2629 if (Y_F1[elemNum*pitch+cellNum] > BOUNDARY_Y) Y_F1[elemNum*pitch+cellNum] = Y_F1[elemNum*pitch+cellNum] - BOUNDARY_Y;
2630 //if (Z_F1[elemNum*pitch+cellNum] < 0.0) Z_F1[elemNum*pitch+cellNum] = Z_F1[elemNum*pitch+cellNum] + BOUNDARY_Z;
2631 //if (Z_F1[elemNum*pitch+cellNum] > BOUNDARY_Z) Z_F1[elemNum*pitch+cellNum] = Z_F1[elemNum*pitch+cellNum] - BOUNDARY_Z;
2632 #endif
2633
2634 #if HARD_Z
2635 if (Z_F1[elemNum*pitch+cellNum] < 0.0) Z_F1[elemNum*pitch+cellNum] = 0.0;
2636 if (Z_F1[elemNum*pitch+cellNum] > BOUNDARY_Z) Z_F1[elemNum*pitch+cellNum] = BOUNDARY_Z;
2637 #endif
2638 #endif
2639
2640 [GPUCode appendString: @"}\n"];
2641
2642 return GPUCode;
2643 }
2644
2645 - (NSMutableString *)kernelGPUCode: (NSString *)prefixName
2646 {
2647 int dims = [[super spatialModel] dimensions];
2648
2649 NSMutableString *GPUCode = [NSMutableString new];
2650
2651 // kernel to calculate centers
2652 [GPUCode appendString: [self centerKernelGPUCode: prefixName]];
2653
2654 // kernel to calculate SEM movement function
2655 //[GPUCode appendString: [self functionKernelGPUCode: prefixName]];
2656
2657 // kernel for numerical method
2658 if ([numericalScheme isEqualToString: @"RungeKutta2ndOrder"]) {
2659 [GPUCode appendString: @"\n// F1 Kernel for 2nd order Runge-Kutta\n"];
2660 [GPUCode appendString: @"__global__ void\n"];
2661 [GPUCode appendFormat: @"%@_SEM_kernel1_2rk(float currentTime, float finalTime)\n", prefixName];
2662 [GPUCode appendString: @"{\n"];
2663 if (useMesh)
2664 [GPUCode appendFormat: @" BC_SEM_mesh_kernel1_2rk(%@_sem_grids, currentTime, finalTime);\n", prefixName];
2665 else
2666 [GPUCode appendFormat: @" BC_SEM_kernel1_2rk(%@_sem_grids, currentTime, finalTime);\n", prefixName];
2667 [GPUCode appendString: @"}\n"];
2668
2669 [GPUCode appendString: @"\n// F2 Kernel for 2nd order Runge-Kutta\n"];
2670 [GPUCode appendString: @"__global__ void\n"];
2671 [GPUCode appendFormat: @"%@_SEM_kernel2_2rk(float currentTime, float finalTime)\n", prefixName];
2672 [GPUCode appendString: @"{\n"];
2673 if (useMesh)
2674 [GPUCode appendFormat: @" BC_SEM_mesh_kernel2_2rk(%@_sem_grids, currentTime, finalTime);\n", prefixName];
2675 else
2676 [GPUCode appendFormat: @" BC_SEM_kernel2_2rk(%@_sem_grids, currentTime, finalTime);\n", prefixName];
2677 [GPUCode appendString: @"}\n"];
2678
2679 [GPUCode appendString: @"\n// F1 Kernel for 2nd order Runge-Kutta, pairwise algorithm\n"];
2680 [GPUCode appendString: @"__global__ void\n"];
2681 [GPUCode appendFormat: @"%@_SEM_pairwise_kernel1_2rk(float currentTime, float finalTime)\n", prefixName];
2682 [GPUCode appendString: @"{\n"];
2683 [GPUCode appendFormat: @" BC_SEM_pairwise_kernel1_2rk(%@_sem_grids, currentTime, finalTime);\n", prefixName];
2684 [GPUCode appendString: @"}\n"];
2685
2686 [GPUCode appendString: @"\n// F2 Kernel for 2nd order Runge-Kutta, pairwise algorithm\n"];
2687 [GPUCode appendString: @"__global__ void\n"];
2688 [GPUCode appendFormat: @"%@_SEM_pairwise_kernel2_2rk(float currentTime, float finalTime)\n", prefixName];
2689 [GPUCode appendString: @"{\n"];
2690 [GPUCode appendFormat: @" BC_SEM_pairwise_kernel2_2rk(%@_sem_grids, currentTime, finalTime);\n", prefixName];
2691 [GPUCode appendString: @"}\n"];
2692
2693 if (!useMesh) {
2694 [GPUCode appendString: @"\n// F1 Kernel for 2nd order Runge-Kutta, neighbor pair algorithm\n"];
2695 [GPUCode appendString: @"__global__ void\n"];
2696 [GPUCode appendFormat: @"%@_SEM_neighbor_kernel1_2rk(float currentTime, float finalTime)\n", prefixName];
2697 [GPUCode appendString: @"{\n"];
2698 [GPUCode appendFormat: @" BC_SEM_neighbor_kernel1_2rk(%@_sem_grids, currentTime, finalTime);\n", prefixName];
2699 [GPUCode appendString: @"}\n"];
2700
2701 [GPUCode appendString: @"\n// Sum forces for F1\n"];
2702 [GPUCode appendString: @"__global__ void\n"];
2703 [GPUCode appendFormat: @"%@_SEM_neighbor_sum_kernel1_2rk(float currentTime, float finalTime)\n", prefixName];
2704 [GPUCode appendString: @"{\n"];
2705 [GPUCode appendFormat: @" BC_SEM_neighbor_sum_kernel1_2rk(%@_sem_grids, currentTime, finalTime);\n", prefixName];
2706 [GPUCode appendString: @"}\n"];
2707
2708 [GPUCode appendString: @"\n// F2 Kernel for 2nd order Runge-Kutta, neighbor pair algorithm\n"];
2709 [GPUCode appendString: @"__global__ void\n"];
2710 [GPUCode appendFormat: @"%@_SEM_neighbor_kernel2_2rk(float currentTime, float finalTime)\n", prefixName];
2711 [GPUCode appendString: @"{\n"];
2712 [GPUCode appendFormat: @" BC_SEM_neighbor_kernel2_2rk(%@_sem_grids, currentTime, finalTime);\n", prefixName];
2713 [GPUCode appendString: @"}\n"];
2714
2715 [GPUCode appendString: @"\n// Sum forces for F2 and update element positions\n"];
2716 [GPUCode appendString: @"__global__ void\n"];
2717 [GPUCode appendFormat: @"%@_SEM_neighbor_update_kernel_2rk(float currentTime, float finalTime)\n", prefixName];
2718 [GPUCode appendString: @"{\n"];
2719 [GPUCode appendFormat: @" BC_SEM_neighbor_update_kernel_2rk(%@_sem_grids, currentTime, finalTime);\n", prefixName];
2720 [GPUCode appendString: @"}\n"];
2721
2722 [GPUCode appendString: @"\n// Determine if need new sectors\n"];
2723 [GPUCode appendString: @"__global__ void\n"];
2724 [GPUCode appendFormat: @"%@_SEM_sector_count_kernel(float currentTime, float finalTime)\n", prefixName];
2725 [GPUCode appendString: @"{\n"];
2726 [GPUCode appendFormat: @" BC_SEM_sector_count_kernel(%@_sem_grids, currentTime, finalTime);\n", prefixName];
2727 [GPUCode appendString: @"}\n"];
2728
2729 [GPUCode appendString: @"\n// Create new sectors\n"];
2730 [GPUCode appendString: @"__global__ void\n"];
2731 [GPUCode appendFormat: @"%@_SEM_create_sectors_kernel(float currentTime, float finalTime)\n", prefixName];
2732 [GPUCode appendString: @"{\n"];
2733 [GPUCode appendFormat: @" BC_SEM_create_sectors_kernel(%@_sem_grids, currentTime, finalTime);\n", prefixName];
2734 [GPUCode appendString: @"}\n"];
2735
2736 [GPUCode appendString: @"\n// Update sector neighbor table\n"];
2737 [GPUCode appendString: @"__global__ void\n"];
2738 [GPUCode appendFormat: @"%@_SEM_build_sector_neighbors_kernel(float currentTime, float finalTime)\n", prefixName];
2739 [GPUCode appendString: @"{\n"];
2740 [GPUCode appendFormat: @" BC_SEM_build_sector_neighbors_kernel(%@_sem_grids, currentTime, finalTime);\n", prefixName];
2741 [GPUCode appendString: @"}\n"];
2742
2743 [GPUCode appendString: @"\n// Move elements to their sectors\n"];
2744 [GPUCode appendString: @"__global__ void\n"];
2745 [GPUCode appendFormat: @"%@_SEM_move_elements_kernel(float currentTime, float finalTime)\n", prefixName];
2746 [GPUCode appendString: @"{\n"];
2747 [GPUCode appendFormat: @" BC_SEM_move_elements_kernel(%@_sem_grids, currentTime, finalTime);\n", prefixName];
2748 [GPUCode appendString: @"}\n"];
2749
2750 [GPUCode appendString: @"\n// Update sector tables\n"];
2751 [GPUCode appendString: @"__global__ void\n"];
2752 [GPUCode appendFormat: @"%@_SEM_update_element_sector_kernel(float currentTime, float finalTime)\n", prefixName];
2753 [GPUCode appendString: @"{\n"];
2754 [GPUCode appendFormat: @" BC_SEM_update_element_sector_kernel(%@_sem_grids, currentTime, finalTime);\n", prefixName];
2755 [GPUCode appendString: @"}\n"];
2756
2757 [GPUCode appendString: @"\n// Update element neighbor table\n"];
2758 [GPUCode appendString: @"__global__ void\n"];
2759 [GPUCode appendFormat: @"%@_SEM_update_element_neighbor_kernel(float currentTime, float finalTime)\n", prefixName];
2760 [GPUCode appendString: @"{\n"];
2761 [GPUCode appendFormat: @" BC_SEM_update_element_neighbor_kernel(%@_sem_grids, currentTime, finalTime);\n", prefixName];
2762 [GPUCode appendString: @"}\n"];
2763 }
2764
2765 } else {
2766 printf("ERROR: unknown numerical scheme: %s\n", [numericalScheme UTF8String]);
2767 return nil;
2768 }
2769
2770 return GPUCode;
2771 }
2772
2773 - (NSMutableString *)cleanupGPUCode: (NSString *)prefixName
2774 {
2775 return nil;
2776 }
2777
2778 @end
2779
2780 //
2781 // Run GPU code
2782 //
2783 @implementation SubcellularElementModel (GPURun)
2784
2785 - (void *)allocGPUData: (int)numModels withGPUFunctions: (void *)gpuFunctions
2786 {
2787 ModelGPUData *gpuData = (ModelGPUData *)malloc(sizeof(ModelGPUData));
2788 int dims = [[super spatialModel] dimensions];
2789
2790 id gpuElementData = [[MultiScale2DGrid alloc] initWithWidth: maxElements andHeight: numModels];
2791 [gpuElementData newGridWithName: @"X" atScale: 1 withEncode: dataEncode];
2792 [gpuElementData newGridWithName: @"Y" atScale: 1 withEncode: dataEncode];
2793 [gpuElementData newGridWithName: @"Z" atScale: 1 withEncode: dataEncode];
2794 [gpuElementData newGridWithName: @"type" atScale: 1 withEncode: [BioSwarmModel intEncode]];
2795 [gpuElementData newGridWithName: @"cell" atScale: 1 withEncode: [BioSwarmModel intEncode]];
2796 [gpuElementData newGridWithName: @"sector" atScale: 1 withEncode: [BioSwarmModel intEncode]];
2797 gpuData->data = gpuElementData;
2798 gpuData->numModels = numModels;
2799 gpuData->numParams = [self numOfParameters];
2800 gpuData->gpuFunctions = gpuFunctions;
2801
2802 gpuData->parameters = (float *)malloc(sizeof(float) * numModels * gpuData->numParams);
2803 NSMutableDictionary *d = [NSMutableDictionary new];
2804 gpuData->data2 = (void *)d;
2805
2806 printf("numOfSpecies = %d\n", numOfSpecies);
2807 NSValue *v;
2808 if (numOfSpecies) {
2809 float *sd = (float *)malloc(sizeof(float) * numModels * numOfSpecies);
2810 v = [NSValue value: &sd withObjCType: @encode(float *)];
2811 [d setObject: v forKey: @"speciesData"];
2812 }
2813
2814 int *sc = (int *)malloc(sizeof(int) * 3 * maxSectors * numModels);
2815 v = [NSValue value: &sc withObjCType: @encode(int *)];
2816 [d setObject: v forKey: @"sectorCoordinates"];
2817 //printf("sectorCoordinates = %p\n", sc);
2818
2819 sc = (int *)malloc(sizeof(int) * numModels);
2820 v = [NSValue value: &sc withObjCType: @encode(int *)];
2821 [d setObject: v forKey: @"numSectors"];
2822
2823 float *sd = (float *)malloc(sizeof(float) * numModels);
2824 v = [NSValue value: &sd withObjCType: @encode(float *)];
2825 [d setObject: v forKey: @"sectorSize"];
2826
2827 sc = (int *)malloc(sizeof(int) * maxElementsPerSector * maxSectors * numModels);
2828 v = [NSValue value: &sc withObjCType: @encode(int *)];
2829 [d setObject: v forKey: @"sectorElementTable"];
2830
2831 int nSize;
2832 if (dims == 2) nSize = S_XY_TOT;
2833 else nSize = S_XYZ_TOT;
2834
2835 sc = (int *)malloc(sizeof(int) * maxSectors * nSize * numModels);
2836 v = [NSValue value: &sc withObjCType: @encode(int *)];
2837 [d setObject: v forKey: @"sectorNeighborTable"];
2838 //printf("sectorNeighborTable = %p\n", sc);
2839 #if 0
2840 sc = (int *)malloc(sizeof(int) * maxElements * nSize * maxElementsPerSector * numModels);
2841 v = [NSValue value: &sc withObjCType: @encode(int *)];
2842 [d setObject: v forKey: @"elementNeighborTable"];
2843 //printf("elementNeighborTable = %p\n", sc);
2844 #endif
2845 sc = (int *)malloc(sizeof(int) * maxSectors * numModels);
2846 v = [NSValue value: &sc withObjCType: @encode(int *)];
2847 [d setObject: v forKey: @"readOnlySector"];
2848
2849 return gpuData;
2850 }
2851
2852 - (void)freeGPUData: (void *)data
2853 {
2854 ModelGPUData *gpuData = (ModelGPUData *)data;
2855
2856 if (gpuData) {
2857 id m = gpuData->data;
2858 if (m) [m release];
2859 if (gpuData->parameters) free(gpuData->parameters);
2860 if (gpuData->data2) {
2861 NSMutableDictionary *d = (NSMutableDictionary *)gpuData->data2;
2862 void *sd = NULL;
2863 NSValue *v = [d objectForKey: @"speciesData"];
2864 if (v) [v getValue: &sd];
2865 if (sd) free(sd);
2866
2867 sd = NULL;
2868 v = [d objectForKey: @"sectorCoordinates"];
2869 if (v) [v getValue: &sd];
2870 if (sd) free(sd);
2871
2872 sd = NULL;
2873 v = [d objectForKey: @"numSectors"];
2874 if (v) [v getValue: &sd];
2875 if (sd) free(sd);
2876
2877 sd = NULL;
2878 v = [d objectForKey: @"sectorSize"];
2879 if (v) [v getValue: &sd];
2880 if (sd) free(sd);
2881
2882 sd = NULL;
2883 v = [d objectForKey: @"sectorElementTable"];
2884 if (v) [v getValue: &sd];
2885 if (sd) free(sd);
2886
2887 sd = NULL;
2888 v = [d objectForKey: @"sectorNeighborTable"];
2889 if (v) [v getValue: &sd];
2890 if (sd) free(sd);
2891 #if 0
2892 sd = NULL;
2893 v = [d objectForKey: @"elementNeighborTable"];
2894 if (v) [v getValue: &sd];
2895 if (sd) free(sd);
2896 #endif
2897 sd = NULL;
2898 v = [d objectForKey: @"readOnlySector"];
2899 if (v) [v getValue: &sd];
2900 if (sd) free(sd);
2901
2902 [d release];
2903 }
2904
2905 free(gpuData);
2906 }
2907 }
2908
2909 - (void)assignData: (void *)data ofNumber: (int)aNum toGPU: (BOOL)aFlag
2910 {
2911 ModelGPUData *gpuData = (ModelGPUData *)data;
2912 int dims = [[super spatialModel] dimensions];
2913 int i, j, k;
2914
2915 Grid2DArray *X = [[elementData gridWithName: @"X"] objectForKey: @"matrix"];
2916 Grid2DArray *Y = [[elementData gridWithName: @"Y"] objectForKey: @"matrix"];
2917 Grid2DArray *Z = [[elementData gridWithName: @"Z"] objectForKey: @"matrix"];
2918 Grid2DArray *T = [[elementData gridWithName: @"type"] objectForKey: @"matrix"];
2919 Grid2DArray *C = [[elementData gridWithName: @"cell"] objectForKey: @"matrix"];
2920 Grid2DArray *S = [[elementData gridWithName: @"sector"] objectForKey: @"matrix"];
2921 float (*xGrid)[1][maxElements] = [X getMatrix];
2922 float (*yGrid)[1][maxElements] = [Y getMatrix];
2923 float (*zGrid)[1][maxElements] = [Z getMatrix];
2924 int (*etGrid)[1][maxElements] = [T getMatrix];
2925 int (*cGrid)[1][maxElements] = [C getMatrix];
2926 int (*sGrid)[1][maxElements] = [S getMatrix];
2927
2928 id gpuElementData = gpuData->data;
2929 Grid2DArray *gX = [[gpuElementData gridWithName: @"X"] objectForKey: @"matrix"];
2930 Grid2DArray *gY = [[gpuElementData gridWithName: @"Y"] objectForKey: @"matrix"];
2931 Grid2DArray *gZ = [[gpuElementData gridWithName: @"Z"] objectForKey: @"matrix"];
2932 Grid2DArray *gT = [[gpuElementData gridWithName: @"type"] objectForKey: @"matrix"];
2933 Grid2DArray *gC = [[gpuElementData gridWithName: @"cell"] objectForKey: @"matrix"];
2934 Grid2DArray *gS = [[gpuElementData gridWithName: @"sector"] objectForKey: @"matrix"];
2935 float (*gxGrid)[gpuData->numModels][maxElements] = [gX getMatrix];
2936 float (*gyGrid)[gpuData->numModels][maxElements] = [gY getMatrix];
2937 float (*gzGrid)[gpuData->numModels][maxElements] = [gZ getMatrix];
2938 int (*getGrid)[gpuData->numModels][maxElements] = [gT getMatrix];
2939 int (*gcGrid)[gpuData->numModels][maxElements] = [gC getMatrix];
2940 int (*gsGrid)[gpuData->numModels][maxElements] = [gS getMatrix];
2941
2942 float *indData = speciesData;
2943 NSMutableDictionary *d = (NSMutableDictionary *)gpuData->data2;
2944 void *sd = NULL;
2945 NSValue *v = [d objectForKey: @"speciesData"];
2946 if (v) [v getValue: &sd];
2947 float (*hostData)[numOfSpecies][gpuData->numModels] = sd;
2948
2949 sd = NULL;
2950 v = [d objectForKey: @"sectorCoordinates"];
2951 if (v) [v getValue: &sd];
2952 int (*hostSectorCoordinates)[3 * gpuData->numModels][maxSectors] = sd;
2953 int (*sCoordinates)[3][maxSectors] = (void *)sectorCoordinates;
2954
2955 sd = NULL;
2956 v = [d objectForKey: @"numSectors"];
2957 if (v) [v getValue: &sd];
2958 int (*hostNumSectors)[gpuData->numModels] = sd;
2959
2960 sd = NULL;
2961 v = [d objectForKey: @"sectorSize"];
2962 if (v) [v getValue: &sd];
2963 float (*hostSectorSize)[gpuData->numModels] = sd;
2964
2965 sd = NULL;
2966 v = [d objectForKey: @"sectorElementTable"];
2967 if (v) [v getValue: &sd];
2968 int (*hostSectorElementTable)[maxElementsPerSector * gpuData->numModels][maxSectors] = sd;
2969 int (*seTable)[maxElementsPerSector][maxSectors] = (void *)sectorElementTable;
2970
2971 int nSize;
2972 if (dims == 2) nSize = S_XY_TOT;
2973 else nSize = S_XYZ_TOT;
2974
2975 sd = NULL;
2976 v = [d objectForKey: @"sectorNeighborTable"];
2977 if (v) [v getValue: &sd];
2978 //printf("sectorNeighborTable = %p\n", sd);
2979 int (*hostSectorNeighborTable)[nSize * gpuData->numModels][maxSectors] = sd;
2980 int (*snTable)[nSize][maxSectors] = (void *)sectorNeighborTable;
2981
2982 #if 0
2983 sd = NULL;
2984 v = [d objectForKey: @"elementNeighborTable"];
2985 if (v) [v getValue: &sd];
2986 //printf("elementNeighborTable = %p\n", sd);
2987 int (*hostElementNeighborTable)[nSize * maxElementsPerSector * gpuData->numModels][maxElements] = sd;
2988 int (*enTable)[nSize * maxElementsPerSector][maxElements] = (void *)elementNeighborTable;
2989 #endif
2990 sd = NULL;
2991 v = [d objectForKey: @"readOnlySector"];
2992 if (v) [v getValue: &sd];
2993 int (*hostReadOnlySector)[gpuData->numModels][maxSectors] = sd;
2994 int (*roSector)[maxSectors] = (void *)sectorReadOnlyFlag;
2995
2996 if (aFlag) {
2997 // transfer to GPU
2998 for (j = 0; j < totalElements; ++j) {
2999 (*gxGrid)[aNum][j] = (*xGrid)[0][j];
3000 (*gyGrid)[aNum][j] = (*yGrid)[0][j];
3001 (*gzGrid)[aNum][j] = (*zGrid)[0][j];
3002 (*getGrid)[aNum][j] = (*etGrid)[0][j];
3003 (*gcGrid)[aNum][j] = (*cGrid)[0][j];
3004 (*gsGrid)[aNum][j] = (*sGrid)[0][j];
3005 #if 0
3006 for (i = 0; i < nSize; ++i) {
3007 for (k = 0; k < maxElementsPerSector; ++k) {
3008 (*hostElementNeighborTable)[(nSize * maxElementsPerSector * aNum) + (maxElementsPerSector * i) + k][j] = (*enTable)[(maxElementsPerSector * i) + k][j];
3009 }
3010 }
3011 #endif
3012 }
3013 if (numOfSpecies) for (j = 0; j < numOfSpecies; ++j) (*hostData)[j][aNum] = indData[j];
3014
3015 (*hostSectorSize)[aNum] = sectorSize;
3016 (*hostNumSectors)[aNum] = numSectors;
3017 for (i = 0; i < maxSectors; ++i) {
3018 (*hostSectorCoordinates)[0 + (3 * aNum)][i] = (*sCoordinates)[0][i];
3019 (*hostSectorCoordinates)[1 + (3 * aNum)][i] = (*sCoordinates)[1][i];
3020 (*hostSectorCoordinates)[2 + (3 * aNum)][i] = (*sCoordinates)[2][i];
3021 //printf("sector (%d) coordinates (%d, %d, %d)\n", i, (*hostSectorCoordinates)[0 + (3 * aNum)][i], (*hostSectorCoordinates)[1 + (3 * aNum)][i], (*hostSectorCoordinates)[2 + (3 * aNum)][i]);
3022 for (j = 0; j < maxElementsPerSector; ++j) (*hostSectorElementTable)[j + (maxElementsPerSector * aNum)][i] = (*seTable)[j][i];
3023 for (j = 0; j < nSize; ++j) (*hostSectorNeighborTable)[j + (nSize * aNum)][i] = (*snTable)[j][i];
3024 (*hostReadOnlySector)[aNum][i] = (*roSector)[i];
3025 }
3026 } else {
3027 // transfer from GPU
3028 for (j = 0; j < totalElements; ++j) {
3029 (*xGrid)[0][j] = (*gxGrid)[aNum][j];
3030 (*yGrid)[0][j] = (*gyGrid)[aNum][j];
3031 (*zGrid)[0][j] = (*gzGrid)[aNum][j];
3032 (*etGrid)[0][j] = (*getGrid)[aNum][j];
3033 (*cGrid)[0][j] = (*gcGrid)[aNum][j];
3034 (*sGrid)[0][j] = (*gsGrid)[aNum][j];
3035 #if 0
3036 for (i = 0; i < nSize; ++i) {
3037 for (k = 0; k < maxElementsPerSector; ++k) {
3038 (*enTable)[(maxElementsPerSector * i) + k][j] = (*hostElementNeighborTable)[(nSize * maxElementsPerSector * aNum) + (maxElementsPerSector * i) + k][j];
3039 }
3040 }
3041 #endif
3042 }
3043 if (numOfSpecies) for (j = 0; j < numOfSpecies; ++j) indData[j] = (*hostData)[j][aNum];
3044
3045 sectorSize = (*hostSectorSize)[aNum];
3046 numSectors = (*hostNumSectors)[aNum];
3047 for (i = 0; i < maxSectors; ++i) {
3048 (*sCoordinates)[0][i] = (*hostSectorCoordinates)[0 + (3 * aNum)][i];
3049 (*sCoordinates)[1][i] = (*hostSectorCoordinates)[1 + (3 * aNum)][i];
3050 (*sCoordinates)[2][i] = (*hostSectorCoordinates)[2 + (3 * aNum)][i];
3051 for (j = 0; j < maxElementsPerSector; ++j) (*seTable)[j][i] = (*hostSectorElementTable)[j + (maxElementsPerSector * aNum)][i];
3052 for (j = 0; j < nSize; ++j) (*snTable)[j][i] = (*hostSectorNeighborTable)[j + (nSize * aNum)][i];
3053 }
3054 }
3055 #if 0
3056 for (i = 0; i < gpuData->numModels; ++i) {
3057 for (j = 0; j < totalElements; ++j) {
3058 printf("%d %d: %f %f %f\n", i, j, (*gxGrid)[i][j], (*gyGrid)[i][j], (*gzGrid)[i][j]);
3059 }
3060 }
3061 #endif
3062 }
3063
3064 - (int)assignParameters: (void *)data ofNumber: (int)aNum toGPU: (BOOL)aFlag
3065 {
3066 ModelGPUData *gpuData = (ModelGPUData *)data;
3067 int i, p = 0;
3068 NSString *s;
3069
3070 float (*hostParameters)[gpuData->numParams][gpuData->numModels] = (void *)gpuData->parameters;
3071
3072 if (aFlag) {
3073 (*hostParameters)[p][aNum] = interactionMaxSquared;
3074 ++p;
3075
3076 for (i = 0; i < [forces count]; ++i) {
3077 NSMutableDictionary *d = [forces objectAtIndex: i];
3078 NSString *forceName = [d objectForKey: @"name"];
3079
3080 if ([[d objectForKey: @"potential"] isEqualToString: @"D_MORSE"]
3081 || [[d objectForKey: @"potential"] isEqualToString: @"PD_MORSE"]) {
3082 (*hostParameters)[p][aNum] = [[d objectForKey: @"rho"] floatValue];
3083 printf("assign parameter (%d, %d) rho = %f\n", p, aNum, (*hostParameters)[p][aNum]);
3084 ++p;
3085
3086 (*hostParameters)[p][aNum] = [[d objectForKey: @"factor"] floatValue];
3087 printf("assign parameter (%d, %d) factor = %f\n", p, aNum, (*hostParameters)[p][aNum]);
3088 ++p;
3089
3090 (*hostParameters)[p][aNum] = [[d objectForKey: @"eqSquared"] floatValue];
3091 printf("assign parameter (%d, %d) eqSquared = %f\n", p, aNum, (*hostParameters)[p][aNum]);
3092 ++p;
3093 }
3094
3095 if ([[d objectForKey: @"potential"] isEqualToString: @"MORSE"]
3096 || [[d objectForKey: @"potential"] isEqualToString: @"PMORSE"]) {
3097 //printf("force: %s\n", [[d description] UTF8String]);
3098 s = [NSString stringWithFormat: @"%@_U0", forceName];
3099 (*hostParameters)[p][aNum] = [[d objectForKey: s] floatValue];
3100 printf("assign parameter (%d, %d) U0 = %f\n", p, aNum, (*hostParameters)[p][aNum]);
3101 ++p;
3102
3103 s = [NSString stringWithFormat: @"%@_U1", forceName];
3104 (*hostParameters)[p][aNum] = [[d objectForKey: s] floatValue];
3105 printf("assign parameter (%d, %d) U1 = %f\n", p, aNum, (*hostParameters)[p][aNum]);
3106 ++p;
3107
3108 s = [NSString stringWithFormat: @"%@_ETA0", forceName];
3109 (*hostParameters)[p][aNum] = [[d objectForKey: s] floatValue];
3110 printf("assign parameter (%d, %d) ETA0 = %f\n", p, aNum, (*hostParameters)[p][aNum]);
3111 ++p;
3112
3113 s = [NSString stringWithFormat: @"%@_ETA1", forceName];
3114 (*hostParameters)[p][aNum] = [[d objectForKey: s] floatValue];
3115 printf("assign parameter (%d, %d) ETA1 = %f\n", p, aNum, (*hostParameters)[p][aNum]);
3116 ++p;
3117 }
3118 }
3119
3120 } else {
3121 }
3122
3123 return p;
3124 }
3125
3126
3127 - (void)allocGPUKernel: (void *)data
3128 {
3129 ModelGPUData *gpuData = (ModelGPUData *)data;
3130 SEMfunctions *gpuFunctions = (SEMfunctions *)gpuData->gpuFunctions;
3131 gpuData->gpuPtrs = (gpuFunctions->allocGPUKernel)(self, [[super spatialModel] dimensions], gpuData->numModels, maxCells, maxElements, maxElementTypes, maxForces, maxSectors, maxElementsPerSector, gpuData->numParams, (int)calcCellCenters, (int)calcElementCenters, numOfSpecies, (int)useMesh, (float)dt);
3132 }
3133
3134 - (void)transferGPUKernel: (void *)data toGPU: (BOOL)aFlag
3135 {
3136 ModelGPUData *gpuData = (ModelGPUData *)data;
3137 SEMfunctions *gpuFunctions = (SEMfunctions *)gpuData->gpuFunctions;
3138 id gpuElementData = gpuData->data;
3139 Grid2DArray *gX = [[gpuElementData gridWithName: @"X"] objectForKey: @"matrix"];
3140 Grid2DArray *gY = [[gpuElementData gridWithName: @"Y"] objectForKey: @"matrix"];
3141 Grid2DArray *gZ = [[gpuElementData gridWithName: @"Z"] objectForKey: @"matrix"];
3142 Grid2DArray *gT = [[gpuElementData gridWithName: @"type"] objectForKey: @"matrix"];
3143 Grid2DArray *gC = [[gpuElementData gridWithName: @"cell"] objectForKey: @"matrix"];
3144 Grid2DArray *gS = [[gpuElementData gridWithName: @"sector"] objectForKey: @"matrix"];
3145 NSMutableDictionary *d = (NSMutableDictionary *)gpuData->data2;
3146
3147 void *hostSpeciesData = NULL;
3148 NSValue *v = [d objectForKey: @"speciesData"];
3149 if (v) [v getValue: &hostSpeciesData];
3150
3151 void *hostSectorCoordinates = NULL;
3152 v = [d objectForKey: @"sectorCoordinates"];
3153 if (v) [v getValue: &hostSectorCoordinates];
3154
3155 void *hostNumSectors = NULL;
3156 v = [d objectForKey: @"numSectors"];
3157 if (v) [v getValue: &hostNumSectors];
3158
3159 void *hostSectorSize = NULL;
3160 v = [d objectForKey: @"sectorSize"];
3161 if (v) [v getValue: &hostSectorSize];
3162
3163 void *hostSectorElementTable = NULL;
3164 v = [d objectForKey: @"sectorElementTable"];
3165 if (v) [v getValue: &hostSectorElementTable];
3166
3167 void *hostSectorNeighborTable = NULL;
3168 v = [d objectForKey: @"sectorNeighborTable"];
3169 if (v) [v getValue: &hostSectorNeighborTable];
3170 #if 0
3171 void *hostElementNeighborTable = NULL;
3172 v = [d objectForKey: @"elementNeighborTable"];
3173 if (v) [v getValue: &hostElementNeighborTable];
3174 #endif
3175 void *hostReadOnlySector = NULL;
3176 v = [d objectForKey: @"readOnlySector"];
3177 if (v) [v getValue: &hostReadOnlySector];
3178
3179 #if 0
3180 int dims = [[super spatialModel] dimensions];
3181 int nSize, i;
3182 if (dims == 2) nSize = S_XY_TOT;
3183 else nSize = S_XYZ_TOT;
3184 int (*enTable)[nSize * maxElementsPerSector][maxElements] = (void *)hostElementNeighborTable;
3185 for (i = 0; i < (nSize * maxElementsPerSector); ++i) {
3186 if ( (*enTable)[i][0] != -1) printf("transfer en = (%d, %d)\n", i, (*enTable)[i][0]);
3187 }
3188 #endif
3189
3190 (gpuFunctions->initGPUKernel)(self, gpuData->gpuPtrs, (int)aFlag, gpuData->parameters, [gC getMatrix], numOfCells, totalElements,
3191 [gX getMatrix], [gY getMatrix], [gZ getMatrix], [gT getMatrix], [gS getMatrix],
3192 NULL, elementForceTable, elementForceTypeTable, elementTypeTable, elementForceFlagTable,
3193 hostSpeciesData, meshNeighbors,
3194 hostSectorCoordinates, hostNumSectors, hostSectorSize, hostSectorElementTable, hostSectorNeighborTable, hostReadOnlySector);
3195 }
3196
3197 - (void)invokeGPUKernel: (void *)data currentTime: (double)currentTime endTime: (double)endTime
3198 {
3199 ModelGPUData *gpuData = (ModelGPUData *)data;
3200 SEMfunctions *gpuFunctions = (SEMfunctions *)gpuData->gpuFunctions;
3201
3202 (gpuFunctions->invokeGPUKernel)(self, gpuData->gpuPtrs, currentTime, endTime);
3203 }
3204
3205 - (void)releaseGPUKernel: (void *)data
3206 {
3207 ModelGPUData *gpuData = (ModelGPUData *)data;
3208 SEMfunctions *gpuFunctions = (SEMfunctions *)gpuData->gpuFunctions;
3209 (gpuFunctions->releaseGPUKernel)(self, gpuData->gpuPtrs);
3210 }
3211
3212 @end
3213
3214 //
3215 // Visualization
3216 //
3217
3218 @implementation SubcellularElementModel (Povray)
3219
3220 - (NSMutableString *)povrayCode
3221 {
3222 int i, j;
3223
3224 Grid2DArray *X = [[elementData gridWithName: @"X"] objectForKey: @"matrix"];
3225 Grid2DArray *Y = [[elementData gridWithName: @"Y"] objectForKey: @"matrix"];
3226 Grid2DArray *Z = [[elementData gridWithName: @"Z"] objectForKey: @"matrix"];
3227 Grid2DArray *T = [[elementData gridWithName: @"type"] objectForKey: @"matrix"];
3228 Grid2DArray *C = [[elementData gridWithName: @"cell"] objectForKey: @"matrix"];
3229
3230 float (*xGrid)[0][maxElements] = [X getMatrix];
3231 float (*yGrid)[0][maxElements] = [Y getMatrix];
3232 float (*zGrid)[0][maxElements] = [Z getMatrix];
3233 int (*etGrid)[0][maxElements] = [T getMatrix];
3234 int (*cGrid)[0][maxElements] = [C getMatrix];
3235
3236 NSDictionary *povraySpec = [model objectForKey: @"povray"];
3237 BOOL translateOrigin = NO;
3238 id s = [povraySpec objectForKey: @"translateToOrigin"];
3239 if (s) translateOrigin = [s boolValue];
3240
3241 BOOL blob = YES;
3242 s = [povraySpec objectForKey: @"blob"];
3243 if (s) blob = [s boolValue];
3244
3245 NSString *shape = @"sphere";
3246 s = [povraySpec objectForKey: @"shape"];
3247 if (s) shape = s;
3248
3249 double radius = 0.12;
3250 s = [povraySpec objectForKey: @"radius"];
3251 if (s) radius = [s doubleValue];
3252
3253 NSMutableString *povray = [NSMutableString new];
3254
3255 // get max cell number
3256 int maxNum = -1;
3257 for (i = 0; i < totalElements; ++i) {
3258 if ((*cGrid)[0][i] > maxNum) maxNum = (*cGrid)[0][i];
3259 }
3260
3261 double transX = 0, transY = 0, transZ = 0;
3262 if (translateOrigin) {
3263 transX = 0 - (*xGrid)[0][0];
3264 transY = 0 - (*yGrid)[0][0];
3265 transZ = 0 - (*zGrid)[0][0];
3266 }
3267
3268 for (j = 0; j <= maxNum; ++j) {
3269 BOOL first = YES;
3270 for (i = 0; i < totalElements; ++i) {
3271 if ((*cGrid)[0][i] == j) {
3272 if (first) {
3273 if (blob) {
3274 [povray appendString: @"blob {\n"];
3275 [povray appendString: @" threshold 0.1\n"];
3276 }
3277 //[povray appendString: @" threshold 0.5\n"];
3278 first = NO;
3279 }
3280
3281 if ([shape isEqualToString: @"sphere"]) {
3282 [povray appendString: @" sphere {\n"];
3283 [povray appendFormat: @" <%f, %f, %f>, %f", (*xGrid)[0][i] + transX, (*yGrid)[0][i] + transY, (*zGrid)[0][i] + transZ, radius];
3284 if (blob) [povray appendString: @" strength 1"];
3285 [povray appendString: @"\n"];
3286 //[povray appendFormat: @" <%f, %f, %f>, %f strength 1\n", (*xGrid)[0][i] + transX, (*yGrid)[0][i] + transY, (*zGrid)[0][i] + transZ, 0.1];
3287 //[povray appendFormat: @" <%f, %f, %f>, %f strength 1\n", (*xGrid)[i][j], (*yGrid)[i][j], (*zGrid)[i][j], cellRadius/2.0];
3288 [povray appendString: @" texture {\n"];
3289 if ((*etGrid)[0][i])
3290 [povray appendFormat: @" pigment { rgb <%f, 0.5, 0.5> }\n", 1.0*(j+1)/(maxNum+1)];
3291 else
3292 [povray appendFormat: @" pigment { rgb <%f, 0.5, 0.0> }\n", 1.0*(j+1)/(maxNum+1)];
3293 [povray appendString: @" }\n"];
3294 [povray appendString: @" }\n"];
3295 }
3296 }
3297 }
3298
3299 if (!first) {
3300 if (blob) [povray appendString: @"}"];
3301 [povray appendString: @"\n\n"];
3302 }
3303 }
3304
3305 return povray;
3306 }
3307
3308 @end
3309
3310
3311 //
3312 // MPI
3313 //
3314 @implementation SubcellularElementModel (MPI)
3315
3316 - (void)allocateMPISimulationWithEncode: (NSString *)anEncode
3317 {
3318 int dims = [[super spatialModel] dimensions];
3319 printf("ScEM allocateMPISimulationWithEncode\n");
3320
3321 int neighborSize, neighborSectorSize;
3322 if (dims == 2) { neighborSize = S_XY_TOT; neighborSectorSize = sectorsInDomain; }
3323 else { neighborSize = S_XYZ_TOT; neighborSectorSize = sectorsInDomain*sectorsInDomain; }
3324
3325 outgoingSectors = malloc(sizeof(int) * neighborSectorSize * neighborSize);
3326 outgoingElements = malloc(sizeof(float) * 4 * neighborSectorSize * maxElementsPerSector * neighborSize);
3327 incomingSectors = malloc(sizeof(int) * neighborSectorSize * neighborSize);
3328 incomingElements = malloc(sizeof(float) * 4 * neighborSectorSize * maxElementsPerSector * neighborSize);
3329 availableElements = [NSMutableArray new];
3330
3331 int i, direction;
3332 float (*iElements)[neighborSize][4 * neighborSectorSize * maxElementsPerSector] = (void *)incomingElements;
3333 for (direction = S_START; direction < neighborSize; ++direction)
3334 for (i = 0; i < 4 * neighborSectorSize * maxElementsPerSector; ++i)
3335 (*iElements)[direction][i] = -1.0;
3336 }
3337
3338 - (void)initializeMPISimulation
3339 {
3340 int i, j;
3341 int dims = [[super spatialModel] dimensions];
3342
3343 if (dims == 2) {
3344 int (*oSectors)[sectorsInDomain][S_XY_TOT] = outgoingSectors;
3345 int (*iSectors)[sectorsInDomain][S_XY_TOT] = incomingSectors;
3346 int (*sCoordinates)[3][maxSectors] = (void *)sectorCoordinates;
3347
3348 for (i = 0; i < S_XY_TOT; ++i)
3349 for (j = 0; j < sectorsInDomain; ++j) {
3350 (*iSectors)[j][i] = -1;
3351 (*oSectors)[j][i] = -1;
3352 }
3353
3354 // setup MPI neighbor incoming/outgoing sector tables
3355
3356 // incoming sectors are the outside border sectors
3357 // they have coordinates of -1 or sectorsInDomain
3358
3359 // outgoing sectors are the outside border sectors
3360 // they have coordinates of 0 or (sectorsInDomain - 1)
3361
3362 int direction;
3363 for (direction = S_START; direction < S_XY_TOT; ++direction) {
3364 switch (direction) {
3365
3366 case S_LEFT: { // count = sectorsInDomain
3367 int iNum = 0, oNum = 0;
3368 for (i = 0; i < numSectors; ++i) {
3369 if (((*sCoordinates)[0][i] == -1) && ((*sCoordinates)[1][i] >= 0) && ((*sCoordinates)[1][i] < sectorsInDomain)) {
3370 (*iSectors)[iNum][direction] = i;
3371 ++iNum;
3372 }
3373 if (((*sCoordinates)[0][i] == 0) && ((*sCoordinates)[1][i] >= 0) && ((*sCoordinates)[1][i] < sectorsInDomain)) {
3374 (*oSectors)[oNum][direction] = i;
3375 //printf("S_LEFT outgoingSector: %d, %d\n", oNum, i);
3376 ++oNum;
3377 }
3378 }
3379 if ([self isDebug]) printf("neighbor %s, incoming = %d, outgoing = %d\n", bc_sector_direction_string(direction), iNum, oNum);
3380 break;
3381 }
3382
3383 case S_RIGHT: { // count = sectorsInDomain
3384 int iNum = 0, oNum = 0;
3385 for (i = 0; i < numSectors; ++i) {
3386 if (((*sCoordinates)[0][i] == sectorsInDomain) && ((*sCoordinates)[1][i] >= 0) && ((*sCoordinates)[1][i] < sectorsInDomain)) {
3387 (*iSectors)[iNum][direction] = i;
3388 //printf("S_RIGHT incomingSector: %d, %d\n", iNum, i);
3389 ++iNum;
3390 }
3391 if (((*sCoordinates)[0][i] == (sectorsInDomain - 1)) && ((*sCoordinates)[1][i] >= 0) && ((*sCoordinates)[1][i] < sectorsInDomain)) {
3392 (*oSectors)[oNum][direction] = i;
3393 ++oNum;
3394 }
3395 }
3396 if ([self isDebug]) printf("neighbor %s, incoming = %d, outgoing = %d\n", bc_sector_direction_string(direction), iNum, oNum);
3397 break;
3398 }
3399
3400 case S_UP: { // count = sectorsInDomain
3401 int iNum = 0, oNum = 0;
3402 for (i = 0; i < numSectors; ++i) {
3403 if (((*sCoordinates)[1][i] == sectorsInDomain) && ((*sCoordinates)[0][i] >= 0) && ((*sCoordinates)[0][i] < sectorsInDomain)) {
3404 (*iSectors)[iNum][direction] = i;
3405 //printf("S_UP incomingSector: %d, %d\n", iNum, i);
3406 ++iNum;
3407 }
3408 if (((*sCoordinates)[1][i] == (sectorsInDomain - 1)) && ((*sCoordinates)[0][i] >= 0) && ((*sCoordinates)[0][i] < sectorsInDomain)) {
3409 (*oSectors)[oNum][direction] = i;
3410 ++oNum;
3411 }
3412 }
3413 if ([self isDebug]) printf("neighbor %s, incoming = %d, outgoing = %d\n", bc_sector_direction_string(direction), iNum, oNum);
3414 break;
3415 }
3416
3417 case S_DOWN: { // count = sectorsInDomain
3418 int iNum = 0, oNum = 0;
3419 for (i = 0; i < numSectors; ++i) {
3420 if (((*sCoordinates)[1][i] == -1) && ((*sCoordinates)[0][i] >= 0) && ((*sCoordinates)[0][i] < sectorsInDomain)) {
3421 (*iSectors)[iNum][direction] = i;
3422 ++iNum;
3423 }
3424 if (((*sCoordinates)[1][i] == 0) && ((*sCoordinates)[0][i] >= 0) && ((*sCoordinates)[0][i] < sectorsInDomain)) {
3425 (*oSectors)[oNum][direction] = i;
3426 //printf("S_DOWN outgoingSector: %d, %d\n", oNum, i);
3427 ++oNum;
3428 }
3429 }
3430 if ([self isDebug]) printf("neighbor %s, incoming = %d, outgoing = %d\n", bc_sector_direction_string(direction), iNum, oNum);
3431 break;
3432 }
3433
3434 case S_LEFT_UP: { // count = 1
3435 int iNum = 0, oNum = 0;
3436 for (i = 0; i < numSectors; ++i) {
3437 if (((*sCoordinates)[0][i] == -1) && ((*sCoordinates)[1][i] == sectorsInDomain)) {
3438 (*iSectors)[iNum][direction] = i;
3439 ++iNum;
3440 }
3441 if (((*sCoordinates)[0][i] == 0) && ((*sCoordinates)[1][i] == (sectorsInDomain - 1))) {
3442 (*oSectors)[oNum][direction] = i;
3443 ++oNum;
3444 }
3445 }
3446 if ([self isDebug]) printf("neighbor %s, incoming = %d, outgoing = %d\n", bc_sector_direction_string(direction), iNum, oNum);
3447 break;
3448 }
3449
3450 case S_RIGHT_UP: { // count = 1
3451 int iNum = 0, oNum = 0;
3452 for (i = 0; i < numSectors; ++i) {
3453 if (((*sCoordinates)[0][i] == sectorsInDomain) && ((*sCoordinates)[1][i] == sectorsInDomain)) {
3454 (*iSectors)[iNum][direction] = i;
3455 ++iNum;
3456 }
3457 if (((*sCoordinates)[0][i] == (sectorsInDomain - 1)) && ((*sCoordinates)[1][i] == (sectorsInDomain - 1))) {
3458 (*oSectors)[oNum][direction] = i;
3459 ++oNum;
3460 }
3461 }
3462 if ([self isDebug]) printf("neighbor %s, incoming = %d, outgoing = %d\n", bc_sector_direction_string(direction), iNum, oNum);
3463 break;
3464 }
3465
3466 case S_LEFT_DOWN: { // count = 1
3467 int iNum = 0, oNum = 0;
3468 for (i = 0; i < numSectors; ++i) {
3469 if (((*sCoordinates)[0][i] == -1) && ((*sCoordinates)[1][i] == -1)) {
3470 (*iSectors)[iNum][direction] = i;
3471 ++iNum;
3472 }
3473 if (((*sCoordinates)[0][i] == 0) && ((*sCoordinates)[1][i] == 0)) {
3474 (*oSectors)[oNum][direction] = i;
3475 ++oNum;
3476 }
3477 }
3478 if ([self isDebug]) printf("neighbor %s, incoming = %d, outgoing = %d\n", bc_sector_direction_string(direction), iNum, oNum);
3479 break;
3480 }
3481
3482 case S_RIGHT_DOWN: { // count = 1
3483 int iNum = 0, oNum = 0;
3484 for (i = 0; i < numSectors; ++i) {
3485 if (((*sCoordinates)[0][i] == sectorsInDomain) && ((*sCoordinates)[1][i] == -1)) {
3486 (*iSectors)[iNum][direction] = i;
3487 ++iNum;
3488 }
3489 if (((*sCoordinates)[0][i] == (sectorsInDomain - 1)) && ((*sCoordinates)[1][i] == 0)) {
3490 (*oSectors)[oNum][direction] = i;
3491 ++oNum;
3492 }
3493 }
3494 if ([self isDebug]) printf("neighbor %s, incoming = %d, outgoing = %d\n", bc_sector_direction_string(direction), iNum, oNum);
3495 break;
3496 }
3497 }
3498 }
3499 } else {
3500 // 3D
3501
3502 int (*oSectors)[sectorsInDomain*sectorsInDomain][S_XYZ_TOT] = outgoingSectors;
3503 int (*iSectors)[sectorsInDomain*sectorsInDomain][S_XYZ_TOT] = incomingSectors;
3504 int (*sCoordinates)[3][maxSectors] = (void *)sectorCoordinates;
3505
3506 for (i = 0; i < S_XYZ_TOT; ++i)
3507 for (j = 0; j < sectorsInDomain*sectorsInDomain; ++j) {
3508 (*iSectors)[j][i] = -1;
3509 (*oSectors)[j][i] = -1;
3510 }
3511
3512 // setup MPI neighbor incoming/outgoing sector tables
3513
3514 // incoming sectors are the outside border sectors
3515 // they have coordinates of -1 or sectorsInDomain
3516
3517 // outgoing sectors are the outside border sectors
3518 // they have coordinates of 0 or (sectorsInDomain - 1)
3519
3520 int direction;
3521 for (direction = S_START; direction < S_XYZ_TOT; ++direction) {
3522 switch (direction) {
3523
3524 case S_LEFT: { // count = sectorsInDomain*sectorsInDomain
3525 int iNum = 0, oNum = 0;
3526 for (i = 0; i < numSectors; ++i) {
3527 if (((*sCoordinates)[0][i] == -1)
3528 && ((*sCoordinates)[1][i] >= 0) && ((*sCoordinates)[1][i] < sectorsInDomain)
3529 && ((*sCoordinates)[2][i] >= 0) && ((*sCoordinates)[2][i] < sectorsInDomain)) {
3530 (*iSectors)[iNum][direction] = i;
3531 ++iNum;
3532 }
3533 if (((*sCoordinates)[0][i] == 0)
3534 && ((*sCoordinates)[1][i] >= 0) && ((*sCoordinates)[1][i] < sectorsInDomain)
3535 && ((*sCoordinates)[2][i] >= 0) && ((*sCoordinates)[2][i] < sectorsInDomain)) {
3536 (*oSectors)[oNum][direction] = i;
3537 //printf("S_LEFT outgoingSector: %d, %d\n", oNum, i);
3538 ++oNum;
3539 }
3540 }
3541 if ([self isDebug]) printf("neighbor %s, incoming = %d, outgoing = %d\n", bc_sector_direction_string(direction), iNum, oNum);
3542 break;
3543 }
3544
3545 case S_RIGHT: { // count = sectorsInDomain*sectorsInDomain
3546 int iNum = 0, oNum = 0;
3547 for (i = 0; i < numSectors; ++i) {
3548 if (((*sCoordinates)[0][i] == sectorsInDomain)
3549 && ((*sCoordinates)[1][i] >= 0) && ((*sCoordinates)[1][i] < sectorsInDomain)
3550 && ((*sCoordinates)[2][i] >= 0) && ((*sCoordinates)[2][i] < sectorsInDomain)) {
3551 (*iSectors)[iNum][direction] = i;
3552 //printf("S_RIGHT incomingSector: %d, %d\n", iNum, i);
3553 ++iNum;
3554 }
3555 if (((*sCoordinates)[0][i] == (sectorsInDomain - 1))
3556 && ((*sCoordinates)[1][i] >= 0) && ((*sCoordinates)[1][i] < sectorsInDomain)
3557 && ((*sCoordinates)[2][i] >= 0) && ((*sCoordinates)[2][i] < sectorsInDomain)) {
3558 (*oSectors)[oNum][direction] = i;
3559 ++oNum;
3560 }
3561 }
3562 if ([self isDebug]) printf("neighbor %s, incoming = %d, outgoing = %d\n", bc_sector_direction_string(direction), iNum, oNum);
3563 break;
3564 }
3565
3566 case S_UP: { // count = sectorsInDomain*sectorsInDomain
3567 int iNum = 0, oNum = 0;
3568 for (i = 0; i < numSectors; ++i) {
3569 if (((*sCoordinates)[1][i] == sectorsInDomain)
3570 && ((*sCoordinates)[0][i] >= 0) && ((*sCoordinates)[0][i] < sectorsInDomain)
3571 && ((*sCoordinates)[2][i] >= 0) && ((*sCoordinates)[2][i] < sectorsInDomain)) {
3572 (*iSectors)[iNum][direction] = i;
3573 ++iNum;
3574 }
3575 if (((*sCoordinates)[1][i] == (sectorsInDomain - 1))
3576 && ((*sCoordinates)[0][i] >= 0) && ((*sCoordinates)[0][i] < sectorsInDomain)
3577 && ((*sCoordinates)[2][i] >= 0) && ((*sCoordinates)[2][i] < sectorsInDomain)) {
3578 (*oSectors)[oNum][direction] = i;
3579 //printf("S_UP outgoingSector: %d, %d\n", oNum, i);
3580 ++oNum;
3581 }
3582 }
3583 if ([self isDebug]) printf("neighbor %s, incoming = %d, outgoing = %d\n", bc_sector_direction_string(direction), iNum, oNum);
3584 break;
3585 }
3586
3587 case S_DOWN: { // count = sectorsInDomain*sectorsInDomain
3588 int iNum = 0, oNum = 0;
3589 for (i = 0; i < numSectors; ++i) {
3590 if (((*sCoordinates)[1][i] == -1)
3591 && ((*sCoordinates)[0][i] >= 0) && ((*sCoordinates)[0][i] < sectorsInDomain)
3592 && ((*sCoordinates)[2][i] >= 0) && ((*sCoordinates)[2][i] < sectorsInDomain)) {
3593 (*iSectors)[iNum][direction] = i;
3594 ++iNum;
3595 }
3596 if (((*sCoordinates)[1][i] == 0)
3597 && ((*sCoordinates)[0][i] >= 0) && ((*sCoordinates)[0][i] < sectorsInDomain)
3598 && ((*sCoordinates)[2][i] >= 0) && ((*sCoordinates)[2][i] < sectorsInDomain)) {
3599 (*oSectors)[oNum][direction] = i;
3600 //printf("S_DOWN outgoingSector: %d, %d\n", oNum, i);
3601 ++oNum;
3602 }
3603 }
3604 if ([self isDebug]) printf("neighbor %s, incoming = %d, outgoing = %d\n", bc_sector_direction_string(direction), iNum, oNum);
3605 break;
3606 }
3607
3608 case S_LEFT_UP: { // count = sectorsInDomain
3609 int iNum = 0, oNum = 0;
3610 for (i = 0; i < numSectors; ++i) {
3611 if (((*sCoordinates)[0][i] == -1) && ((*sCoordinates)[1][i] == sectorsInDomain)
3612 && ((*sCoordinates)[2][i] >= 0) && ((*sCoordinates)[2][i] < sectorsInDomain)) {
3613 (*iSectors)[iNum][direction] = i;
3614 ++iNum;
3615 }
3616 if (((*sCoordinates)[0][i] == 0) && ((*sCoordinates)[1][i] == (sectorsInDomain - 1))
3617 && ((*sCoordinates)[2][i] >= 0) && ((*sCoordinates)[2][i] < sectorsInDomain)) {
3618 (*oSectors)[oNum][direction] = i;
3619 ++oNum;
3620 }
3621 }
3622 if ([self isDebug]) printf("neighbor %s, incoming = %d, outgoing = %d\n", bc_sector_direction_string(direction), iNum, oNum);
3623 break;
3624 }
3625
3626 case S_RIGHT_UP: { // count = sectorsInDomain
3627 int iNum = 0, oNum = 0;
3628 for (i = 0; i < numSectors; ++i) {
3629 if (((*sCoordinates)[0][i] == sectorsInDomain) && ((*sCoordinates)[1][i] == sectorsInDomain)
3630 && ((*sCoordinates)[2][i] >= 0) && ((*sCoordinates)[2][i] < sectorsInDomain)) {
3631 (*iSectors)[iNum][direction] = i;
3632 ++iNum;
3633 }
3634 if (((*sCoordinates)[0][i] == (sectorsInDomain - 1)) && ((*sCoordinates)[1][i] == (sectorsInDomain - 1))
3635 && ((*sCoordinates)[2][i] >= 0) && ((*sCoordinates)[2][i] < sectorsInDomain)) {
3636 (*oSectors)[oNum][direction] = i;
3637 ++oNum;
3638 }
3639 }
3640 if ([self isDebug]) printf("neighbor %s, incoming = %d, outgoing = %d\n", bc_sector_direction_string(direction), iNum, oNum);
3641 break;
3642 }
3643
3644 case S_LEFT_DOWN: { // count = sectorsInDomain
3645 int iNum = 0, oNum = 0;
3646 for (i = 0; i < numSectors; ++i) {
3647 if (((*sCoordinates)[0][i] == -1) && ((*sCoordinates)[1][i] == -1)
3648 && ((*sCoordinates)[2][i] >= 0) && ((*sCoordinates)[2][i] < sectorsInDomain)) {
3649 (*iSectors)[iNum][direction] = i;
3650 ++iNum;
3651 }
3652 if (((*sCoordinates)[0][i] == 0) && ((*sCoordinates)[1][i] == 0)
3653 && ((*sCoordinates)[2][i] >= 0) && ((*sCoordinates)[2][i] < sectorsInDomain)) {
3654 (*oSectors)[oNum][direction] = i;
3655 ++oNum;
3656 }
3657 }
3658 if ([self isDebug]) printf("neighbor %s, incoming = %d, outgoing = %d\n", bc_sector_direction_string(direction), iNum, oNum);
3659 break;
3660 }
3661
3662 case S_RIGHT_DOWN: { // count = sectorsInDomain
3663 int iNum = 0, oNum = 0;
3664 for (i = 0; i < numSectors; ++i) {
3665 if (((*sCoordinates)[0][i] == sectorsInDomain) && ((*sCoordinates)[1][i] == -1)
3666 && ((*sCoordinates)[2][i] >= 0) && ((*sCoordinates)[2][i] < sectorsInDomain)) {
3667 (*iSectors)[iNum][direction] = i;
3668 ++iNum;
3669 }
3670 if (((*sCoordinates)[0][i] == (sectorsInDomain - 1)) && ((*sCoordinates)[1][i] == 0)
3671 && ((*sCoordinates)[2][i] >= 0) && ((*sCoordinates)[2][i] < sectorsInDomain)) {
3672 (*oSectors)[oNum][direction] = i;
3673 ++oNum;
3674 }
3675 }
3676 if ([self isDebug]) printf("neighbor %s, incoming = %d, outgoing = %d\n", bc_sector_direction_string(direction), iNum, oNum);
3677 break;
3678 }
3679
3680 case S_FRONT: { // count = sectorsInDomain*sectorsInDomain
3681 int iNum = 0, oNum = 0;
3682 for (i = 0; i < numSectors; ++i) {
3683 if (((*sCoordinates)[2][i] == -1)
3684 && ((*sCoordinates)[0][i] >= 0) && ((*sCoordinates)[0][i] < sectorsInDomain)
3685 && ((*sCoordinates)[1][i] >= 0) && ((*sCoordinates)[1][i] < sectorsInDomain)) {
3686 (*iSectors)[iNum][direction] = i;
3687 ++iNum;
3688 }
3689 if (((*sCoordinates)[2][i] == 0)
3690 && ((*sCoordinates)[0][i] >= 0) && ((*sCoordinates)[0][i] < sectorsInDomain)
3691 && ((*sCoordinates)[1][i] >= 0) && ((*sCoordinates)[1][i] < sectorsInDomain)) {
3692 (*oSectors)[oNum][direction] = i;
3693 //printf("S_FRONT outgoingSector: %d, %d\n", oNum, i);
3694 ++oNum;
3695 }
3696 }
3697 if ([self isDebug]) printf("neighbor %s, incoming = %d, outgoing = %d\n", bc_sector_direction_string(direction), iNum, oNum);
3698 break;
3699 }
3700
3701 case S_BACK: { // count = sectorsInDomain*sectorsInDomain
3702 int iNum = 0, oNum = 0;
3703 for (i = 0; i < numSectors; ++i) {
3704 if (((*sCoordinates)[2][i] == sectorsInDomain)
3705 && ((*sCoordinates)[0][i] >= 0) && ((*sCoordinates)[0][i] < sectorsInDomain)
3706 && ((*sCoordinates)[1][i] >= 0) && ((*sCoordinates)[1][i] < sectorsInDomain)) {
3707 (*iSectors)[iNum][direction] = i;
3708 ++iNum;
3709 }
3710 if (((*sCoordinates)[2][i] == (sectorsInDomain - 1))
3711 && ((*sCoordinates)[0][i] >= 0) && ((*sCoordinates)[0][i] < sectorsInDomain)
3712 && ((*sCoordinates)[1][i] >= 0) && ((*sCoordinates)[1][i] < sectorsInDomain)) {
3713 (*oSectors)[oNum][direction] = i;
3714 //printf("S_BACK outgoingSector: %d, %d\n", oNum, i);
3715 ++oNum;
3716 }
3717 }
3718 if ([self isDebug]) printf("neighbor %s, incoming = %d, outgoing = %d\n", bc_sector_direction_string(direction), iNum, oNum);
3719 break;
3720 }
3721
3722 case S_LEFT_FRONT: { // count = sectorsInDomain
3723 int iNum = 0, oNum = 0;
3724 for (i = 0; i < numSectors; ++i) {
3725 if (((*sCoordinates)[0][i] == -1) && ((*sCoordinates)[2][i] == -1)
3726 && ((*sCoordinates)[1][i] >= 0) && ((*sCoordinates)[1][i] < sectorsInDomain)) {
3727 (*iSectors)[iNum][direction] = i;
3728 ++iNum;
3729 }
3730 if (((*sCoordinates)[0][i] == 0) && ((*sCoordinates)[2][i] == 0)
3731 && ((*sCoordinates)[1][i] >= 0) && ((*sCoordinates)[1][i] < sectorsInDomain)) {
3732 (*oSectors)[oNum][direction] = i;
3733 ++oNum;
3734 }
3735 }
3736 if ([self isDebug]) printf("neighbor %s, incoming = %d, outgoing = %d\n", bc_sector_direction_string(direction), iNum, oNum);
3737 break;
3738 }
3739
3740 case S_RIGHT_FRONT: { // count = sectorsInDomain
3741 int iNum = 0, oNum = 0;
3742 for (i = 0; i < numSectors; ++i) {
3743 if (((*sCoordinates)[0][i] == sectorsInDomain) && ((*sCoordinates)[2][i] == -1)
3744 && ((*sCoordinates)[1][i] >= 0) && ((*sCoordinates)[1][i] < sectorsInDomain)) {
3745 (*iSectors)[iNum][direction] = i;
3746 ++iNum;
3747 }
3748 if (((*sCoordinates)[0][i] == (sectorsInDomain - 1)) && ((*sCoordinates)[2][i] == 0)
3749 && ((*sCoordinates)[1][i] >= 0) && ((*sCoordinates)[1][i] < sectorsInDomain)) {
3750 (*oSectors)[oNum][direction] = i;
3751 ++oNum;
3752 }
3753 }
3754 if ([self isDebug]) printf("neighbor %s, incoming = %d, outgoing = %d\n", bc_sector_direction_string(direction), iNum, oNum);
3755 break;
3756 }
3757
3758 case S_LEFT_BACK: { // count = sectorsInDomain
3759 int iNum = 0, oNum = 0;
3760 for (i = 0; i < numSectors; ++i) {
3761 if (((*sCoordinates)[0][i] == -1) && ((*sCoordinates)[2][i] == sectorsInDomain)
3762 && ((*sCoordinates)[1][i] >= 0) && ((*sCoordinates)[1][i] < sectorsInDomain)) {
3763 (*iSectors)[iNum][direction] = i;
3764 ++iNum;
3765 }
3766 if (((*sCoordinates)[0][i] == 0) && ((*sCoordinates)[2][i] == (sectorsInDomain - 1))
3767 && ((*sCoordinates)[1][i] >= 0) && ((*sCoordinates)[1][i] < sectorsInDomain)) {
3768 (*oSectors)[oNum][direction] = i;
3769 ++oNum;
3770 }
3771 }
3772 if ([self isDebug]) printf("neighbor %s, incoming = %d, outgoing = %d\n", bc_sector_direction_string(direction), iNum, oNum);
3773 break;
3774 }
3775
3776 case S_RIGHT_BACK: { // count = sectorsInDomain
3777 int iNum = 0, oNum = 0;
3778 for (i = 0; i < numSectors; ++i) {
3779 if (((*sCoordinates)[0][i] == sectorsInDomain) && ((*sCoordinates)[2][i] == sectorsInDomain)
3780 && ((*sCoordinates)[1][i] >= 0) && ((*sCoordinates)[1][i] < sectorsInDomain)) {
3781 (*iSectors)[iNum][direction] = i;
3782 ++iNum;
3783 }
3784 if (((*sCoordinates)[0][i] == (sectorsInDomain - 1)) && ((*sCoordinates)[2][i] == (sectorsInDomain - 1))
3785 && ((*sCoordinates)[1][i] >= 0) && ((*sCoordinates)[1][i] < sectorsInDomain)) {
3786 (*oSectors)[oNum][direction] = i;
3787 ++oNum;
3788 }
3789 }
3790 if ([self isDebug]) printf("neighbor %s, incoming = %d, outgoing = %d\n", bc_sector_direction_string(direction), iNum, oNum);
3791 break;
3792 }
3793
3794 case S_UP_FRONT: { // count = sectorsInDomain
3795 int iNum = 0, oNum = 0;
3796 for (i = 0; i < numSectors; ++i) {
3797 if (((*sCoordinates)[1][i] == sectorsInDomain) && ((*sCoordinates)[2][i] == -1)
3798 && ((*sCoordinates)[0][i] >= 0) && ((*sCoordinates)[0][i] < sectorsInDomain)) {
3799 (*iSectors)[iNum][direction] = i;
3800 ++iNum;
3801 }
3802 if (((*sCoordinates)[1][i] == (sectorsInDomain - 1)) && ((*sCoordinates)[2][i] == 0)
3803 && ((*sCoordinates)[0][i] >= 0) && ((*sCoordinates)[0][i] < sectorsInDomain)) {
3804 (*oSectors)[oNum][direction] = i;
3805 ++oNum;
3806 }
3807 }
3808 if ([self isDebug]) printf("neighbor %s, incoming = %d, outgoing = %d\n", bc_sector_direction_string(direction), iNum, oNum);
3809 break;
3810 }
3811
3812 case S_DOWN_FRONT: { // count = sectorsInDomain
3813 int iNum = 0, oNum = 0;
3814 for (i = 0; i < numSectors; ++i) {
3815 if (((*sCoordinates)[1][i] == -1) && ((*sCoordinates)[2][i] == -1)
3816 && ((*sCoordinates)[0][i] >= 0) && ((*sCoordinates)[0][i] < sectorsInDomain)) {
3817 (*iSectors)[iNum][direction] = i;
3818 ++iNum;
3819 }
3820 if (((*sCoordinates)[1][i] == 0) && ((*sCoordinates)[2][i] == 0)
3821 && ((*sCoordinates)[0][i] >= 0) && ((*sCoordinates)[0][i] < sectorsInDomain)) {
3822 (*oSectors)[oNum][direction] = i;
3823 ++oNum;
3824 }
3825 }
3826 if ([self isDebug]) printf("neighbor %s, incoming = %d, outgoing = %d\n", bc_sector_direction_string(direction), iNum, oNum);
3827 break;
3828 }
3829
3830 case S_UP_BACK: { // count = sectorsInDomain
3831 int iNum = 0, oNum = 0;
3832 for (i = 0; i < numSectors; ++i) {
3833 if (((*sCoordinates)[1][i] == sectorsInDomain) && ((*sCoordinates)[2][i] == sectorsInDomain)
3834 && ((*sCoordinates)[0][i] >= 0) && ((*sCoordinates)[0][i] < sectorsInDomain)) {
3835 (*iSectors)[iNum][direction] = i;
3836 ++iNum;
3837 }
3838 if (((*sCoordinates)[1][i] == (sectorsInDomain - 1)) && ((*sCoordinates)[2][i] == (sectorsInDomain - 1))
3839 && ((*sCoordinates)[0][i] >= 0) && ((*sCoordinates)[0][i] < sectorsInDomain)) {
3840 (*oSectors)[oNum][direction] = i;
3841 ++oNum;
3842 }
3843 }
3844 if ([self isDebug]) printf("neighbor %s, incoming = %d, outgoing = %d\n", bc_sector_direction_string(direction), iNum, oNum);
3845 break;
3846 }
3847
3848 case S_DOWN_BACK: { // count = sectorsInDomain
3849 int iNum = 0, oNum = 0;
3850 for (i = 0; i < numSectors; ++i) {
3851 if (((*sCoordinates)[1][i] == -1) && ((*sCoordinates)[2][i] == sectorsInDomain)
3852 && ((*sCoordinates)[0][i] >= 0) && ((*sCoordinates)[0][i] < sectorsInDomain)) {
3853 (*iSectors)[iNum][direction] = i;
3854 ++iNum;
3855 }
3856 if (((*sCoordinates)[1][i] == 0) && ((*sCoordinates)[2][i] == (sectorsInDomain - 1))
3857 && ((*sCoordinates)[0][i] >= 0) && ((*sCoordinates)[0][i] < sectorsInDomain)) {
3858 (*oSectors)[oNum][direction] = i;
3859 ++oNum;
3860 }
3861 }
3862 if ([self isDebug]) printf("neighbor %s, incoming = %d, outgoing = %d\n", bc_sector_direction_string(direction), iNum, oNum);
3863 break;
3864 }
3865
3866 case S_LEFT_UP_FRONT: { // count = 1
3867 int iNum = 0, oNum = 0;
3868 for (i = 0; i < numSectors; ++i) {
3869 if (((*sCoordinates)[0][i] == -1) && ((*sCoordinates)[1][i] == sectorsInDomain) && ((*sCoordinates)[2][i] == -1)) {
3870 (*iSectors)[iNum][direction] = i;
3871 ++iNum;
3872 }
3873 if (((*sCoordinates)[0][i] == 0) && ((*sCoordinates)[1][i] == (sectorsInDomain - 1)) && ((*sCoordinates)[2][i] == 0)) {
3874 (*oSectors)[oNum][direction] = i;
3875 ++oNum;
3876 }
3877 }
3878 if ([self isDebug]) printf("neighbor %s, incoming = %d, outgoing = %d\n", bc_sector_direction_string(direction), iNum, oNum);
3879 break;
3880 }
3881
3882 case S_RIGHT_UP_FRONT: { // count = 1
3883 int iNum = 0, oNum = 0;
3884 for (i = 0; i < numSectors; ++i) {
3885 if (((*sCoordinates)[0][i] == sectorsInDomain) && ((*sCoordinates)[1][i] == sectorsInDomain) && ((*sCoordinates)[2][i] == -1)) {
3886 (*iSectors)[iNum][direction] = i;
3887 ++iNum;
3888 }
3889 if (((*sCoordinates)[0][i] == (sectorsInDomain - 1)) && ((*sCoordinates)[1][i] == (sectorsInDomain - 1)) && ((*sCoordinates)[2][i] == 0)) {
3890 (*oSectors)[oNum][direction] = i;
3891 ++oNum;
3892 }
3893 }
3894 if ([self isDebug]) printf("neighbor %s, incoming = %d, outgoing = %d\n", bc_sector_direction_string(direction), iNum, oNum);
3895 break;
3896 }
3897
3898 case S_LEFT_DOWN_FRONT: { // count = 1
3899 int iNum = 0, oNum = 0;
3900 for (i = 0; i < numSectors; ++i) {
3901 if (((*sCoordinates)[0][i] == -1) && ((*sCoordinates)[1][i] == -1) && ((*sCoordinates)[2][i] == -1)) {
3902 (*iSectors)[iNum][direction] = i;
3903 ++iNum;
3904 }
3905 if (((*sCoordinates)[0][i] == 0) && ((*sCoordinates)[1][i] == 0) && ((*sCoordinates)[2][i] == 0)) {
3906 (*oSectors)[oNum][direction] = i;
3907 ++oNum;
3908 }
3909 }
3910 if ([self isDebug]) printf("neighbor %s, incoming = %d, outgoing = %d\n", bc_sector_direction_string(direction), iNum, oNum);
3911 break;
3912 }
3913
3914 case S_RIGHT_DOWN_FRONT: { // count = 1
3915 int iNum = 0, oNum = 0;
3916 for (i = 0; i < numSectors; ++i) {
3917 if (((*sCoordinates)[0][i] == sectorsInDomain) && ((*sCoordinates)[1][i] == -1) && ((*sCoordinates)[2][i] == -1)) {
3918 (*iSectors)[iNum][direction] = i;
3919 ++iNum;
3920 }
3921 if (((*sCoordinates)[0][i] == (sectorsInDomain - 1)) && ((*sCoordinates)[1][i] == 0) && ((*sCoordinates)[2][i] == 0)) {
3922 (*oSectors)[oNum][direction] = i;
3923 ++oNum;
3924 }
3925 }
3926 if ([self isDebug]) printf("neighbor %s, incoming = %d, outgoing = %d\n", bc_sector_direction_string(direction), iNum, oNum);
3927 break;
3928 }
3929
3930 case S_LEFT_UP_BACK: { // count = 1
3931 int iNum = 0, oNum = 0;
3932 for (i = 0; i < numSectors; ++i) {
3933 if (((*sCoordinates)[0][i] == -1) && ((*sCoordinates)[1][i] == sectorsInDomain) && ((*sCoordinates)[2][i] == sectorsInDomain)) {
3934 (*iSectors)[iNum][direction] = i;
3935 ++iNum;
3936 }
3937 if (((*sCoordinates)[0][i] == 0) && ((*sCoordinates)[1][i] == (sectorsInDomain - 1)) && ((*sCoordinates)[2][i] == (sectorsInDomain - 1))) {
3938 (*oSectors)[oNum][direction] = i;
3939 ++oNum;
3940 }
3941 }
3942 if ([self isDebug]) printf("neighbor %s, incoming = %d, outgoing = %d\n", bc_sector_direction_string(direction), iNum, oNum);
3943 break;
3944 }
3945
3946 case S_RIGHT_UP_BACK: { // count = 1
3947 int iNum = 0, oNum = 0;
3948 for (i = 0; i < numSectors; ++i) {
3949 if (((*sCoordinates)[0][i] == sectorsInDomain) && ((*sCoordinates)[1][i] == sectorsInDomain) && ((*sCoordinates)[2][i] == sectorsInDomain)) {
3950 (*iSectors)[iNum][direction] = i;
3951 ++iNum;
3952 }
3953 if (((*sCoordinates)[0][i] == (sectorsInDomain - 1)) && ((*sCoordinates)[1][i] == (sectorsInDomain - 1)) && ((*sCoordinates)[2][i] == (sectorsInDomain - 1))) {
3954 (*oSectors)[oNum][direction] = i;
3955 ++oNum;
3956 }
3957 }
3958 if ([self isDebug]) printf("neighbor %s, incoming = %d, outgoing = %d\n", bc_sector_direction_string(direction), iNum, oNum);
3959 break;
3960 }
3961
3962 case S_LEFT_DOWN_BACK: { // count = 1
3963 int iNum = 0, oNum = 0;
3964 for (i = 0; i < numSectors; ++i) {
3965 if (((*sCoordinates)[0][i] == -1) && ((*sCoordinates)[1][i] == -1) && ((*sCoordinates)[2][i] == sectorsInDomain)) {
3966 (*iSectors)[iNum][direction] = i;
3967 ++iNum;
3968 }
3969 if (((*sCoordinates)[0][i] == 0) && ((*sCoordinates)[1][i] == 0) && ((*sCoordinates)[2][i] == (sectorsInDomain - 1))) {
3970 (*oSectors)[oNum][direction] = i;
3971 ++oNum;
3972 }
3973 }
3974 if ([self isDebug]) printf("neighbor %s, incoming = %d, outgoing = %d\n", bc_sector_direction_string(direction), iNum, oNum);
3975 break;
3976 }
3977
3978 case S_RIGHT_DOWN_BACK: { // count = 1
3979 int iNum = 0, oNum = 0;
3980 for (i = 0; i < numSectors; ++i) {
3981 if (((*sCoordinates)[0][i] == sectorsInDomain) && ((*sCoordinates)[1][i] == -1) && ((*sCoordinates)[2][i] == sectorsInDomain)) {
3982 (*iSectors)[iNum][direction] = i;
3983 ++iNum;
3984 }
3985 if (((*sCoordinates)[0][i] == (sectorsInDomain - 1)) && ((*sCoordinates)[1][i] == 0) && ((*sCoordinates)[2][i] == (sectorsInDomain - 1))) {
3986 (*oSectors)[oNum][direction] = i;
3987 ++oNum;
3988 }
3989 }
3990 if ([self isDebug]) printf("neighbor %s, incoming = %d, outgoing = %d\n", bc_sector_direction_string(direction), iNum, oNum);
3991 break;
3992 }
3993
3994 }
3995 }
3996 }
3997
3998 // perform initial MPI synchronization
3999 [self initiateGPUMPIData: NULL mode: BC_MPI_SEM_MODE_INIT];
4000 }
4001
4002 - (void)initializeMPIFileCompleted
4003 {
4004 printf("ScEM initializeMPIFileCompleted\n");
4005
4006 // MPI initialization
4007 [self initializeMPISimulation];
4008
4009 // place elements in sectors
4010 if (TIME_PROFILE) bc_record_time([[NSString stringWithFormat: @"start -assignElementsToSectors, rank = %d", [modelController rank]] UTF8String]);
4011 [self assignElementsToSectors];
4012 if (TIME_PROFILE) bc_record_time([[NSString stringWithFormat: @"end -assignElementsToSectors, rank = %d", [modelController rank]] UTF8String]);
4013
4014 // neighbor tables are calculated from element and sector coordinates
4015 if (TIME_PROFILE) bc_record_time([[NSString stringWithFormat: @"start -initializeNeighborTables, rank = %d", [modelController rank]] UTF8String]);
4016 [self initializeNeighborTables];
4017 if (TIME_PROFILE) bc_record_time([[NSString stringWithFormat: @"end -initializeNeighborTables, rank = %d", [modelController rank]] UTF8String]);
4018 }
4019
4020 @end
4021
4022
4023 //
4024 // GPU
4025 //
4026
4027 @implementation SubcellularElementModel (GPUMPI)
4028
4029 - (NSMutableString *)definitionsGPUMPICode: (NSString *)prefixName
4030 {
4031 int dims = [[super spatialModel] dimensions];
4032 printf("definitionsGPUCode: %d\n", dims);
4033
4034 NSMutableString *GPUCode = [NSMutableString new];
4035
4036 [GPUCode appendFormat: @"__constant__ float %@_sem_parameters[%d];\n\n", prefixName, [self numOfParameters]];
4037
4038 [GPUCode appendFormat: @"\n__constant__ BC_SEM_GPUgrids %@_sem_grids[1];\n", prefixName];
4039 [GPUCode appendFormat: @"\n__constant__ BC_MPI_SEM_GPUdata %@_mpi_sem_data[1];\n", prefixName];
4040 [GPUCode appendString: @"\n"];
4041
4042 return GPUCode;
4043 }
4044
4045 - (NSDictionary *)controllerGPUMPICode: (NSString *)prefixName
4046 {
4047 int dims = [[super spatialModel] dimensions];
4048
4049 NSMutableDictionary *GPUDictionary = [NSMutableDictionary new];
4050 NSMutableString *GPUCode;
4051
4052 //
4053 // Headers and defines
4054 //
4055
4056 GPUCode = [NSMutableString new];
4057 [GPUCode appendString: @"#include <stdio.h>\n"];
4058 [GPUCode appendString: @"#include <unistd.h>\n\n"];
4059 [GPUCode appendString: @"extern \"C\" {\n"];
4060 [GPUCode appendString: @"#include <BioSwarm/GPUDefines.h>\n"];
4061 //[GPUCode appendString: @"#include <BioSwarm/GPU_sem.h>\n"];
4062 //[GPUCode appendString: @"#include \"GPU_sem.h\"\n"];
4063 [GPUDictionary setObject: GPUCode forKey: @"header"];
4064
4065 GPUCode = [NSMutableString new];
4066 [GPUCode appendString: @"#include <BioSwarm/GPU_sem.h>\n"];
4067 [GPUCode appendString: @"#include <BioSwarm/GPU_MPI.h>\n"];
4068 [GPUCode appendFormat: @"#include \"%@_defines.cu\"\n\n", prefixName];
4069 [GPUCode appendFormat: @"#include \"%@_kernel.cu\"\n\n", prefixName];
4070 [GPUDictionary setObject: GPUCode forKey: @"defines"];
4071
4072 //
4073 // Allocation: generic
4074 //
4075
4076 //
4077 // Data Transfer
4078 //
4079 GPUCode = [NSMutableString new];
4080 [GPUCode appendFormat: @"%@_transferGPUKernel(void *model, void *g, int aFlag, float *hostParameters, int *cellNumber, int numOfCells, int totalElements, void *hostX, void *hostY, void *hostZ, void *hostType, void *hostSector, void *hostElementNeighborTable, void *EFT, void *EFTT, void *ETT, void *EFFT, void *hostData, void *meshElements, void *hostSectorCoordinates, void *hostNumSectors, void *hostSectorSize, void *hostSectorElementTable, void *hostSectorNeighborTable, void *hostReadOnlySector)", prefixName];
4081 [GPUDictionary setObject: GPUCode forKey: @"transfer function"];
4082
4083 GPUCode = [NSMutableString new];
4084 [GPUCode appendString: @"\n{\n"];
4085 [GPUCode appendString: @"\n BC_SEM_transferGPUKernel(model, g, aFlag, hostParameters, cellNumber, numOfCells, totalElements, hostX, hostY, hostZ, hostType, hostSector, hostElementNeighborTable, EFT, EFTT, ETT, EFFT, hostData, meshElements, hostSectorCoordinates, hostNumSectors, hostSectorSize, hostSectorElementTable, hostSectorNeighborTable, hostReadOnlySector);\n"];
4086 [GPUCode appendString: @"\n if (aFlag) {\n"];
4087 [GPUCode appendString: @" // Copy data structure\n"];
4088 [GPUCode appendFormat: @" cudaMemcpyToSymbol(%@_sem_grids, g, sizeof(BC_SEM_GPUgrids), 0, cudaMemcpyHostToDevice);\n", prefixName];
4089
4090 [GPUCode appendString: @" }\n"];
4091 [GPUCode appendString: @"}\n"];
4092
4093 [GPUCode appendFormat: @"\nvoid %@_MPI_transferGPUKernel(void *model, void *g, int aFlag, void *sem, void *incomingElements, void *outgoingElements, int totalElements)", prefixName];
4094 [GPUCode appendString: @"\n{\n"];
4095 [GPUCode appendFormat: @" BC_MPI_SEM_GPUdata *mpi_data = (BC_MPI_SEM_GPUdata *)g;\n"];
4096 [GPUCode appendFormat: @" BC_SEM_GPUgrids *sem_data = (BC_SEM_GPUgrids *)sem;\n"];
4097 [GPUCode appendString: @"\n BC_MPI_SEM_transferGPUKernel(model, g, aFlag, sem, incomingElements, outgoingElements, totalElements);\n"];
4098 [GPUCode appendString: @"\n if (sem_data->totalElements != totalElements) {\n"];
4099 [GPUCode appendString: @" // Copy data structure\n"];
4100 [GPUCode appendFormat: @" sem_data->totalElements = totalElements;\n"];
4101 [GPUCode appendFormat: @" cudaMemcpyToSymbol(%@_sem_grids, sem, sizeof(BC_SEM_GPUgrids), 0, cudaMemcpyHostToDevice);\n", prefixName];
4102
4103 [GPUCode appendString: @" }\n"];
4104 [GPUCode appendString: @"}\n"];
4105
4106 [GPUCode appendFormat: @"\nvoid *%@_MPI_allocGPUKernel(void *model, void *g, int boundarySize, int sectorsInDomain, void *incomingSectors, void *outgoingSectors)", prefixName];
4107 [GPUCode appendString: @"\n{\n"];
4108 [GPUCode appendString: @" void *mpi_data = BC_MPI_SEM_allocGPUKernel(model, g, boundarySize, sectorsInDomain, incomingSectors, outgoingSectors);\n"];
4109 [GPUCode appendString: @" // Copy data structure\n"];
4110 [GPUCode appendFormat: @" cudaMemcpyToSymbol(%@_mpi_sem_data, mpi_data, sizeof(BC_MPI_SEM_GPUdata), 0, cudaMemcpyHostToDevice);\n", prefixName];
4111 [GPUCode appendString: @" cudacheck(\"transfer MPI data structure\");\n"];
4112 [GPUCode appendString: @" return mpi_data;\n"];
4113 [GPUCode appendString: @"}\n"];
4114
4115 [GPUDictionary setObject: GPUCode forKey: @"transfer code"];
4116
4117 //
4118 // Execution
4119 //
4120
4121 GPUCode = [NSMutableString new];
4122 [GPUCode appendFormat: @"%@_invokeGPUKernel(void *model, void *%@_data, double startTime, double nextTime)", prefixName, prefixName];
4123 [GPUDictionary setObject: GPUCode forKey: @"execution function"];
4124
4125 GPUCode = [NSMutableString new];
4126 [GPUCode appendFormat: @" BC_SEM_GPUgrids *%@_ptrs = (BC_SEM_GPUgrids *)%@_data;\n", prefixName, prefixName];
4127 [GPUCode appendFormat: @" BC_MPI_SEM_GPUdata *%@_mpi_ptrs = (BC_MPI_SEM_GPUdata *)%@_ptrs->mpiData;\n", prefixName, prefixName];
4128 [GPUDictionary setObject: GPUCode forKey: @"structure"];
4129
4130 GPUCode = [NSMutableString new];
4131 [GPUCode appendFormat: @" int %@_x = 256;\n", prefixName];
4132 [GPUCode appendFormat: @" int %@_y = 1;\n", prefixName];
4133 [GPUCode appendFormat: @" if (%@_x > %@_ptrs->totalElements) %@_x = %@_ptrs->totalElements;\n", prefixName, prefixName, prefixName, prefixName];
4134 [GPUCode appendFormat: @" if (%@_y > %@_ptrs->numOfModels) %@_y = %@_ptrs->numOfModels;\n", prefixName, prefixName, prefixName, prefixName];
4135 [GPUCode appendFormat: @" dim3 %@_threadsPerBlock(%@_x, %@_y);\n", prefixName, prefixName, prefixName];
4136 [GPUCode appendFormat: @" dim3 %@_threadsPerGrid((%@_ptrs->totalElements + %@_threadsPerBlock.x - 1) / %@_threadsPerBlock.x, (%@_ptrs->numOfModels + %@_threadsPerBlock.y - 1) / %@_threadsPerBlock.y);\n", prefixName, prefixName, prefixName, prefixName, prefixName, prefixName, prefixName];
4137 [GPUCode appendString: @"\n"];
4138
4139 [GPUCode appendFormat: @" int %@_xx = 256;\n", prefixName];
4140 [GPUCode appendFormat: @" if (%@_xx > %@_ptrs->numOfCells) %@_xx = %@_ptrs->numOfCells;\n", prefixName, prefixName, prefixName, prefixName];
4141 [GPUCode appendFormat: @" dim3 %@_cell_threadsPerBlock(%@_xx, %@_y);\n", prefixName, prefixName, prefixName];
4142 [GPUCode appendFormat: @" dim3 %@_cell_blocksPerGrid((%@_ptrs->numOfCells + %@_cell_threadsPerBlock.x - 1) / %@_cell_threadsPerBlock.x, (%@_ptrs->numOfModels + %@_cell_threadsPerBlock.y - 1) / %@_cell_threadsPerBlock.y);\n", prefixName, prefixName, prefixName, prefixName, prefixName, prefixName, prefixName];
4143 [GPUCode appendString: @"\n"];
4144
4145 [GPUCode appendFormat: @" int %@_nx = 256;\n", prefixName];
4146 [GPUCode appendFormat: @" int %@_ny = 1;\n", prefixName];
4147 [GPUCode appendFormat: @" int %@_maxy = %@_ptrs->numOfModels * %@_ptrs->sectorNeighborSize * %@_ptrs->maxElementsPerSector;\n", prefixName, prefixName, prefixName, prefixName];
4148 [GPUCode appendFormat: @" if (%@_nx > %@_ptrs->totalElements) %@_nx = %@_ptrs->totalElements;\n", prefixName, prefixName, prefixName, prefixName];
4149 [GPUCode appendFormat: @" if (%@_ny > %@_maxy) %@_ny = %@_maxy;\n", prefixName, prefixName, prefixName, prefixName];
4150 [GPUCode appendFormat: @" dim3 %@n_threadsPerBlock(%@_nx, %@_ny);\n", prefixName, prefixName, prefixName];
4151 [GPUCode appendFormat: @" dim3 %@n_threadsPerGrid((%@_ptrs->totalElements + %@n_threadsPerBlock.x - 1) / %@n_threadsPerBlock.x, (%@_maxy + %@n_threadsPerBlock.y - 1) / %@n_threadsPerBlock.y);\n", prefixName, prefixName, prefixName, prefixName, prefixName, prefixName, prefixName];
4152 [GPUCode appendString: @"\n"];
4153
4154 [GPUCode appendFormat: @" int %@_sx = 256;\n", prefixName];
4155 [GPUCode appendFormat: @" int %@_sy = 1;\n", prefixName];
4156 [GPUCode appendFormat: @" if (%@_sx > %@_ptrs->maxSectors) %@_sx = %@_ptrs->maxSectors;\n", prefixName, prefixName, prefixName, prefixName];
4157 [GPUCode appendFormat: @" if (%@_sy > %@_ptrs->numOfModels) %@_sy = %@_ptrs->numOfModels;\n", prefixName, prefixName, prefixName, prefixName];
4158 [GPUCode appendFormat: @" dim3 %@s_threadsPerBlock(%@_sx, %@_sy);\n", prefixName, prefixName, prefixName];
4159 [GPUCode appendFormat: @" dim3 %@s_threadsPerGrid((%@_ptrs->maxSectors + %@s_threadsPerBlock.x - 1) / %@s_threadsPerBlock.x, (%@_ptrs->numOfModels + %@s_threadsPerBlock.y - 1) / %@s_threadsPerBlock.y);\n", prefixName, prefixName, prefixName, prefixName, prefixName, prefixName, prefixName];
4160 [GPUCode appendString: @"\n"];
4161
4162 [GPUCode appendFormat: @" int %@_mx = 256;\n", prefixName];
4163 [GPUCode appendFormat: @" int %@_my = 1;\n", prefixName];
4164 [GPUCode appendFormat: @" if (%@_mx > %@_ptrs->numOfModels) %@_mx = %@_ptrs->numOfModels;\n", prefixName, prefixName, prefixName, prefixName];
4165 [GPUCode appendFormat: @" dim3 %@m_threadsPerBlock(%@_mx, %@_my);\n", prefixName, prefixName, prefixName];
4166 [GPUCode appendFormat: @" dim3 %@m_threadsPerGrid((%@_ptrs->numOfModels + %@m_threadsPerBlock.x - 1) / %@m_threadsPerBlock.x, 1);\n", prefixName, prefixName, prefixName, prefixName];
4167 [GPUCode appendString: @"\n"];
4168
4169 [GPUCode appendFormat: @" int %@_mpi_x = 256;\n", prefixName];
4170 [GPUCode appendFormat: @" int %@_mpi_maxx = %@_mpi_ptrs->boundarySize * %@_ptrs->maxElementsPerSector;\n", prefixName, prefixName, prefixName];
4171 [GPUCode appendFormat: @" if (%@_mpi_x > %@_mpi_maxx) %@_mpi_x = %@_mpi_maxx;\n", prefixName, prefixName, prefixName, prefixName];
4172 [GPUCode appendFormat: @" dim3 %@_mpi_threadsPerBlock(%@_mpi_x, 1);\n", prefixName, prefixName];
4173 [GPUCode appendFormat: @" dim3 %@_mpi_blocksPerGrid((%@_mpi_maxx + %@_mpi_threadsPerBlock.x - 1) / %@_mpi_threadsPerBlock.x, 1);\n", prefixName, prefixName, prefixName, prefixName];
4174
4175 [GPUCode appendFormat: @" int %@s_mpi_x = 256;\n", prefixName];
4176 [GPUCode appendFormat: @" int %@s_mpi_maxx = %@_mpi_ptrs->boundarySize;\n", prefixName, prefixName];
4177 [GPUCode appendFormat: @" if (%@s_mpi_x > %@s_mpi_maxx) %@s_mpi_x = %@s_mpi_maxx;\n", prefixName, prefixName, prefixName, prefixName];
4178 [GPUCode appendFormat: @" dim3 %@s_mpi_threadsPerBlock(%@s_mpi_x, 1);\n", prefixName, prefixName];
4179 [GPUCode appendFormat: @" dim3 %@s_mpi_blocksPerGrid((%@s_mpi_maxx + %@s_mpi_threadsPerBlock.x - 1) / %@s_mpi_threadsPerBlock.x, 1);\n", prefixName, prefixName, prefixName, prefixName];
4180
4181 [GPUDictionary setObject: GPUCode forKey: @"execution variables"];
4182
4183 GPUCode = [NSMutableString new];
4184 [GPUCode appendFormat: @" if (%@_ptrs->calcCellCenters) %@_SEM_center_kernel<<< %@_cell_blocksPerGrid, %@_cell_threadsPerBlock >>>();\n", prefixName, prefixName, prefixName, prefixName];
4185 if ([self isDebug]) [GPUCode appendString: @" cudaPrintfDisplay(stdout, true);\n"];
4186
4187 [GPUCode appendFormat: @" if (%@_ptrs->calcElementCenters) %@_SEM_element_center_kernel<<< %@_cell_blocksPerGrid, %@_cell_threadsPerBlock >>>();\n", prefixName, prefixName, prefixName, prefixName];
4188 if ([self isDebug]) [GPUCode appendString: @" cudaPrintfDisplay(stdout, true);\n"];
4189
4190 #if 0
4191 [GPUCode appendFormat: @" %@_SEM_kernel1_2rk<<< %@_threadsPerGrid, %@_threadsPerBlock >>>(currentTime, nextTime);\n", prefixName, prefixName, prefixName];
4192 if ([self isDebug]) [GPUCode appendString: @" cudaPrintfDisplay(stdout, true);\n"];
4193 [GPUCode appendFormat: @" %@_SEM_kernel2_2rk<<< %@_threadsPerGrid, %@_threadsPerBlock >>>(currentTime, nextTime);\n", prefixName, prefixName, prefixName];
4194 if ([self isDebug]) [GPUCode appendString: @" cudaPrintfDisplay(stdout, true);\n"];
4195 #endif
4196 #if 0
4197 [GPUCode appendFormat: @" cudaMemset2D (%@_ptrs->X_tmp, %@_ptrs->pitch, 0, %@_ptrs->maxElements * sizeof(float), %@_ptrs->numOfModels);\n", prefixName, prefixName, prefixName, prefixName];
4198 [GPUCode appendFormat: @" cudaMemset2D (%@_ptrs->Y_tmp, %@_ptrs->pitch, 0, %@_ptrs->maxElements * sizeof(float), %@_ptrs->numOfModels);\n", prefixName, prefixName, prefixName, prefixName];
4199 [GPUCode appendFormat: @" cudaMemset2D (%@_ptrs->Z_tmp, %@_ptrs->pitch, 0, %@_ptrs->maxElements * sizeof(float), %@_ptrs->numOfModels);\n", prefixName, prefixName, prefixName, prefixName];
4200 [GPUCode appendString: @"\n"];
4201
4202 [GPUCode appendFormat: @" %@_SEM_neighbor_kernel1_2rk<<< %@n_threadsPerGrid, %@n_threadsPerBlock >>>(currentTime, nextTime);\n", prefixName, prefixName, prefixName];
4203 [GPUCode appendFormat: @" cudacheck(\"invoke %@_SEM_neighbor_kernel1_2rk\");\n\n", prefixName];
4204
4205 [GPUCode appendFormat: @" %@_SEM_neighbor_sum_kernel1_2rk<<< %@_threadsPerGrid, %@_threadsPerBlock >>>(currentTime, nextTime);\n", prefixName, prefixName, prefixName];
4206 [GPUCode appendFormat: @" cudacheck(\"invoke %@_SEM_neighbor_sum_kernel1_2rk\");\n\n", prefixName];
4207
4208 [GPUCode appendFormat: @" %@_MPI_SEM_gather<<< %@_mpi_blocksPerGrid, %@_mpi_threadsPerBlock >>>(currentTime, nextTime, BC_MPI_SEM_MODE_F1);\n", prefixName, prefixName, prefixName];
4209 [GPUCode appendFormat: @" BC_MPI_initiate_transfer(model, BC_MPI_SEM_MODE_F1, currentTime, nextTime);\n"];
4210 [GPUCode appendFormat: @" %@_MPI_SEM_scatter<<< %@_mpi_blocksPerGrid, %@_mpi_threadsPerBlock >>>(currentTime, nextTime, BC_MPI_SEM_MODE_F1);\n\n", prefixName, prefixName, prefixName];
4211
4212 [GPUCode appendFormat: @" %@_SEM_neighbor_kernel2_2rk<<< %@n_threadsPerGrid, %@n_threadsPerBlock >>>(currentTime, nextTime);\n", prefixName, prefixName, prefixName];
4213 [GPUCode appendFormat: @" cudacheck(\"invoke %@_SEM_neighbor_kernel2_2rk\");\n\n", prefixName];
4214
4215 [GPUCode appendFormat: @" %@_SEM_neighbor_update_kernel_2rk<<< %@_threadsPerGrid, %@_threadsPerBlock >>>(currentTime, nextTime);\n", prefixName, prefixName, prefixName];
4216 [GPUCode appendFormat: @" cudacheck(\"invoke %@_SEM_neighbor_update_kernel_2rk\");\n\n", prefixName];
4217
4218 [GPUCode appendFormat: @" %@_SEM_sector_count_kernel<<< %@s_threadsPerGrid, %@s_threadsPerBlock >>>(currentTime, nextTime);\n", prefixName, prefixName, prefixName];
4219 [GPUCode appendFormat: @" cudacheck(\"invoke %@_SEM_sector_count_kernel\");\n\n", prefixName];
4220
4221 [GPUCode appendFormat: @" %@_SEM_create_sectors_kernel<<< %@m_threadsPerGrid, %@m_threadsPerBlock >>>(currentTime, nextTime);\n", prefixName, prefixName, prefixName];
4222 [GPUCode appendFormat: @" cudacheck(\"invoke %@_SEM_create_sectors_kernel\");\n\n", prefixName];
4223
4224 [GPUCode appendFormat: @" %@_SEM_build_sector_neighbors_kernel<<< %@s_threadsPerGrid, %@s_threadsPerBlock >>>(currentTime, nextTime);\n", prefixName, prefixName, prefixName];
4225 [GPUCode appendFormat: @" cudacheck(\"invoke %@_SEM_build_sector_neighbors_kernel\");\n\n", prefixName];
4226
4227 [GPUCode appendFormat: @" %@_SEM_move_elements_kernel<<< %@s_threadsPerGrid, %@s_threadsPerBlock >>>(currentTime, nextTime);\n", prefixName, prefixName, prefixName];
4228 [GPUCode appendFormat: @" cudacheck(\"invoke %@_SEM_move_elements_kernel\");\n\n", prefixName];
4229
4230 [GPUCode appendFormat: @" %@_MPI_SEM_gather<<< %@_mpi_blocksPerGrid, %@_mpi_threadsPerBlock >>>(currentTime, nextTime, BC_MPI_SEM_MODE_MOVE_READ_ONLY);\n", prefixName, prefixName, prefixName];
4231 [GPUCode appendFormat: @" BC_MPI_initiate_transfer(model, BC_MPI_SEM_MODE_MOVE_READ_ONLY, currentTime, nextTime);\n"];
4232 [GPUCode appendFormat: @" %@_MPI_SEM_scatter<<< %@_mpi_blocksPerGrid, %@_mpi_threadsPerBlock >>>(currentTime, nextTime, BC_MPI_SEM_MODE_MOVE_READ_ONLY);\n", prefixName, prefixName, prefixName];
4233 [GPUCode appendFormat: @" %@_MPI_SEM_sector_scatter<<< %@s_mpi_blocksPerGrid, %@s_mpi_threadsPerBlock >>>(currentTime, nextTime, BC_MPI_SEM_MODE_MOVE_READ_ONLY);\n\n", prefixName, prefixName, prefixName];
4234
4235 [GPUCode appendFormat: @" %@_MPI_SEM_gather<<< %@_mpi_blocksPerGrid, %@_mpi_threadsPerBlock >>>(currentTime, nextTime, BC_MPI_SEM_MODE_MOVE);\n", prefixName, prefixName, prefixName];
4236 [GPUCode appendFormat: @" BC_MPI_initiate_transfer(model, BC_MPI_SEM_MODE_MOVE, currentTime, nextTime);\n"];
4237 [GPUCode appendFormat: @" %@_MPI_SEM_scatter<<< %@_mpi_blocksPerGrid, %@_mpi_threadsPerBlock >>>(currentTime, nextTime, BC_MPI_SEM_MODE_MOVE);\n", prefixName, prefixName, prefixName];
4238 [GPUCode appendFormat: @" %@_MPI_SEM_sector_scatter<<< %@s_mpi_blocksPerGrid, %@s_mpi_threadsPerBlock >>>(currentTime, nextTime, BC_MPI_SEM_MODE_MOVE);\n\n", prefixName, prefixName, prefixName];
4239
4240 [GPUCode appendFormat: @" %@_SEM_update_element_sector_kernel<<< %@s_threadsPerGrid, %@s_threadsPerBlock >>>(currentTime, nextTime);\n", prefixName, prefixName, prefixName];
4241 [GPUCode appendFormat: @" cudacheck(\"invoke %@_SEM_update_element_sector_kernel\");\n\n", prefixName];
4242
4243 [GPUCode appendFormat: @" %@_SEM_update_element_neighbor_kernel<<< %@n_threadsPerGrid, %@n_threadsPerBlock >>>(currentTime, nextTime);\n", prefixName, prefixName, prefixName];
4244 [GPUCode appendFormat: @" cudacheck(\"invoke %@_SEM_update_element_neighbor_kernel\");\n\n", prefixName];
4245 #endif
4246
4247 [GPUCode appendFormat: @" %@_SEM_pairwise_kernel1_2rk<<< %@_threadsPerGrid, %@_threadsPerBlock >>>(currentTime, nextTime);\n", prefixName, prefixName, prefixName];
4248 [GPUCode appendFormat: @" cudacheck(\"invoke %@_SEM_pairwise_kernel1_2rk\");\n\n", prefixName];
4249
4250 [GPUCode appendFormat: @" %@_MPI_SEM_gather<<< %@_mpi_blocksPerGrid, %@_mpi_threadsPerBlock >>>(currentTime, nextTime, BC_MPI_SEM_MODE_F1);\n", prefixName, prefixName, prefixName];
4251 [GPUCode appendFormat: @" BC_MPI_initiate_transfer(model, BC_MPI_SEM_MODE_F1, currentTime, nextTime);\n"];
4252 [GPUCode appendFormat: @" %@_MPI_SEM_scatter<<< %@_mpi_blocksPerGrid, %@_mpi_threadsPerBlock >>>(currentTime, nextTime, BC_MPI_SEM_MODE_F1);\n\n", prefixName, prefixName, prefixName];
4253
4254 [GPUCode appendFormat: @" %@_SEM_pairwise_kernel2_2rk<<< %@_threadsPerGrid, %@_threadsPerBlock >>>(currentTime, nextTime);\n", prefixName, prefixName, prefixName];
4255 [GPUCode appendFormat: @" cudacheck(\"invoke %@_SEM_pairwise_kernel2_2rk\");\n\n", prefixName];
4256
4257 [GPUCode appendFormat: @" %@_SEM_sector_count_kernel<<< %@s_threadsPerGrid, %@s_threadsPerBlock >>>(currentTime, nextTime);\n", prefixName, prefixName, prefixName];
4258 [GPUCode appendFormat: @" cudacheck(\"invoke %@_SEM_sector_count_kernel\");\n\n", prefixName];
4259
4260 [GPUCode appendFormat: @" %@_SEM_create_sectors_kernel<<< %@m_threadsPerGrid, %@m_threadsPerBlock >>>(currentTime, nextTime);\n", prefixName, prefixName, prefixName];
4261 [GPUCode appendFormat: @" cudacheck(\"invoke %@_SEM_create_sectors_kernel\");\n\n", prefixName];
4262
4263 [GPUCode appendFormat: @" %@_SEM_build_sector_neighbors_kernel<<< %@s_threadsPerGrid, %@s_threadsPerBlock >>>(currentTime, nextTime);\n", prefixName, prefixName, prefixName];
4264 [GPUCode appendFormat: @" cudacheck(\"invoke %@_SEM_build_sector_neighbors_kernel\");\n\n", prefixName];
4265
4266 [GPUCode appendFormat: @" %@_SEM_move_elements_kernel<<< %@s_threadsPerGrid, %@s_threadsPerBlock >>>(currentTime, nextTime);\n", prefixName, prefixName, prefixName];
4267 [GPUCode appendFormat: @" cudacheck(\"invoke %@_SEM_move_elements_kernel\");\n\n", prefixName];
4268
4269 [GPUCode appendFormat: @" %@_MPI_SEM_gather<<< %@_mpi_blocksPerGrid, %@_mpi_threadsPerBlock >>>(currentTime, nextTime, BC_MPI_SEM_MODE_MOVE_READ_ONLY);\n", prefixName, prefixName, prefixName];
4270 [GPUCode appendFormat: @" BC_MPI_initiate_transfer(model, BC_MPI_SEM_MODE_MOVE_READ_ONLY, currentTime, nextTime);\n"];
4271 [GPUCode appendFormat: @" %@_MPI_SEM_scatter<<< %@_mpi_blocksPerGrid, %@_mpi_threadsPerBlock >>>(currentTime, nextTime, BC_MPI_SEM_MODE_MOVE_READ_ONLY);\n", prefixName, prefixName, prefixName];
4272 [GPUCode appendFormat: @" %@_MPI_SEM_sector_scatter<<< %@s_mpi_blocksPerGrid, %@s_mpi_threadsPerBlock >>>(currentTime, nextTime, BC_MPI_SEM_MODE_MOVE_READ_ONLY);\n\n", prefixName, prefixName, prefixName];
4273
4274 [GPUCode appendFormat: @" %@_MPI_SEM_gather<<< %@_mpi_blocksPerGrid, %@_mpi_threadsPerBlock >>>(currentTime, nextTime, BC_MPI_SEM_MODE_MOVE);\n", prefixName, prefixName, prefixName];
4275 [GPUCode appendFormat: @" BC_MPI_initiate_transfer(model, BC_MPI_SEM_MODE_MOVE, currentTime, nextTime);\n"];
4276 [GPUCode appendFormat: @" %@_MPI_SEM_scatter<<< %@_mpi_blocksPerGrid, %@_mpi_threadsPerBlock >>>(currentTime, nextTime, BC_MPI_SEM_MODE_MOVE);\n", prefixName, prefixName, prefixName];
4277 [GPUCode appendFormat: @" %@_MPI_SEM_sector_scatter<<< %@s_mpi_blocksPerGrid, %@s_mpi_threadsPerBlock >>>(currentTime, nextTime, BC_MPI_SEM_MODE_MOVE);\n\n", prefixName, prefixName, prefixName];
4278
4279 [GPUCode appendFormat: @" %@_SEM_update_element_sector_kernel<<< %@s_threadsPerGrid, %@s_threadsPerBlock >>>(currentTime, nextTime);\n", prefixName, prefixName, prefixName];
4280 [GPUCode appendFormat: @" cudacheck(\"invoke %@_SEM_update_element_sector_kernel\");\n\n", prefixName];
4281
4282 [GPUDictionary setObject: GPUCode forKey: @"execution code"];
4283
4284 //
4285 // Release: generic
4286 //
4287
4288 //
4289 // Footer
4290 //
4291 GPUCode = [NSMutableString new];
4292 [GPUCode appendFormat: @"\nvoid %@_MPI_invokeGPUKernel(void *model, void *%@_data, double startTime, double nextTime)", prefixName, prefixName];
4293 [GPUCode appendString: @"\n{\n"];
4294 [GPUCode appendFormat: @" BC_MPI_SEM_GPUdata *%@_ptrs = (BC_MPI_SEM_GPUdata *)%@_data;\n", prefixName, prefixName];
4295 [GPUCode appendString: @"\n}\n"];
4296
4297 [GPUCode appendString: @"\n// GPU function pointers\n"];
4298 //[GPUCode appendFormat: @"SEMfunctions %@_gpuFunctions = {%@_allocGPUKernel, %@_transferGPUKernel, %@_invokeGPUKernel, %@_releaseGPUKernel};\n", prefixName, prefixName, prefixName, prefixName, prefixName];
4299 [GPUCode appendFormat: @"SEMfunctions %@_gpuFunctions = {BC_SEM_allocGPUKernel, %@_transferGPUKernel, %@_invokeGPUKernel, BC_SEM_releaseGPUKernel, %@_MPI_allocGPUKernel, %@_MPI_transferGPUKernel, %@_MPI_invokeGPUKernel, BC_MPI_SEM_releaseGPUKernel};\n", prefixName, prefixName, prefixName, prefixName, prefixName, prefixName];
4300 [GPUDictionary setObject: GPUCode forKey: @"footer"];
4301
4302 return GPUDictionary;
4303 }
4304
4305 - (NSString *)centerKernelGPUMPICode: (NSString *)prefixName
4306 {
4307 NSMutableString *GPUCode;
4308 int dims = [[super spatialModel] dimensions];
4309
4310 GPUCode = [NSMutableString new];
4311
4312 //
4313 // Center calculation for all elements in each cell
4314 //
4315 [GPUCode appendString: @"\n// Calculate cell center\n"];
4316 [GPUCode appendString: @"__global__ void\n"];
4317 [GPUCode appendFormat: @"%@_SEM_center_kernel()\n", prefixName];
4318 [GPUCode appendString: @"{\n"];
4319
4320 [GPUCode appendString: @" int cellNum = blockIdx.x * blockDim.x + threadIdx.x;\n"];
4321 [GPUCode appendString: @" int modelNum = blockIdx.y * blockDim.y + threadIdx.y;\n"];
4322 [GPUCode appendString: @"\n"];
4323 [GPUCode appendFormat: @" if (cellNum >= %@_sem_grids->numOfCells) return;\n", prefixName];
4324 [GPUCode appendFormat: @" if (modelNum >= %@_sem_grids->numOfModels) return;\n", prefixName];
4325 [GPUCode appendString: @"\n"];
4326
4327 [GPUCode appendString: @" int k;\n"];
4328 [GPUCode appendString: @" float cX = 0.0;\n"];
4329 [GPUCode appendString: @" float cY = 0.0;\n"];
4330 if (dims == 3) [GPUCode appendString: @" float cZ = 0.0;\n"];
4331 [GPUCode appendFormat: @" int cidx = modelNum*%@_sem_grids->idx_cPitch+cellNum;\n", prefixName];
4332 //[GPUCode appendString: @" int first = 1;\n"];
4333 [GPUCode appendString: @" float numOfElements = 0.0;\n"];
4334 [GPUCode appendString: @"\n"];
4335
4336 [GPUCode appendFormat: @" for (k = 0; k < %@_sem_grids->totalElements; ++k) {\n", prefixName];
4337 [GPUCode appendFormat: @" int idx = modelNum*%@_sem_grids->idx_pitch+k;\n", prefixName];
4338 [GPUCode appendFormat: @" if (%@_sem_grids->cellNumber[idx] != cellNum) continue;\n", prefixName];
4339 [GPUCode appendString: @"\n"];
4340
4341 [GPUCode appendFormat: @" cX += %@_sem_grids->X[idx];\n", prefixName];
4342 [GPUCode appendFormat: @" cY += %@_sem_grids->Y[idx];\n", prefixName];
4343 if (dims == 3) [GPUCode appendFormat: @" cZ += %@_sem_grids->Z[idx];\n", prefixName];
4344 [GPUCode appendString: @" numOfElements += 1.0f;\n"];
4345 [GPUCode appendString: @" }\n"];
4346 [GPUCode appendString: @"\n"];
4347
4348 [GPUCode appendString: @" cX = cX / numOfElements;\n"];
4349 [GPUCode appendString: @" cY = cY / numOfElements;\n"];
4350 if (dims == 3) [GPUCode appendString: @" cZ = cZ / numOfElements;\n"];
4351
4352 [GPUCode appendFormat: @" %@_sem_grids->cellCenterX[cidx] = cX;\n", prefixName];
4353 [GPUCode appendFormat: @" %@_sem_grids->cellCenterY[cidx] = cY;\n", prefixName];
4354 if (dims == 3) [GPUCode appendFormat: @" %@_sem_grids->cellCenterZ[cidx] = cZ;\n", prefixName];
4355
4356 [GPUCode appendString: @"}\n"];
4357
4358 //
4359 // Center calculation for each element type in each cell
4360 // To avoid using a 3D array, the element types and models are combined in the rows
4361 //
4362 [GPUCode appendString: @"\n// Calculate element centers\n"];
4363 [GPUCode appendString: @"__global__ void\n"];
4364 [GPUCode appendFormat: @"%@_SEM_element_center_kernel()\n", prefixName];
4365 [GPUCode appendString: @"{\n"];
4366
4367 [GPUCode appendString: @" int cellNum = blockIdx.x * blockDim.x + threadIdx.x;\n"];
4368 [GPUCode appendString: @" int modelNum = blockIdx.y * blockDim.y + threadIdx.y;\n"];
4369 [GPUCode appendString: @"\n"];
4370 [GPUCode appendFormat: @" if (cellNum >= %@_sem_grids->numOfCells) return;\n", prefixName];
4371 [GPUCode appendFormat: @" if (modelNum >= %@_sem_grids->numOfModels) return;\n", prefixName];
4372 [GPUCode appendString: @"\n"];
4373
4374 [GPUCode appendString: @" int k;\n"];
4375
4376 [GPUCode appendString: @"\n // zero out element centers\n"];
4377 [GPUCode appendFormat: @" for (k = 0; k < %@_sem_grids->maxElementTypes; ++k) {\n", prefixName];
4378 [GPUCode appendFormat: @" int cidx = ((modelNum*%@_sem_grids->maxElementTypes)+k)*%@_sem_grids->idx_cPitch+cellNum;\n", prefixName, prefixName];
4379 [GPUCode appendFormat: @" %@_sem_grids->elementCenterX[cidx] = 0;\n", prefixName];
4380 [GPUCode appendFormat: @" %@_sem_grids->elementCenterY[cidx] = 0;\n", prefixName];
4381 if (dims == 3) [GPUCode appendFormat: @" %@_sem_grids->elementCenterZ[cidx] = 0;\n", prefixName];
4382 [GPUCode appendFormat: @" %@_sem_grids->elementCenterCount[cidx] = 0;\n", prefixName];
4383 [GPUCode appendString: @" }\n"];
4384
4385 [GPUCode appendString: @"\n // calc element centers\n"];
4386 [GPUCode appendFormat: @" for (k = 0; k < %@_sem_grids->totalElements; ++k) {\n", prefixName];
4387 [GPUCode appendFormat: @" int idx = modelNum*%@_sem_grids->idx_pitch+k;\n", prefixName];
4388 [GPUCode appendFormat: @" if (%@_sem_grids->cellNumber[idx] != cellNum) continue;\n", prefixName];
4389 [GPUCode appendFormat: @" int cidx = ((modelNum*%@_sem_grids->maxElementTypes)+%@_sem_grids->elementType[idx])*%@_sem_grids->idx_cPitch+cellNum;\n", prefixName, prefixName, prefixName];
4390 [GPUCode appendString: @"\n"];
4391
4392 [GPUCode appendFormat: @" %@_sem_grids->elementCenterX[cidx] += %@_sem_grids->X[idx];\n", prefixName, prefixName];
4393 [GPUCode appendFormat: @" %@_sem_grids->elementCenterY[cidx] += %@_sem_grids->Y[idx];\n", prefixName, prefixName];
4394 if (dims == 3) [GPUCode appendFormat: @" %@_sem_grids->elementCenterZ[cidx] += %@_sem_grids->Z[idx];\n", prefixName, prefixName];
4395 [GPUCode appendFormat: @" %@_sem_grids->elementCenterCount[cidx] += 1.0f;\n", prefixName];
4396
4397 [GPUCode appendString: @" }\n"];
4398
4399 [GPUCode appendFormat: @" for (k = 0; k < %@_sem_grids->maxElementTypes; ++k) {\n", prefixName];
4400 [GPUCode appendFormat: @" int cidx = ((modelNum*%@_sem_grids->maxElementTypes)+k)*%@_sem_grids->idx_cPitch+cellNum;\n", prefixName, prefixName];
4401 [GPUCode appendFormat: @" if (%@_sem_grids->elementCenterCount[cidx] != 0) {\n", prefixName];
4402 [GPUCode appendFormat: @" %@_sem_grids->elementCenterX[cidx] /= %@_sem_grids->elementCenterCount[cidx];\n", prefixName, prefixName];
4403 [GPUCode appendFormat: @" %@_sem_grids->elementCenterY[cidx] /= %@_sem_grids->elementCenterCount[cidx];\n", prefixName, prefixName];
4404 if (dims == 3) [GPUCode appendFormat: @" %@_sem_grids->elementCenterZ[cidx] /= %@_sem_grids->elementCenterCount[cidx];\n", prefixName, prefixName];
4405 [GPUCode appendString: @" }\n"];
4406 [GPUCode appendString: @" }\n"];
4407
4408 [GPUCode appendString: @"}\n"];
4409
4410 return GPUCode;
4411 }
4412
4413 - (NSMutableString *)kernelGPUMPICode: (NSString *)prefixName
4414 {
4415 int dims = [[super spatialModel] dimensions];
4416
4417 NSMutableString *GPUCode = [NSMutableString new];
4418
4419 // kernel to calculate centers
4420 [GPUCode appendString: [self centerKernelGPUCode: prefixName]];
4421
4422 // kernel to calculate SEM movement function
4423 //[GPUCode appendString: [self functionKernelGPUCode: prefixName]];
4424
4425 // kernel for numerical method
4426 if ([numericalScheme isEqualToString: @"RungeKutta2ndOrder"]) {
4427 [GPUCode appendString: @"\n// F1 Kernel for 2nd order Runge-Kutta\n"];
4428 [GPUCode appendString: @"__global__ void\n"];
4429 [GPUCode appendFormat: @"%@_SEM_kernel1_2rk(float currentTime, float finalTime)\n", prefixName];
4430 [GPUCode appendString: @"{\n"];
4431 if (useMesh)
4432 [GPUCode appendFormat: @" BC_SEM_mesh_kernel1_2rk(%@_sem_grids, currentTime, finalTime);\n", prefixName];
4433 else
4434 [GPUCode appendFormat: @" BC_SEM_kernel1_2rk(%@_sem_grids, currentTime, finalTime);\n", prefixName];
4435 [GPUCode appendString: @"}\n"];
4436
4437 [GPUCode appendString: @"\n// F2 Kernel for 2nd order Runge-Kutta\n"];
4438 [GPUCode appendString: @"__global__ void\n"];
4439 [GPUCode appendFormat: @"%@_SEM_kernel2_2rk(float currentTime, float finalTime)\n", prefixName];
4440 [GPUCode appendString: @"{\n"];
4441 if (useMesh)
4442 [GPUCode appendFormat: @" BC_SEM_mesh_kernel2_2rk(%@_sem_grids, currentTime, finalTime);\n", prefixName];
4443 else
4444 [GPUCode appendFormat: @" BC_SEM_kernel2_2rk(%@_sem_grids, currentTime, finalTime);\n", prefixName];
4445 [GPUCode appendString: @"}\n"];
4446
4447 [GPUCode appendString: @"\n// F1 Kernel for 2nd order Runge-Kutta, pairwise algorithm\n"];
4448 [GPUCode appendString: @"__global__ void\n"];
4449 [GPUCode appendFormat: @"%@_SEM_pairwise_kernel1_2rk(float currentTime, float finalTime)\n", prefixName];
4450 [GPUCode appendString: @"{\n"];
4451 [GPUCode appendFormat: @" BC_SEM_pairwise_kernel1_2rk(%@_sem_grids, currentTime, finalTime);\n", prefixName];
4452 [GPUCode appendString: @"}\n"];
4453
4454 [GPUCode appendString: @"\n// F2 Kernel for 2nd order Runge-Kutta, pairwise algorithm\n"];
4455 [GPUCode appendString: @"__global__ void\n"];
4456 [GPUCode appendFormat: @"%@_SEM_pairwise_kernel2_2rk(float currentTime, float finalTime)\n", prefixName];
4457 [GPUCode appendString: @"{\n"];
4458 [GPUCode appendFormat: @" BC_SEM_pairwise_kernel2_2rk(%@_sem_grids, currentTime, finalTime);\n", prefixName];
4459 [GPUCode appendString: @"}\n"];
4460
4461 if (!useMesh) {
4462 [GPUCode appendString: @"\n// F1 Kernel for 2nd order Runge-Kutta, neighbor pair algorithm\n"];
4463 [GPUCode appendString: @"__global__ void\n"];
4464 [GPUCode appendFormat: @"%@_SEM_neighbor_kernel1_2rk(float currentTime, float finalTime)\n", prefixName];
4465 [GPUCode appendString: @"{\n"];
4466 [GPUCode appendFormat: @" BC_SEM_neighbor_kernel1_2rk(%@_sem_grids, currentTime, finalTime);\n", prefixName];
4467 [GPUCode appendString: @"}\n"];
4468
4469 [GPUCode appendString: @"\n// Sum forces for F1\n"];
4470 [GPUCode appendString: @"__global__ void\n"];
4471 [GPUCode appendFormat: @"%@_SEM_neighbor_sum_kernel1_2rk(float currentTime, float finalTime)\n", prefixName];
4472 [GPUCode appendString: @"{\n"];
4473 [GPUCode appendFormat: @" BC_SEM_neighbor_sum_kernel1_2rk(%@_sem_grids, currentTime, finalTime);\n", prefixName];
4474 [GPUCode appendString: @"}\n"];
4475
4476 [GPUCode appendString: @"\n// F2 Kernel for 2nd order Runge-Kutta, neighbor pair algorithm\n"];
4477 [GPUCode appendString: @"__global__ void\n"];
4478 [GPUCode appendFormat: @"%@_SEM_neighbor_kernel2_2rk(float currentTime, float finalTime)\n", prefixName];
4479 [GPUCode appendString: @"{\n"];
4480 [GPUCode appendFormat: @" BC_SEM_neighbor_kernel2_2rk(%@_sem_grids, currentTime, finalTime);\n", prefixName];
4481 [GPUCode appendString: @"}\n"];
4482
4483 [GPUCode appendString: @"\n// Sum forces for F2 and update element positions\n"];
4484 [GPUCode appendString: @"__global__ void\n"];
4485 [GPUCode appendFormat: @"%@_SEM_neighbor_update_kernel_2rk(float currentTime, float finalTime)\n", prefixName];
4486 [GPUCode appendString: @"{\n"];
4487 [GPUCode appendFormat: @" BC_SEM_neighbor_update_kernel_2rk(%@_sem_grids, currentTime, finalTime);\n", prefixName];
4488 [GPUCode appendString: @"}\n"];
4489
4490 [GPUCode appendString: @"\n// Determine if need new sectors\n"];
4491 [GPUCode appendString: @"__global__ void\n"];
4492 [GPUCode appendFormat: @"%@_SEM_sector_count_kernel(float currentTime, float finalTime)\n", prefixName];
4493 [GPUCode appendString: @"{\n"];
4494 [GPUCode appendFormat: @" BC_SEM_sector_count_kernel(%@_sem_grids, currentTime, finalTime);\n", prefixName];
4495 [GPUCode appendString: @"}\n"];
4496
4497 [GPUCode appendString: @"\n// Create new sectors\n"];
4498 [GPUCode appendString: @"__global__ void\n"];
4499 [GPUCode appendFormat: @"%@_SEM_create_sectors_kernel(float currentTime, float finalTime)\n", prefixName];
4500 [GPUCode appendString: @"{\n"];
4501 [GPUCode appendFormat: @" BC_SEM_create_sectors_kernel(%@_sem_grids, currentTime, finalTime);\n", prefixName];
4502 [GPUCode appendString: @"}\n"];
4503
4504 [GPUCode appendString: @"\n// Update sector neighbor table\n"];
4505 [GPUCode appendString: @"__global__ void\n"];
4506 [GPUCode appendFormat: @"%@_SEM_build_sector_neighbors_kernel(float currentTime, float finalTime)\n", prefixName];
4507 [GPUCode appendString: @"{\n"];
4508 [GPUCode appendFormat: @" BC_SEM_build_sector_neighbors_kernel(%@_sem_grids, currentTime, finalTime);\n", prefixName];
4509 [GPUCode appendString: @"}\n"];
4510
4511 [GPUCode appendString: @"\n// Move elements to their sectors\n"];
4512 [GPUCode appendString: @"__global__ void\n"];
4513 [GPUCode appendFormat: @"%@_SEM_move_elements_kernel(float currentTime, float finalTime)\n", prefixName];
4514 [GPUCode appendString: @"{\n"];
4515 [GPUCode appendFormat: @" BC_SEM_move_elements_kernel(%@_sem_grids, currentTime, finalTime);\n", prefixName];
4516 [GPUCode appendString: @"}\n"];
4517
4518 [GPUCode appendString: @"\n// Update sector tables\n"];
4519 [GPUCode appendString: @"__global__ void\n"];
4520 [GPUCode appendFormat: @"%@_SEM_update_element_sector_kernel(float currentTime, float finalTime)\n", prefixName];
4521 [GPUCode appendString: @"{\n"];
4522 [GPUCode appendFormat: @" BC_SEM_update_element_sector_kernel(%@_sem_grids, currentTime, finalTime);\n", prefixName];
4523 [GPUCode appendString: @"}\n"];
4524
4525 [GPUCode appendString: @"\n// Update element neighbor table\n"];
4526 [GPUCode appendString: @"__global__ void\n"];
4527 [GPUCode appendFormat: @"%@_SEM_update_element_neighbor_kernel(float currentTime, float finalTime)\n", prefixName];
4528 [GPUCode appendString: @"{\n"];
4529 [GPUCode appendFormat: @" BC_SEM_update_element_neighbor_kernel(%@_sem_grids, currentTime, finalTime);\n", prefixName];
4530 [GPUCode appendString: @"}\n"];
4531
4532 [GPUCode appendString: @"\n// MPI gather\n"];
4533 [GPUCode appendString: @"__global__ void\n"];
4534 [GPUCode appendFormat: @"%@_MPI_SEM_gather(float currentTime, float finalTime, int mode)\n", prefixName];
4535 [GPUCode appendString: @"{\n"];
4536 [GPUCode appendFormat: @" BC_MPI_SEM_gather(%@_mpi_sem_data, %@_sem_grids, mode);\n", prefixName, prefixName];
4537 [GPUCode appendString: @"}\n"];
4538
4539 [GPUCode appendString: @"\n// MPI scatter\n"];
4540 [GPUCode appendString: @"__global__ void\n"];
4541 [GPUCode appendFormat: @"%@_MPI_SEM_scatter(float currentTime, float finalTime, int mode)\n", prefixName];
4542 [GPUCode appendString: @"{\n"];
4543 [GPUCode appendFormat: @" BC_MPI_SEM_scatter(%@_mpi_sem_data, %@_sem_grids, mode);\n", prefixName, prefixName];
4544 [GPUCode appendString: @"}\n"];
4545
4546 [GPUCode appendString: @"\n// MPI sector scatter\n"];
4547 [GPUCode appendString: @"__global__ void\n"];
4548 [GPUCode appendFormat: @"%@_MPI_SEM_sector_scatter(float currentTime, float finalTime, int mode)\n", prefixName];
4549 [GPUCode appendString: @"{\n"];
4550 [GPUCode appendFormat: @" BC_MPI_SEM_sector_scatter(%@_mpi_sem_data, %@_sem_grids, mode);\n", prefixName, prefixName];
4551 [GPUCode appendString: @"}\n"];
4552
4553 }
4554
4555 } else {
4556 printf("ERROR: unknown numerical scheme: %s\n", [numericalScheme UTF8String]);
4557 return nil;
4558 }
4559
4560 return GPUCode;
4561 }
4562
4563 - (NSMutableString *)cleanupGPUMPICode: (NSString *)prefixName
4564 {
4565 return nil;
4566 }
4567
4568 @end
4569
4570
4571 //
4572 // Sector functions
4573 //
4574 void bc_sector_coordinate_shift(int xc, int yc, int zc, int direction, int *X, int *Y, int *Z)
4575 {
4576 *X = xc;
4577 *Y = yc;
4578 *Z = zc;
4579 switch (direction) {
4580 case S_LEFT:
4581 *X = *X - 1; break;
4582 case S_RIGHT:
4583 *X = *X + 1; break;
4584 case S_UP:
4585 *Y = *Y + 1; break;
4586 case S_DOWN:
4587 *Y = *Y - 1; break;
4588 case S_LEFT_UP:
4589 *X = *X - 1; *Y = *Y + 1; break;
4590 case S_RIGHT_UP:
4591 *X = *X + 1; *Y = *Y + 1; break;
4592 case S_LEFT_DOWN:
4593 *X = *X - 1; *Y = *Y - 1; break;
4594 case S_RIGHT_DOWN:
4595 *X = *X + 1; *Y = *Y - 1; break;
4596 case S_FRONT:
4597 *Z = *Z - 1; break;
4598 case S_BACK:
4599 *Z = *Z + 1; break;
4600 case S_LEFT_FRONT:
4601 *X = *X - 1; *Z = *Z - 1; break;
4602 case S_RIGHT_FRONT:
4603 *X = *X + 1; *Z = *Z - 1; break;
4604 case S_LEFT_BACK:
4605 *X = *X - 1; *Z = *Z + 1; break;
4606 case S_RIGHT_BACK:
4607 *X = *X + 1; *Z = *Z + 1; break;
4608 case S_UP_FRONT:
4609 *Y = *Y + 1; *Z = *Z - 1; break;
4610 case S_DOWN_FRONT:
4611 *Y = *Y - 1; *Z = *Z - 1; break;
4612 case S_UP_BACK:
4613 *Y = *Y + 1; *Z = *Z + 1; break;
4614 case S_DOWN_BACK:
4615 *Y = *Y - 1; *Z = *Z + 1; break;
4616 case S_LEFT_UP_FRONT:
4617 *X = *X - 1; *Y = *Y + 1; *Z = *Z - 1; break;
4618 case S_RIGHT_UP_FRONT:
4619 *X = *X + 1; *Y = *Y + 1; *Z = *Z - 1; break;
4620 case S_LEFT_DOWN_FRONT:
4621 *X = *X - 1; *Y = *Y - 1; *Z = *Z - 1; break;
4622 case S_RIGHT_DOWN_FRONT:
4623 *X = *X + 1; *Y = *Y - 1; *Z = *Z - 1; break;
4624 case S_LEFT_UP_BACK:
4625 *X = *X - 1; *Y = *Y + 1; *Z = *Z + 1; break;
4626 case S_RIGHT_UP_BACK:
4627 *X = *X + 1; *Y = *Y + 1; *Z = *Z + 1; break;
4628 case S_LEFT_DOWN_BACK:
4629 *X = *X - 1; *Y = *Y - 1; *Z = *Z + 1; break;
4630 case S_RIGHT_DOWN_BACK:
4631 *X = *X + 1; *Y = *Y - 1; *Z = *Z + 1; break;
4632 }
4633 }
4634
4635 char *bc_sector_direction_string(int direction)
4636 {
4637 switch (direction) {
4638 case S_NONE:
4639 return "S_NONE"; break;
4640 case S_CURRENT:
4641 return "S_CURRENT"; break;
4642 case S_LEFT:
4643 return "S_LEFT"; break;
4644 case S_RIGHT:
4645 return "S_RIGHT"; break;
4646 case S_UP:
4647 return "S_UP"; break;
4648 case S_DOWN:
4649 return "S_DOWN"; break;
4650 case S_LEFT_UP:
4651 return "S_LEFT_UP"; break;
4652 case S_RIGHT_UP:
4653 return "S_RIGHT_UP"; break;
4654 case S_LEFT_DOWN:
4655 return "S_LEFT_DOWN"; break;
4656 case S_RIGHT_DOWN:
4657 return "S_RIGHT_DOWN"; break;
4658 case S_FRONT:
4659 return "S_FRONT"; break;
4660 case S_BACK:
4661 return "S_BACK"; break;
4662 case S_LEFT_FRONT:
4663 return "S_LEFT_FRONT"; break;
4664 case S_RIGHT_FRONT:
4665 return "S_RIGHT_FRONT"; break;
4666 case S_LEFT_BACK:
4667 return "S_LEFT_BACK"; break;
4668 case S_RIGHT_BACK:
4669 return "S_RIGHT_BACK"; break;
4670 case S_UP_FRONT:
4671 return "S_UP_FRONT"; break;
4672 case S_DOWN_FRONT:
4673 return "S_DOWN_FRONT"; break;
4674 case S_UP_BACK:
4675 return "S_UP_BACK"; break;
4676 case S_DOWN_BACK:
4677 return "S_DOWN_BACK"; break;
4678 case S_LEFT_UP_FRONT:
4679 return "S_LEFT_UP_FRONT"; break;
4680 case S_RIGHT_UP_FRONT:
4681 return "S_RIGHT_UP_FRONT"; break;
4682 case S_LEFT_DOWN_FRONT:
4683 return "S_LEFT_DOWN_FRONT"; break;
4684 case S_RIGHT_DOWN_FRONT:
4685 return "S_RIGHT_DOWN_FRONT"; break;
4686 case S_LEFT_UP_BACK:
4687 return "S_LEFT_UP_BACK"; break;
4688 case S_RIGHT_UP_BACK:
4689 return "S_RIGHT_UP_BACK"; break;
4690 case S_LEFT_DOWN_BACK:
4691 return "S_LEFT_DOWN_BACK"; break;
4692 case S_RIGHT_DOWN_BACK:
4693 return "S_RIGHT_DOWN_BACK"; break;
4694 default:
4695 return "ERROR: Unknown sector direction"; break;
4696 }
4697 }