Actual source code: ex48.c
1: static const char help[] = "Toy hydrostatic ice flow with multigrid in 3D\n\
2: \n\
3: Solves the hydrostatic (aka Blatter/Pattyn/First Order) equations for ice sheet flow\n\
4: using multigrid. The ice uses a power-law rheology with \"Glen\" exponent 3 (corresponds\n\
5: to p=4/3 in a p-Laplacian). The focus is on ISMIP-HOM experiments which assume periodic\n\
6: boundary conditions in the x- and y-directions.\n\
7: \n\
8: Equations are rescaled so that the domain size and solution are O(1), details of this scaling\n\
9: can be controlled by the options -units_meter, -units_second, and -units_kilogram.\n\
10: \n\
11: A VTK StructuredGrid output file can be written using the option -o filename.vts\n\
12: \n\n";
14: /*
15: The equations for horizontal velocity (u,v) are
17: - [eta (4 u_x + 2 v_y)]_x - [eta (u_y + v_x)]_y - [eta u_z]_z + rho g s_x = 0
18: - [eta (4 v_y + 2 u_x)]_y - [eta (u_y + v_x)]_x - [eta v_z]_z + rho g s_y = 0
20: where
22: eta = B/2 (epsilon + gamma)^((p-2)/2)
24: is the nonlinear effective viscosity with regularization epsilon and hardness parameter B,
25: written in terms of the second invariant
27: gamma = u_x^2 + v_y^2 + u_x v_y + (1/4) (u_y + v_x)^2 + (1/4) u_z^2 + (1/4) v_z^2
29: The surface boundary conditions are the natural conditions. The basal boundary conditions
30: are either no-slip, or Navier (linear) slip with spatially variant friction coefficient beta^2.
32: In the code, the equations for (u,v) are multiplied through by 1/(rho g) so that residuals are O(1).
34: The discretization is Q1 finite elements, managed by a DA. The grid is never distorted in the
35: map (x,y) plane, but the bed and surface may be bumpy. This is handled as usual in FEM, through
36: the Jacobian of the coordinate transformation from a reference element to the physical element.
38: Since ice-flow is tightly coupled in the z-direction (within columns), the DA is managed
39: specially so that columns are never distributed, and are always contiguous in memory.
40: This amounts to reversing the meaning of X,Y,Z compared to the DA's internal interpretation,
41: and then indexing as vec[i][j][k]. The exotic coarse spaces require 2D DAs which are made to
42: use compatible domain decomposition relative to the 3D DAs.
44: There are two compile-time options:
46: NO_SSE2:
47: If the host supports SSE2, we use integration code that has been vectorized with SSE2
48: intrinsics, unless this macro is defined. The intrinsics speed up integration by about
49: 30% on my architecture (P8700, gcc-4.5 snapshot).
51: COMPUTE_LOWER_TRIANGULAR:
52: The element matrices we assemble are lower-triangular so it is not necessary to compute
53: all entries explicitly. If this macro is defined, the lower-triangular entries are
54: computed explicitly.
56: */
58: #include <petscdmmg.h>
59: #include <ctype.h> /* toupper() */
60: #include <private/daimpl.h> /* There is not yet a public interface to manipulate dm->ops */
62: #if !defined __STDC_VERSION__ || __STDC_VERSION__ < 199901L
63: # if defined __cplusplus /* C++ restrict is nonstandard and compilers have inconsistent rules about where it can be used */
64: # define restrict
65: # else
66: # define restrict PETSC_RESTRICT
67: # endif
68: #endif
69: #if defined __SSE2__
70: # include <emmintrin.h>
71: #endif
73: /* The SSE2 kernels are only for PetscScalar=double on architectures that support it */
74: #define USE_SSE2_KERNELS (!defined NO_SSE2 \
75: && !defined PETSC_USE_COMPLEX \
76: && !defined PETSC_USE_SCALAR_SINGLE \
77: && !defined PETSC_USE_SCALAR_LONG_DOUBLE \
78: && defined __SSE2__)
80: static PetscCookie THI_COOKIE;
82: typedef enum {QUAD_GAUSS,QUAD_LOBATTO} QuadratureType;
83: static const char *QuadratureTypes[] = {"gauss","lobatto","QuadratureType","QUAD_",0};
84: static const PetscReal HexQWeights[8] = {1,1,1,1,1,1,1,1};
85: static const PetscReal HexQNodes[] = {-0.57735026918962573, 0.57735026918962573};
86: #define G 0.57735026918962573
87: #define H (0.5*(1.+G))
88: #define L (0.5*(1.-G))
89: #define M (-0.5)
90: #define P (0.5)
91: /* Special quadrature: Lobatto in horizontal, Gauss in vertical */
92: static const PetscReal HexQInterp_Lobatto[8][8] = {{H,0,0,0,L,0,0,0},
93: {0,H,0,0,0,L,0,0},
94: {0,0,H,0,0,0,L,0},
95: {0,0,0,H,0,0,0,L},
96: {L,0,0,0,H,0,0,0},
97: {0,L,0,0,0,H,0,0},
98: {0,0,L,0,0,0,H,0},
99: {0,0,0,L,0,0,0,H}};
100: static const PetscReal HexQDeriv_Lobatto[8][8][3] = {
101: {{M*H,M*H,M},{P*H,0,0} ,{0,0,0} ,{0,P*H,0} ,{M*L,M*L,P},{P*L,0,0} ,{0,0,0} ,{0,P*L,0} },
102: {{M*H,0,0} ,{P*H,M*H,M},{0,P*H,0} ,{0,0,0} ,{M*L,0,0} ,{P*L,M*L,P},{0,P*L,0} ,{0,0,0} },
103: {{0,0,0} ,{0,M*H,0} ,{P*H,P*H,M},{M*H,0,0} ,{0,0,0} ,{0,M*L,0} ,{P*L,P*L,P},{M*L,0,0} },
104: {{0,M*H,0} ,{0,0,0} ,{P*H,0,0} ,{M*H,P*H,M},{0,M*L,0} ,{0,0,0} ,{P*L,0,0} ,{M*L,P*L,P}},
105: {{M*L,M*L,M},{P*L,0,0} ,{0,0,0} ,{0,P*L,0} ,{M*H,M*H,P},{P*H,0,0} ,{0,0,0} ,{0,P*H,0} },
106: {{M*L,0,0} ,{P*L,M*L,M},{0,P*L,0} ,{0,0,0} ,{M*H,0,0} ,{P*H,M*H,P},{0,P*H,0} ,{0,0,0} },
107: {{0,0,0} ,{0,M*L,0} ,{P*L,P*L,M},{M*L,0,0} ,{0,0,0} ,{0,M*H,0} ,{P*H,P*H,P},{M*H,0,0} },
108: {{0,M*L,0} ,{0,0,0} ,{P*L,0,0} ,{M*L,P*L,M},{0,M*H,0} ,{0,0,0} ,{P*H,0,0} ,{M*H,P*H,P}}};
109: /* Stanndard Gauss */
110: static const PetscReal HexQInterp_Gauss[8][8] = {{H*H*H,L*H*H,L*L*H,H*L*H, H*H*L,L*H*L,L*L*L,H*L*L},
111: {L*H*H,H*H*H,H*L*H,L*L*H, L*H*L,H*H*L,H*L*L,L*L*L},
112: {L*L*H,H*L*H,H*H*H,L*H*H, L*L*L,H*L*L,H*H*L,L*H*L},
113: {H*L*H,L*L*H,L*H*H,H*H*H, H*L*L,L*L*L,L*H*L,H*H*L},
114: {H*H*L,L*H*L,L*L*L,H*L*L, H*H*H,L*H*H,L*L*H,H*L*H},
115: {L*H*L,H*H*L,H*L*L,L*L*L, L*H*H,H*H*H,H*L*H,L*L*H},
116: {L*L*L,H*L*L,H*H*L,L*H*L, L*L*H,H*L*H,H*H*H,L*H*H},
117: {H*L*L,L*L*L,L*H*L,H*H*L, H*L*H,L*L*H,L*H*H,H*H*H}};
118: static const PetscReal HexQDeriv_Gauss[8][8][3] = {
119: {{M*H*H,H*M*H,H*H*M},{P*H*H,L*M*H,L*H*M},{P*L*H,L*P*H,L*L*M},{M*L*H,H*P*H,H*L*M}, {M*H*L,H*M*L,H*H*P},{P*H*L,L*M*L,L*H*P},{P*L*L,L*P*L,L*L*P},{M*L*L,H*P*L,H*L*P}},
120: {{M*H*H,L*M*H,L*H*M},{P*H*H,H*M*H,H*H*M},{P*L*H,H*P*H,H*L*M},{M*L*H,L*P*H,L*L*M}, {M*H*L,L*M*L,L*H*P},{P*H*L,H*M*L,H*H*P},{P*L*L,H*P*L,H*L*P},{M*L*L,L*P*L,L*L*P}},
121: {{M*L*H,L*M*H,L*L*M},{P*L*H,H*M*H,H*L*M},{P*H*H,H*P*H,H*H*M},{M*H*H,L*P*H,L*H*M}, {M*L*L,L*M*L,L*L*P},{P*L*L,H*M*L,H*L*P},{P*H*L,H*P*L,H*H*P},{M*H*L,L*P*L,L*H*P}},
122: {{M*L*H,H*M*H,H*L*M},{P*L*H,L*M*H,L*L*M},{P*H*H,L*P*H,L*H*M},{M*H*H,H*P*H,H*H*M}, {M*L*L,H*M*L,H*L*P},{P*L*L,L*M*L,L*L*P},{P*H*L,L*P*L,L*H*P},{M*H*L,H*P*L,H*H*P}},
123: {{M*H*L,H*M*L,H*H*M},{P*H*L,L*M*L,L*H*M},{P*L*L,L*P*L,L*L*M},{M*L*L,H*P*L,H*L*M}, {M*H*H,H*M*H,H*H*P},{P*H*H,L*M*H,L*H*P},{P*L*H,L*P*H,L*L*P},{M*L*H,H*P*H,H*L*P}},
124: {{M*H*L,L*M*L,L*H*M},{P*H*L,H*M*L,H*H*M},{P*L*L,H*P*L,H*L*M},{M*L*L,L*P*L,L*L*M}, {M*H*H,L*M*H,L*H*P},{P*H*H,H*M*H,H*H*P},{P*L*H,H*P*H,H*L*P},{M*L*H,L*P*H,L*L*P}},
125: {{M*L*L,L*M*L,L*L*M},{P*L*L,H*M*L,H*L*M},{P*H*L,H*P*L,H*H*M},{M*H*L,L*P*L,L*H*M}, {M*L*H,L*M*H,L*L*P},{P*L*H,H*M*H,H*L*P},{P*H*H,H*P*H,H*H*P},{M*H*H,L*P*H,L*H*P}},
126: {{M*L*L,H*M*L,H*L*M},{P*L*L,L*M*L,L*L*M},{P*H*L,L*P*L,L*H*M},{M*H*L,H*P*L,H*H*M}, {M*L*H,H*M*H,H*L*P},{P*L*H,L*M*H,L*L*P},{P*H*H,L*P*H,L*H*P},{M*H*H,H*P*H,H*H*P}}};
127: static const PetscReal (*HexQInterp)[8],(*HexQDeriv)[8][3];
128: /* Standard 2x2 Gauss quadrature for the bottom layer. */
129: static const PetscReal QuadQInterp[4][4] = {{H*H,L*H,L*L,H*L},
130: {L*H,H*H,H*L,L*L},
131: {L*L,H*L,H*H,L*H},
132: {H*L,L*L,L*H,H*H}};
133: static const PetscReal QuadQDeriv[4][4][2] = {
134: {{M*H,M*H},{P*H,M*L},{P*L,P*L},{M*L,P*H}},
135: {{M*H,M*L},{P*H,M*H},{P*L,P*H},{M*L,P*L}},
136: {{M*L,M*L},{P*L,M*H},{P*H,P*H},{M*H,P*L}},
137: {{M*L,M*H},{P*L,M*L},{P*H,P*L},{M*H,P*H}}};
138: #undef G
139: #undef H
140: #undef L
141: #undef M
142: #undef P
144: #define HexExtract(x,i,j,k,n) do { \
145: (n)[0] = (x)[i][j][k]; \
146: (n)[1] = (x)[i+1][j][k]; \
147: (n)[2] = (x)[i+1][j+1][k]; \
148: (n)[3] = (x)[i][j+1][k]; \
149: (n)[4] = (x)[i][j][k+1]; \
150: (n)[5] = (x)[i+1][j][k+1]; \
151: (n)[6] = (x)[i+1][j+1][k+1]; \
152: (n)[7] = (x)[i][j+1][k+1]; \
153: } while (0)
155: #define HexExtractRef(x,i,j,k,n) do { \
156: (n)[0] = &(x)[i][j][k]; \
157: (n)[1] = &(x)[i+1][j][k]; \
158: (n)[2] = &(x)[i+1][j+1][k]; \
159: (n)[3] = &(x)[i][j+1][k]; \
160: (n)[4] = &(x)[i][j][k+1]; \
161: (n)[5] = &(x)[i+1][j][k+1]; \
162: (n)[6] = &(x)[i+1][j+1][k+1]; \
163: (n)[7] = &(x)[i][j+1][k+1]; \
164: } while (0)
166: #define QuadExtract(x,i,j,n) do { \
167: (n)[0] = (x)[i][j]; \
168: (n)[1] = (x)[i+1][j]; \
169: (n)[2] = (x)[i+1][j+1]; \
170: (n)[3] = (x)[i][j+1]; \
171: } while (0)
173: static PetscScalar Sqr(PetscScalar a) {return a*a;}
175: static void HexGrad(const PetscReal dphi[][3],const PetscReal zn[],PetscReal dz[])
176: {
177: PetscInt i;
178: dz[0] = dz[1] = dz[2] = 0;
179: for (i=0; i<8; i++) {
180: dz[0] += dphi[i][0] * zn[i];
181: dz[1] += dphi[i][1] * zn[i];
182: dz[2] += dphi[i][2] * zn[i];
183: }
184: }
186: static void HexComputeGeometry(PetscInt q,PetscReal hx,PetscReal hy,const PetscReal dz[restrict],PetscReal phi[restrict],PetscReal dphi[restrict][3],PetscReal *restrict jw)
187: {
188: const PetscReal
189: jac[3][3] = {{hx/2,0,0}, {0,hy/2,0}, {dz[0],dz[1],dz[2]}}
190: ,ijac[3][3] = {{1/jac[0][0],0,0}, {0,1/jac[1][1],0}, {-jac[2][0]/(jac[0][0]*jac[2][2]),-jac[2][1]/(jac[1][1]*jac[2][2]),1/jac[2][2]}}
191: ,jdet = jac[0][0]*jac[1][1]*jac[2][2];
192: PetscInt i;
194: for (i=0; i<8; i++) {
195: const PetscReal *dphir = HexQDeriv[q][i];
196: phi[i] = HexQInterp[q][i];
197: dphi[i][0] = dphir[0]*ijac[0][0] + dphir[1]*ijac[1][0] + dphir[2]*ijac[2][0];
198: dphi[i][1] = dphir[0]*ijac[0][1] + dphir[1]*ijac[1][1] + dphir[2]*ijac[2][1];
199: dphi[i][2] = dphir[0]*ijac[0][2] + dphir[1]*ijac[1][2] + dphir[2]*ijac[2][2];
200: }
201: *jw = 1.0 * jdet;
202: }
204: typedef struct _p_THI *THI;
205: typedef struct _n_Units *Units;
207: typedef struct {
208: PetscScalar u,v;
209: } Node;
211: typedef struct {
212: PetscScalar b; /* bed */
213: PetscScalar h; /* thickness */
214: PetscScalar beta2; /* friction */
215: } PrmNode;
217: typedef struct {
218: PetscReal min,max,cmin,cmax;
219: } PRange;
221: typedef enum {THIASSEMBLY_TRIDIAGONAL,THIASSEMBLY_FULL} THIAssemblyMode;
223: struct _p_THI {
224: PETSCHEADER(int);
225: void (*initialize)(THI,PetscReal x,PetscReal y,PrmNode *p);
226: PetscInt nlevels;
227: PetscInt zlevels;
228: PetscReal Lx,Ly,Lz; /* Model domain */
229: PetscReal alpha; /* Bed angle */
230: Units units;
231: PetscReal dirichlet_scale;
232: PetscReal ssa_friction_scale;
233: PRange eta;
234: PRange beta2;
235: struct {
236: PetscReal Bd2,eps,exponent;
237: } viscosity;
238: struct {
239: PetscReal irefgam,eps2,exponent;
240: } friction;
241: PetscReal rhog;
242: PetscTruth no_slip;
243: PetscTruth tridiagonal;
244: PetscTruth coarse2d;
245: PetscTruth verbose;
246: MatType mattype;
247: };
249: struct _n_Units {
250: /* fundamental */
251: PetscReal meter;
252: PetscReal kilogram;
253: PetscReal second;
254: /* derived */
255: PetscReal Pascal;
256: PetscReal year;
257: };
259: static void PrmHexGetZ(const PrmNode pn[],PetscInt k,PetscInt zm,PetscReal zn[])
260: {
261: const PetscScalar zm1 = zm-1,
262: znl[8] = {pn[0].b + pn[0].h*(PetscScalar)k/zm1,
263: pn[1].b + pn[1].h*(PetscScalar)k/zm1,
264: pn[2].b + pn[2].h*(PetscScalar)k/zm1,
265: pn[3].b + pn[3].h*(PetscScalar)k/zm1,
266: pn[0].b + pn[0].h*(PetscScalar)(k+1)/zm1,
267: pn[1].b + pn[1].h*(PetscScalar)(k+1)/zm1,
268: pn[2].b + pn[2].h*(PetscScalar)(k+1)/zm1,
269: pn[3].b + pn[3].h*(PetscScalar)(k+1)/zm1};
270: PetscInt i;
271: for (i=0; i<8; i++) zn[i] = PetscRealPart(znl[i]);
272: }
274: /* Tests A and C are from the ISMIP-HOM paper (Pattyn et al. 2008) */
275: static void THIInitialize_HOM_A(THI thi,PetscReal x,PetscReal y,PrmNode *p)
276: {
277: Units units = thi->units;
278: PetscReal s = -x*tan(thi->alpha);
279: p->b = s - 1000*units->meter + 500*units->meter * sin(x*2*PETSC_PI/thi->Lx) * sin(y*2*PETSC_PI/thi->Ly);
280: p->h = s - p->b;
281: p->beta2 = 1e30;
282: }
284: static void THIInitialize_HOM_C(THI thi,PetscReal x,PetscReal y,PrmNode *p)
285: {
286: Units units = thi->units;
287: PetscReal s = -x*tan(thi->alpha);
288: p->b = s - 1000*units->meter;
289: p->h = s - p->b;
290: /* tau_b = beta2 v is a stress (Pa) */
291: p->beta2 = 1000 * (1 + sin(x*2*PETSC_PI/thi->Lx)*sin(y*2*PETSC_PI/thi->Ly)) * units->Pascal * units->year / units->meter;
292: }
294: /* These are just toys */
296: /* Same bed as test A, free slip everywhere except for a discontinuous jump to a circular sticky region in the middle. */
297: static void THIInitialize_HOM_X(THI thi,PetscReal xx,PetscReal yy,PrmNode *p)
298: {
299: Units units = thi->units;
300: PetscReal x = xx*2*PETSC_PI/thi->Lx - PETSC_PI,y = yy*2*PETSC_PI/thi->Ly - PETSC_PI; /* [-pi,pi] */
301: PetscReal r = sqrt(x*x + y*y),s = -x*tan(thi->alpha);
302: p->b = s - 1000*units->meter + 500*units->meter * sin(x + PETSC_PI) * sin(y + PETSC_PI);
303: p->h = s - p->b;
304: p->beta2 = 1000 * (r < 1 ? 2 : 0) * units->Pascal * units->year / units->meter;
305: }
307: /* Same bed as A, smoothly varying slipperiness, similar to Matlab's "sombrero" (uncorrelated with bathymetry) */
308: static void THIInitialize_HOM_Z(THI thi,PetscReal xx,PetscReal yy,PrmNode *p)
309: {
310: Units units = thi->units;
311: PetscReal x = xx*2*PETSC_PI/thi->Lx - PETSC_PI,y = yy*2*PETSC_PI/thi->Ly - PETSC_PI; /* [-pi,pi] */
312: PetscReal r = sqrt(x*x + y*y),s = -x*tan(thi->alpha);
313: p->b = s - 1000*units->meter + 500*units->meter * sin(x + PETSC_PI) * sin(y + PETSC_PI);
314: p->h = s - p->b;
315: p->beta2 = 1000 * (1. + sin(sqrt(16*r))/sqrt(1e-2 + 16*r)*cos(x*3/2)*cos(y*3/2)) * units->Pascal * units->year / units->meter;
316: }
318: static void THIFriction(THI thi,PetscReal rbeta2,PetscReal gam,PetscReal *beta2,PetscReal *dbeta2)
319: {
320: if (thi->friction.irefgam == 0) {
321: Units units = thi->units;
322: thi->friction.irefgam = 1./(0.5*PetscSqr(100 * units->meter / units->year));
323: thi->friction.eps2 = 0.5*PetscSqr(1.e-4 / thi->friction.irefgam);
324: }
325: if (thi->friction.exponent == 0) {
326: *beta2 = rbeta2;
327: *dbeta2 = 0;
328: } else {
329: *beta2 = rbeta2 * pow(thi->friction.eps2 + gam*thi->friction.irefgam,thi->friction.exponent);
330: *dbeta2 = thi->friction.exponent * *beta2 / (thi->friction.eps2 + gam*thi->friction.irefgam) * thi->friction.irefgam;
331: }
332: }
334: static void THIViscosity(THI thi,PetscReal gam,PetscReal *eta,PetscReal *deta)
335: {
336: PetscReal Bd2,eps,exponent;
337: if (thi->viscosity.Bd2 == 0) {
338: Units units = thi->units;
339: const PetscReal
340: n = 3., /* Glen exponent */
341: p = 1. + 1./n, /* for Stokes */
342: A = 1.e-16 * pow(units->Pascal,-n) / units->year, /* softness parameter (Pa^{-n}/s) */
343: B = pow(A,-1./n); /* hardness parameter */
344: thi->viscosity.Bd2 = B/2;
345: thi->viscosity.exponent = (p-2)/2;
346: thi->viscosity.eps = 0.5*PetscSqr(1e-5 / units->year);
347: }
348: Bd2 = thi->viscosity.Bd2;
349: exponent = thi->viscosity.exponent;
350: eps = thi->viscosity.eps;
351: *eta = Bd2 * pow(eps + gam,exponent);
352: *deta = exponent * (*eta) / (eps + gam);
353: }
355: static void RangeUpdate(PetscReal *min,PetscReal *max,PetscReal x)
356: {
357: if (x < *min) *min = x;
358: if (x > *max) *max = x;
359: }
361: static void PRangeClear(PRange *p)
362: {
363: p->cmin = p->min = 1e100;
364: p->cmax = p->max = -1e100;
365: }
369: static PetscErrorCode PRangeMinMax(PRange *p,PetscReal min,PetscReal max)
370: {
373: p->cmin = min;
374: p->cmax = max;
375: if (min < p->min) p->min = min;
376: if (max > p->max) p->max = max;
377: return(0);
378: }
382: static PetscErrorCode THIDestroy(THI thi)
383: {
387: if (--((PetscObject)thi)->refct > 0) return(0);
388: PetscFree(thi->units);
389: PetscFree(thi->mattype);
390: PetscHeaderDestroy(thi);
391: return(0);
392: }
396: static PetscErrorCode THICreate(MPI_Comm comm,THI *inthi)
397: {
398: static PetscTruth registered = PETSC_FALSE;
399: THI thi;
400: Units units;
404: *inthi = 0;
405: if (!registered) {
406: PetscCookieRegister("Toy Hydrostatic Ice",&THI_COOKIE);
407: registered = PETSC_TRUE;
408: }
409: PetscHeaderCreate(thi,_p_THI,0,THI_COOKIE,-1,"THI",comm,THIDestroy,0);
411: PetscNew(struct _n_Units,&thi->units);
412: units = thi->units;
413: units->meter = 1e-2;
414: units->second = 1e-7;
415: units->kilogram = 1e-12;
416: PetscOptionsBegin(comm,NULL,"Scaled units options","");
417: {
418: PetscOptionsReal("-units_meter","1 meter in scaled length units","",units->meter,&units->meter,NULL);
419: PetscOptionsReal("-units_second","1 second in scaled time units","",units->second,&units->second,NULL);
420: PetscOptionsReal("-units_kilogram","1 kilogram in scaled mass units","",units->kilogram,&units->kilogram,NULL);
421: }
422: PetscOptionsEnd();
423: units->Pascal = units->kilogram / (units->meter * PetscSqr(units->second));
424: units->year = 31556926. * units->second, /* seconds per year */
426: thi->Lx = 10.e3;
427: thi->Ly = 10.e3;
428: thi->Lz = 1000;
429: thi->nlevels = 1;
430: thi->dirichlet_scale = 1;
431: thi->verbose = PETSC_FALSE;
433: PetscOptionsBegin(comm,NULL,"Toy Hydrostatic Ice options","");
434: {
435: QuadratureType quad = QUAD_GAUSS;
436: char homexp[] = "A";
437: char mtype[256] = MATSBAIJ;
438: PetscReal L,m = 1.0;
439: PetscTruth flg;
440: L = thi->Lx;
441: PetscOptionsReal("-thi_L","Domain size (m)","",L,&L,&flg);
442: if (flg) thi->Lx = thi->Ly = L;
443: PetscOptionsReal("-thi_Lx","X Domain size (m)","",thi->Lx,&thi->Lx,NULL);
444: PetscOptionsReal("-thi_Ly","Y Domain size (m)","",thi->Ly,&thi->Ly,NULL);
445: PetscOptionsReal("-thi_Lz","Z Domain size (m)","",thi->Lz,&thi->Lz,NULL);
446: PetscOptionsString("-thi_hom","ISMIP-HOM experiment (A or C)","",homexp,homexp,sizeof(homexp),NULL);
447: switch (homexp[0] = toupper(homexp[0])) {
448: case 'A':
449: thi->initialize = THIInitialize_HOM_A;
450: thi->no_slip = PETSC_TRUE;
451: thi->alpha = 0.5;
452: break;
453: case 'C':
454: thi->initialize = THIInitialize_HOM_C;
455: thi->no_slip = PETSC_FALSE;
456: thi->alpha = 0.1;
457: break;
458: case 'X':
459: thi->initialize = THIInitialize_HOM_X;
460: thi->no_slip = PETSC_FALSE;
461: thi->alpha = 0.3;
462: break;
463: case 'Z':
464: thi->initialize = THIInitialize_HOM_Z;
465: thi->no_slip = PETSC_FALSE;
466: thi->alpha = 0.5;
467: break;
468: default:
469: SETERRQ1(PETSC_ERR_SUP,"HOM experiment '%c' not implemented",homexp[0]);
470: }
471: PetscOptionsEnum("-thi_quadrature","Quadrature to use for 3D elements","",QuadratureTypes,(PetscEnum)quad,(PetscEnum*)&quad,NULL);
472: switch (quad) {
473: case QUAD_GAUSS:
474: HexQInterp = HexQInterp_Gauss;
475: HexQDeriv = HexQDeriv_Gauss;
476: break;
477: case QUAD_LOBATTO:
478: HexQInterp = HexQInterp_Lobatto;
479: HexQDeriv = HexQDeriv_Lobatto;
480: break;
481: }
482: PetscOptionsReal("-thi_alpha","Bed angle (degrees)","",thi->alpha,&thi->alpha,NULL);
483: PetscOptionsReal("-thi_friction_m","Friction exponent, 0=Coulomb, 1=Navier","",m,&m,NULL);
484: thi->friction.exponent = (m-1)/2;
485: PetscOptionsReal("-thi_dirichlet_scale","Scale Dirichlet boundary conditions by this factor","",thi->dirichlet_scale,&thi->dirichlet_scale,NULL);
486: PetscOptionsReal("-thi_ssa_friction_scale","Scale slip boundary conditions by this factor in SSA (2D) assembly","",thi->ssa_friction_scale,&thi->ssa_friction_scale,NULL);
487: PetscOptionsInt("-thi_nlevels","Number of levels of refinement","",thi->nlevels,&thi->nlevels,NULL);
488: PetscOptionsTruth("-thi_coarse2d","Use a 2D coarse space corresponding to SSA","",thi->coarse2d,&thi->coarse2d,NULL);
489: PetscOptionsTruth("-thi_tridiagonal","Assemble a tridiagonal system (column coupling only) on the finest level","",thi->tridiagonal,&thi->tridiagonal,NULL);
490: PetscOptionsList("-thi_mat_type","Matrix type","MatSetType",MatList,mtype,(char*)mtype,sizeof(mtype),NULL);
491: PetscStrallocpy(mtype,&thi->mattype);
492: PetscOptionsTruth("-thi_verbose","Enable verbose output (like matrix sizes and statistics)","",thi->verbose,&thi->verbose,NULL);
493: }
494: PetscOptionsEnd();
496: /* dimensionalize */
497: thi->Lx *= units->meter;
498: thi->Ly *= units->meter;
499: thi->Lz *= units->meter;
500: thi->alpha *= PETSC_PI / 180;
502: PRangeClear(&thi->eta);
503: PRangeClear(&thi->beta2);
505: {
506: PetscReal u = 1000*units->meter/(3e7*units->second),
507: gradu = u / (100*units->meter),eta,deta,
508: rho = 910 * units->kilogram/pow(units->meter,3),
509: grav = 9.81 * units->meter/PetscSqr(units->second),
510: driving = rho * grav * tan(thi->alpha) * 1000*units->meter;
511: THIViscosity(thi,0.5*gradu*gradu,&eta,&deta);
512: thi->rhog = rho * grav;
513: if (thi->verbose) {
514: PetscPrintf(((PetscObject)thi)->comm,"Units: meter %8.2g second %8.2g kg %8.2g Pa %8.2g\n",units->meter,units->second,units->kilogram,units->Pascal);
515: PetscPrintf(((PetscObject)thi)->comm,"Domain (%6.2g,%6.2g,%6.2g), pressure %8.2g, driving stress %8.2g\n",thi->Lx,thi->Ly,thi->Lz,rho*grav*1e3*units->meter,driving);
516: PetscPrintf(((PetscObject)thi)->comm,"Large velocity 1km/a %8.2g, velocity gradient %8.2g, eta %8.2g, stress %8.2g, ratio %8.2g\n",u,gradu,eta,2*eta*gradu,2*eta*gradu/driving);
517: THIViscosity(thi,0.5*PetscSqr(1e-3*gradu),&eta,&deta);
518: PetscPrintf(((PetscObject)thi)->comm,"Small velocity 1m/a %8.2g, velocity gradient %8.2g, eta %8.2g, stress %8.2g, ratio %8.2g\n",1e-3*u,1e-3*gradu,eta,2*eta*1e-3*gradu,2*eta*1e-3*gradu/driving);
519: }
520: }
522: *inthi = thi;
523: return(0);
524: }
528: static PetscErrorCode THIInitializePrm(THI thi,DA da2prm,Vec prm)
529: {
530: PrmNode **p;
531: PetscInt i,j,xs,xm,ys,ym,mx,my;
535: DAGetGhostCorners(da2prm,&ys,&xs,0,&ym,&xm,0);
536: DAGetInfo(da2prm,0, &my,&mx,0, 0,0,0, 0,0,0,0);
537: DAVecGetArray(da2prm,prm,&p);
538: for (i=xs; i<xs+xm; i++) {
539: for (j=ys; j<ys+ym; j++) {
540: PetscReal xx = thi->Lx*i/mx,yy = thi->Ly*j/my;
541: thi->initialize(thi,xx,yy,&p[i][j]);
542: }
543: }
544: DAVecRestoreArray(da2prm,prm,&p);
545: return(0);
546: }
550: static PetscErrorCode THISetDMMG(THI thi,DMMG *dmmg)
551: {
553: PetscInt i;
556: if (DMMGGetLevels(dmmg) != thi->nlevels) SETERRQ(PETSC_ERR_ARG_CORRUPT,"DMMG nlevels does not agree with THI");
557: for (i=0; i<thi->nlevels; i++) {
558: PetscInt Mx,My,Mz,mx,my,s,dim;
559: DAStencilType st;
560: DA da = (DA)dmmg[i]->dm,da2prm;
561: Vec X;
562: DAGetInfo(da,&dim, &Mz,&My,&Mx, 0,&my,&mx, 0,&s,0,&st);
563: if (dim == 2) {
564: DAGetInfo(da,&dim, &My,&Mx,0, &my,&mx,0, 0,&s,0,&st);
565: }
566: DACreate2d(((PetscObject)thi)->comm,DA_XYPERIODIC,st,My,Mx,my,mx,sizeof(PrmNode)/sizeof(PetscScalar),s,0,0,&da2prm);
567: DACreateLocalVector(da2prm,&X);
568: {
569: PetscReal Lx = thi->Lx / thi->units->meter,Ly = thi->Ly / thi->units->meter,Lz = thi->Lz / thi->units->meter;
570: if (dim == 2) {
571: PetscPrintf(((PetscObject)thi)->comm,"Level %d domain size (m) %8.2g x %8.2g, num elements %3d x %3d (%8d), size (m) %g x %g\n",i,Lx,Ly,Mx,My,Mx*My,Lx/Mx,Ly/My);
572: } else {
573: PetscPrintf(((PetscObject)thi)->comm,"Level %d domain size (m) %8.2g x %8.2g x %8.2g, num elements %3d x %3d x %3d (%8d), size (m) %g x %g x %g\n",i,Lx,Ly,Lz,Mx,My,Mz,Mx*My*Mz,Lx/Mx,Ly/My,1000./(Mz-1));
574: }
575: }
576: THIInitializePrm(thi,da2prm,X);
577: PetscObjectCompose((PetscObject)da,"DA2Prm",(PetscObject)da2prm);
578: PetscObjectCompose((PetscObject)da,"DA2Prm_Vec",(PetscObject)X);
579: DADestroy(da2prm);
580: VecDestroy(X);
581: }
582: return(0);
583: }
587: static PetscErrorCode THIDAGetPrm(DA da,PrmNode ***prm)
588: {
590: DA da2prm;
591: Vec X;
594: PetscObjectQuery((PetscObject)da,"DA2Prm",(PetscObject*)&da2prm);
595: if (!da2prm) SETERRQ(PETSC_ERR_ARG_WRONG,"No DA2Prm composed with given DA");
596: PetscObjectQuery((PetscObject)da,"DA2Prm_Vec",(PetscObject*)&X);
597: if (!X) SETERRQ(PETSC_ERR_ARG_WRONG,"No DA2Prm_Vec composed with given DA");
598: DAVecGetArray(da2prm,X,prm);
599: return(0);
600: }
604: static PetscErrorCode THIDARestorePrm(DA da,PrmNode ***prm)
605: {
607: DA da2prm;
608: Vec X;
611: PetscObjectQuery((PetscObject)da,"DA2Prm",(PetscObject*)&da2prm);
612: if (!da2prm) SETERRQ(PETSC_ERR_ARG_WRONG,"No DA2Prm composed with given DA");
613: PetscObjectQuery((PetscObject)da,"DA2Prm_Vec",(PetscObject*)&X);
614: if (!X) SETERRQ(PETSC_ERR_ARG_WRONG,"No DA2Prm_Vec composed with given DA");
615: DAVecRestoreArray(da2prm,X,prm);
616: return(0);
617: }
621: static PetscErrorCode THIInitial(DMMG dmmg,Vec X)
622: {
623: THI thi = (THI)dmmg->user;
624: DA da = (DA)dmmg->dm;
625: PetscInt i,j,k,xs,xm,ys,ym,zs,zm,mx,my;
626: PetscReal hx,hy;
627: PrmNode **prm;
628: Node ***x;
632: DAGetInfo(da,0, 0,&my,&mx, 0,0,0, 0,0,0,0);
633: DAGetCorners(da,&zs,&ys,&xs,&zm,&ym,&xm);
634: DAVecGetArray(da,X,&x);
635: THIDAGetPrm(da,&prm);
636: hx = thi->Lx / mx;
637: hy = thi->Ly / my;
638: for (i=xs; i<xs+xm; i++) {
639: for (j=ys; j<ys+ym; j++) {
640: for (k=zs; k<zs+zm; k++) {
641: const PetscScalar zm1 = zm-1,
642: drivingx = thi->rhog * (prm[i+1][j].b+prm[i+1][j].h - prm[i-1][j].b-prm[i-1][j].h) / (2*hx),
643: drivingy = thi->rhog * (prm[i][j+1].b+prm[i][j+1].h - prm[i][j-1].b-prm[i][j-1].h) / (2*hx);
644: x[i][j][k].u = 0. * drivingx * prm[i][j].h*(PetscScalar)k/zm1;
645: x[i][j][k].v = 0. * drivingy * prm[i][j].h*(PetscScalar)k/zm1;
646: }
647: }
648: }
649: DAVecRestoreArray(da,X,&x);
650: THIDARestorePrm(da,&prm);
651: return(0);
652: }
654: static void PointwiseNonlinearity(THI thi,const Node n[restrict 8],const PetscReal phi[restrict 3],PetscReal dphi[restrict 8][3],PetscScalar *restrict u,PetscScalar *restrict v,PetscScalar du[restrict 3],PetscScalar dv[restrict 3],PetscReal *eta,PetscReal *deta)
655: {
656: PetscInt l,ll;
657: PetscScalar gam;
659: du[0] = du[1] = du[2] = 0;
660: dv[0] = dv[1] = dv[2] = 0;
661: *u = 0;
662: *v = 0;
663: for (l=0; l<8; l++) {
664: *u += phi[l] * n[l].u;
665: *v += phi[l] * n[l].v;
666: for (ll=0; ll<3; ll++) {
667: du[ll] += dphi[l][ll] * n[l].u;
668: dv[ll] += dphi[l][ll] * n[l].v;
669: }
670: }
671: gam = Sqr(du[0]) + Sqr(dv[1]) + du[0]*dv[1] + 0.25*Sqr(du[1]+dv[0]) + 0.25*Sqr(du[2]) + 0.25*Sqr(dv[2]);
672: THIViscosity(thi,PetscRealPart(gam),eta,deta);
673: }
675: static void PointwiseNonlinearity2D(THI thi,Node n[],PetscReal phi[],PetscReal dphi[4][2],PetscScalar *u,PetscScalar *v,PetscScalar du[],PetscScalar dv[],PetscReal *eta,PetscReal *deta)
676: {
677: PetscInt l,ll;
678: PetscScalar gam;
680: du[0] = du[1] = 0;
681: dv[0] = dv[1] = 0;
682: *u = 0;
683: *v = 0;
684: for (l=0; l<4; l++) {
685: *u += phi[l] * n[l].u;
686: *v += phi[l] * n[l].v;
687: for (ll=0; ll<2; ll++) {
688: du[ll] += dphi[l][ll] * n[l].u;
689: dv[ll] += dphi[l][ll] * n[l].v;
690: }
691: }
692: gam = Sqr(du[0]) + Sqr(dv[1]) + du[0]*dv[1] + 0.25*Sqr(du[1]+dv[0]);
693: THIViscosity(thi,PetscRealPart(gam),eta,deta);
694: }
698: static PetscErrorCode THIFunctionLocal(DALocalInfo *info,Node ***x,Node ***f,THI thi)
699: {
700: PetscInt xs,ys,xm,ym,zm,i,j,k,q,l;
701: PetscReal hx,hy,etamin,etamax,beta2min,beta2max;
702: PrmNode **prm;
706: xs = info->zs;
707: ys = info->ys;
708: xm = info->zm;
709: ym = info->ym;
710: zm = info->xm;
711: hx = thi->Lx / info->mz;
712: hy = thi->Ly / info->my;
714: etamin = 1e100;
715: etamax = 0;
716: beta2min = 1e100;
717: beta2max = 0;
719: THIDAGetPrm(info->da,&prm);
721: for (i=xs; i<xs+xm; i++) {
722: for (j=ys; j<ys+ym; j++) {
723: PrmNode pn[4];
724: QuadExtract(prm,i,j,pn);
725: for (k=0; k<zm-1; k++) {
726: PetscInt ls = 0;
727: Node n[8],*fn[8];
728: PetscReal zn[8],etabase = 0;
729: PrmHexGetZ(pn,k,zm,zn);
730: HexExtract(x,i,j,k,n);
731: HexExtractRef(f,i,j,k,fn);
732: if (thi->no_slip && k == 0) {
733: for (l=0; l<4; l++) n[l].u = n[l].v = 0;
734: /* The first 4 basis functions lie on the bottom layer, so their contribution is exactly 0, hence we can skip them */
735: ls = 4;
736: }
737: for (q=0; q<8; q++) {
738: PetscReal dz[3],phi[8],dphi[8][3],jw,eta,deta;
739: PetscScalar du[3],dv[3],u,v;
740: HexGrad(HexQDeriv[q],zn,dz);
741: HexComputeGeometry(q,hx,hy,dz,phi,dphi,&jw);
742: PointwiseNonlinearity(thi,n,phi,dphi,&u,&v,du,dv,&eta,&deta);
743: jw /= thi->rhog; /* scales residuals to be O(1) */
744: if (q == 0) etabase = eta;
745: RangeUpdate(&etamin,&etamax,eta);
746: for (l=ls; l<8; l++) { /* test functions */
747: const PetscReal ds[2] = {-tan(thi->alpha),0};
748: const PetscReal pp=phi[l],*dp = dphi[l];
749: fn[l]->u += dp[0]*jw*eta*(4.*du[0]+2.*dv[1]) + dp[1]*jw*eta*(du[1]+dv[0]) + dp[2]*jw*eta*du[2] + pp*jw*thi->rhog*ds[0];
750: fn[l]->v += dp[1]*jw*eta*(2.*du[0]+4.*dv[1]) + dp[0]*jw*eta*(du[1]+dv[0]) + dp[2]*jw*eta*dv[2] + pp*jw*thi->rhog*ds[1];
751: }
752: }
753: if (k == 0) { /* we are on a bottom face */
754: if (thi->no_slip) {
755: /* Note: Non-Galerkin coarse grid operators are very sensitive to the scaling of Dirichlet boundary
756: * conditions. After shenanigans above, etabase contains the effective viscosity at the closest quadrature
757: * point to the bed. We want the diagonal entry in the Dirichlet condition to have similar magnitude to the
758: * diagonal entry corresponding to the adjacent node. The fundamental scaling of the viscous part is in
759: * diagu, diagv below. This scaling is easy to recognize by considering the finite difference operator after
760: * scaling by element size. The no-slip Dirichlet condition is scaled by this factor, and also in the
761: * assembled matrix (see the similar block in THIJacobianLocal).
762: */
763: const PetscReal hz = PetscRealPart(pn[0].h)/(zm-1.);
764: const PetscScalar diagu = 2*etabase/thi->rhog*(hx*hy/hz + hx*hz/hy + 4*hy*hz/hx),diagv = 2*etabase/thi->rhog*(hx*hy/hz + 4*hx*hz/hy + hy*hz/hx);
765: fn[0]->u = thi->dirichlet_scale*diagu*n[0].u;
766: fn[0]->v = thi->dirichlet_scale*diagv*n[0].v;
767: } else { /* Integrate over bottom face to apply boundary condition */
768: for (q=0; q<4; q++) {
769: const PetscReal jw = 0.25*hx*hy/thi->rhog,*phi = QuadQInterp[q];
770: PetscScalar u=0,v=0,rbeta2=0;
771: PetscReal beta2,dbeta2;
772: for (l=0; l<4; l++) {
773: u += phi[l]*n[l].u;
774: v += phi[l]*n[l].v;
775: rbeta2 += phi[l]*pn[l].beta2;
776: }
777: THIFriction(thi,PetscRealPart(rbeta2),PetscRealPart(u*u+v*v)/2,&beta2,&dbeta2);
778: RangeUpdate(&beta2min,&beta2max,beta2);
779: for (l=0; l<4; l++) {
780: const PetscReal pp = phi[l];
781: fn[ls+l]->u += pp*jw*beta2*u;
782: fn[ls+l]->v += pp*jw*beta2*v;
783: }
784: }
785: }
786: }
787: }
788: }
789: }
791: THIDARestorePrm(info->da,&prm);
793: PRangeMinMax(&thi->eta,etamin,etamax);
794: PRangeMinMax(&thi->beta2,beta2min,beta2max);
795: return(0);
796: }
800: static PetscErrorCode THIMatrixStatistics(THI thi,Mat B,PetscViewer viewer)
801: {
803: PetscReal nrm;
804: PetscInt m;
805: PetscMPIInt rank;
808: MatNorm(B,NORM_FROBENIUS,&nrm);
809: MatGetSize(B,&m,0);
810: MPI_Comm_rank(((PetscObject)B)->comm,&rank);
811: if (!rank) {
812: PetscScalar val0,val2;
813: MatGetValue(B,0,0,&val0);
814: MatGetValue(B,2,2,&val2);
815: PetscViewerASCIIPrintf(viewer,"Matrix dim %8d norm %8.2e, (0,0) %8.2e (2,2) %8.2e, eta [%8.2e,%8.2e] beta2 [%8.2e,%8.2e]\n",m,nrm,PetscRealPart(val0),PetscRealPart(val2),thi->eta.cmin,thi->eta.cmax,thi->beta2.cmin,thi->beta2.cmax);
816: }
817: return(0);
818: }
822: static PetscErrorCode THISurfaceStatistics(DA da,Vec X,PetscReal *min,PetscReal *max,PetscReal *mean)
823: {
825: Node ***x;
826: PetscInt i,j,xs,ys,zs,xm,ym,zm,mx,my,mz;
827: PetscReal umin = 1e100,umax=-1e100;
828: PetscScalar usum=0.0,gusum;
831: *min = *max = *mean = 0;
832: DAGetInfo(da,0, &mz,&my,&mx, 0,0,0, 0,0,0,0);
833: DAGetCorners(da,&zs,&ys,&xs,&zm,&ym,&xm);
834: if (zs != 0 || zm != mz) SETERRQ(1,"Unexpected decomposition");
835: DAVecGetArray(da,X,&x);
836: for (i=xs; i<xs+xm; i++) {
837: for (j=ys; j<ys+ym; j++) {
838: PetscReal u = PetscRealPart(x[i][j][zm-1].u);
839: RangeUpdate(&umin,&umax,u);
840: usum += u;
841: }
842: }
843: DAVecRestoreArray(da,X,&x);
844: PetscGlobalMin(&umin,min,((PetscObject)da)->comm);
845: PetscGlobalMax(&umax,max,((PetscObject)da)->comm);
846: PetscGlobalSum(&usum,&gusum,((PetscObject)da)->comm);
847: *mean = PetscRealPart(gusum) / (mx*my);
848: return(0);
849: }
853: static PetscErrorCode THISolveStatistics(THI thi,DMMG *dmmg,PetscInt coarsened,const char name[])
854: {
855: MPI_Comm comm = ((PetscObject)thi)->comm;
856: PetscInt nlevels = DMMGGetLevels(dmmg),level = nlevels-1-coarsened;
857: SNES snes = dmmg[level]->snes;
858: Vec X = dmmg[level]->x;
862: PetscPrintf(comm,"Solution statistics after solve: %s\n",name);
863: {
864: PetscInt its,lits;
865: SNESConvergedReason reason;
866: SNESGetIterationNumber(snes,&its);
867: SNESGetConvergedReason(snes,&reason);
868: SNESGetLinearSolveIterations(snes,&lits);
869: PetscPrintf(comm,"%s: Number of Newton iterations = %d, total linear iterations = %d\n",SNESConvergedReasons[reason],its,lits);
870: }
871: {
872: PetscReal nrm2,min[3]={1e100,1e100,1e100},max[3]={-1e100,-1e100,-1e100};
873: PetscInt i,j,m;
874: PetscScalar *x;
875: VecNorm(X,NORM_2,&nrm2);
876: VecGetLocalSize(X,&m);
877: VecGetArray(X,&x);
878: for (i=0; i<m; i+=2) {
879: PetscReal u = PetscRealPart(x[i]),v = PetscRealPart(x[i+1]),c = sqrt(u*u+v*v);
880: min[0] = PetscMin(u,min[0]);
881: min[1] = PetscMin(v,min[1]);
882: min[2] = PetscMin(c,min[2]);
883: max[0] = PetscMax(u,max[0]);
884: max[1] = PetscMax(v,max[1]);
885: max[2] = PetscMax(c,max[2]);
886: }
887: VecRestoreArray(X,&x);
888: MPI_Allreduce(MPI_IN_PLACE,min,3,MPIU_REAL,MPI_MIN,((PetscObject)thi)->comm);
889: MPI_Allreduce(MPI_IN_PLACE,max,3,MPIU_REAL,MPI_MAX,((PetscObject)thi)->comm);
890: /* Dimensionalize to meters/year */
891: nrm2 *= thi->units->year / thi->units->meter;
892: for (j=0; j<3; j++) {
893: min[j] *= thi->units->year / thi->units->meter;
894: max[j] *= thi->units->year / thi->units->meter;
895: }
896: PetscPrintf(comm,"|X|_2 %g u in [%g, %g] v in [%g, %g] c in [%g, %g] \n",nrm2,min[0],max[0],min[1],max[1],min[2],max[2]);
897: {
898: PetscReal umin,umax,umean;
899: THISurfaceStatistics((DA)dmmg[level]->dm,X,&umin,&umax,&umean);
900: umin *= thi->units->year / thi->units->meter;
901: umax *= thi->units->year / thi->units->meter;
902: umean *= thi->units->year / thi->units->meter;
903: PetscPrintf(comm,"Surface statistics: u in [%12.6e, %12.6e] mean %12.6e\n",umin,umax,umean);
904: }
905: /* These values stay nondimensional */
906: PetscPrintf(comm,"Global eta range [%g, %g], converged range [%g, %g]\n",thi->eta.min,thi->eta.max,thi->eta.cmin,thi->eta.cmax);
907: PetscPrintf(comm,"Global beta2 range [%g, %g], converged range [%g, %g]\n",thi->beta2.min,thi->beta2.max,thi->beta2.cmin,thi->beta2.cmax);
908: }
909: PetscPrintf(comm,"\n");
910: return(0);
911: }
915: static PetscErrorCode THIJacobianLocal_2D(DALocalInfo *info,Node **x,Mat B,THI thi)
916: {
917: PetscInt xs,ys,xm,ym,i,j,q,l,ll;
918: PetscReal hx,hy;
919: PrmNode **prm;
923: xs = info->ys;
924: ys = info->xs;
925: xm = info->ym;
926: ym = info->xm;
927: hx = thi->Lx / info->my;
928: hy = thi->Ly / info->mx;
930: MatZeroEntries(B);
931: THIDAGetPrm(info->da,&prm);
933: for (i=xs; i<xs+xm; i++) {
934: for (j=ys; j<ys+ym; j++) {
935: Node n[4];
936: PrmNode pn[4];
937: PetscScalar Ke[4*2][4*2];
938: QuadExtract(prm,i,j,pn);
939: QuadExtract(x,i,j,n);
940: PetscMemzero(Ke,sizeof(Ke));
941: for (q=0; q<4; q++) {
942: PetscReal phi[4],dphi[4][2],jw,eta,deta,beta2,dbeta2;
943: PetscScalar u,v,du[2],dv[2],h = 0,rbeta2 = 0;
944: for (l=0; l<4; l++) {
945: phi[l] = QuadQInterp[q][l];
946: dphi[l][0] = QuadQDeriv[q][l][0]*2./hx;
947: dphi[l][1] = QuadQDeriv[q][l][1]*2./hy;
948: h += phi[l] * pn[l].h;
949: rbeta2 += phi[l] * pn[l].beta2;
950: }
951: jw = 0.25*hx*hy / thi->rhog; /* rhog is only scaling */
952: PointwiseNonlinearity2D(thi,n,phi,dphi,&u,&v,du,dv,&eta,&deta);
953: THIFriction(thi,PetscRealPart(rbeta2),PetscRealPart(u*u+v*v)/2,&beta2,&dbeta2);
954: for (l=0; l<4; l++) {
955: const PetscReal pp = phi[l],*dp = dphi[l];
956: for (ll=0; ll<4; ll++) {
957: const PetscReal ppl = phi[ll],*dpl = dphi[ll];
958: PetscScalar dgdu,dgdv;
959: dgdu = 2.*du[0]*dpl[0] + dv[1]*dpl[0] + 0.5*(du[1]+dv[0])*dpl[1];
960: dgdv = 2.*dv[1]*dpl[1] + du[0]*dpl[1] + 0.5*(du[1]+dv[0])*dpl[0];
961: /* Picard part */
962: Ke[l*2+0][ll*2+0] += dp[0]*jw*eta*4.*dpl[0] + dp[1]*jw*eta*dpl[1] + pp*jw*(beta2/h)*ppl*thi->ssa_friction_scale;
963: Ke[l*2+0][ll*2+1] += dp[0]*jw*eta*2.*dpl[1] + dp[1]*jw*eta*dpl[0];
964: Ke[l*2+1][ll*2+0] += dp[1]*jw*eta*2.*dpl[0] + dp[0]*jw*eta*dpl[1];
965: Ke[l*2+1][ll*2+1] += dp[1]*jw*eta*4.*dpl[1] + dp[0]*jw*eta*dpl[0] + pp*jw*(beta2/h)*ppl*thi->ssa_friction_scale;
966: /* extra Newton terms */
967: Ke[l*2+0][ll*2+0] += dp[0]*jw*deta*dgdu*(4.*du[0]+2.*dv[1]) + dp[1]*jw*deta*dgdu*(du[1]+dv[0]) + pp*jw*(dbeta2/h)*u*u*ppl*thi->ssa_friction_scale;
968: Ke[l*2+0][ll*2+1] += dp[0]*jw*deta*dgdv*(4.*du[0]+2.*dv[1]) + dp[1]*jw*deta*dgdv*(du[1]+dv[0]) + pp*jw*(dbeta2/h)*u*v*ppl*thi->ssa_friction_scale;
969: Ke[l*2+1][ll*2+0] += dp[1]*jw*deta*dgdu*(4.*dv[1]+2.*du[0]) + dp[0]*jw*deta*dgdu*(du[1]+dv[0]) + pp*jw*(dbeta2/h)*v*u*ppl*thi->ssa_friction_scale;
970: Ke[l*2+1][ll*2+1] += dp[1]*jw*deta*dgdv*(4.*dv[1]+2.*du[0]) + dp[0]*jw*deta*dgdv*(du[1]+dv[0]) + pp*jw*(dbeta2/h)*v*v*ppl*thi->ssa_friction_scale;
971: }
972: }
973: }
974: {
975: const MatStencil rc[4] = {{0,i,j,0},{0,i+1,j,0},{0,i+1,j+1,0},{0,i,j+1,0}};
976: MatSetValuesBlockedStencil(B,4,rc,4,rc,&Ke[0][0],ADD_VALUES);
977: }
978: }
979: }
980: THIDARestorePrm(info->da,&prm);
982: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
983: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
984: MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);
985: if (thi->verbose) {THIMatrixStatistics(thi,B,PETSC_VIEWER_STDOUT_WORLD);}
986: return(0);
987: }
991: static PetscErrorCode THIJacobianLocal_3D(DALocalInfo *info,Node ***x,Mat B,THI thi,THIAssemblyMode amode)
992: {
993: PetscInt xs,ys,xm,ym,zm,i,j,k,q,l,ll;
994: PetscReal hx,hy;
995: PrmNode **prm;
999: xs = info->zs;
1000: ys = info->ys;
1001: xm = info->zm;
1002: ym = info->ym;
1003: zm = info->xm;
1004: hx = thi->Lx / info->mz;
1005: hy = thi->Ly / info->my;
1007: MatZeroEntries(B);
1008: THIDAGetPrm(info->da,&prm);
1010: for (i=xs; i<xs+xm; i++) {
1011: for (j=ys; j<ys+ym; j++) {
1012: PrmNode pn[4];
1013: QuadExtract(prm,i,j,pn);
1014: for (k=0; k<zm-1; k++) {
1015: Node n[8];
1016: PetscReal zn[8],etabase = 0;
1017: PetscScalar Ke[8*2][8*2];
1018: PetscInt ls = 0;
1020: PrmHexGetZ(pn,k,zm,zn);
1021: HexExtract(x,i,j,k,n);
1022: PetscMemzero(Ke,sizeof(Ke));
1023: if (thi->no_slip && k == 0) {
1024: for (l=0; l<4; l++) n[l].u = n[l].v = 0;
1025: ls = 4;
1026: }
1027: for (q=0; q<8; q++) {
1028: PetscReal dz[3],phi[8],dphi[8][3],jw,eta,deta;
1029: PetscScalar du[3],dv[3],u,v;
1030: HexGrad(HexQDeriv[q],zn,dz);
1031: HexComputeGeometry(q,hx,hy,dz,phi,dphi,&jw);
1032: PointwiseNonlinearity(thi,n,phi,dphi,&u,&v,du,dv,&eta,&deta);
1033: jw /= thi->rhog; /* residuals are scaled by this factor */
1034: if (q == 0) etabase = eta;
1035: for (l=ls; l<8; l++) { /* test functions */
1036: const PetscReal *restrict dp = dphi[l];
1037: #if USE_SSE2_KERNELS
1038: /* gcc (up to my 4.5 snapshot) is really bad at hoisting intrinsics so we do it manually */
1039: __m128d
1040: p4 = _mm_set1_pd(4),p2 = _mm_set1_pd(2),p05 = _mm_set1_pd(0.5),
1041: p42 = _mm_setr_pd(4,2),p24 = _mm_shuffle_pd(p42,p42,_MM_SHUFFLE2(0,1)),
1042: du0 = _mm_set1_pd(du[0]),du1 = _mm_set1_pd(du[1]),du2 = _mm_set1_pd(du[2]),
1043: dv0 = _mm_set1_pd(dv[0]),dv1 = _mm_set1_pd(dv[1]),dv2 = _mm_set1_pd(dv[2]),
1044: jweta = _mm_set1_pd(jw*eta),jwdeta = _mm_set1_pd(jw*deta),
1045: dp0 = _mm_set1_pd(dp[0]),dp1 = _mm_set1_pd(dp[1]),dp2 = _mm_set1_pd(dp[2]),
1046: dp0jweta = _mm_mul_pd(dp0,jweta),dp1jweta = _mm_mul_pd(dp1,jweta),dp2jweta = _mm_mul_pd(dp2,jweta),
1047: p4du0p2dv1 = _mm_add_pd(_mm_mul_pd(p4,du0),_mm_mul_pd(p2,dv1)), /* 4 du0 + 2 dv1 */
1048: p4dv1p2du0 = _mm_add_pd(_mm_mul_pd(p4,dv1),_mm_mul_pd(p2,du0)), /* 4 dv1 + 2 du0 */
1049: pdu2dv2 = _mm_unpacklo_pd(du2,dv2), /* [du2, dv2] */
1050: du1pdv0 = _mm_add_pd(du1,dv0), /* du1 + dv0 */
1051: t1 = _mm_mul_pd(dp0,p4du0p2dv1), /* dp0 (4 du0 + 2 dv1) */
1052: t2 = _mm_mul_pd(dp1,p4dv1p2du0); /* dp1 (4 dv1 + 2 du0) */
1054: #endif
1055: #if defined COMPUTE_LOWER_TRIANGULAR /* The element matrices are always symmetric so computing the lower-triangular part is not necessary */
1056: for (ll=ls; ll<8; ll++) { /* trial functions */
1057: #else
1058: for (ll=l; ll<8; ll++) {
1059: #endif
1060: const PetscReal *restrict dpl = dphi[ll];
1061: if (amode == THIASSEMBLY_TRIDIAGONAL && (l-ll)%4) continue; /* these entries would not be inserted */
1062: #if !USE_SSE2_KERNELS
1063: /* The analytic Jacobian in nice, easy-to-read form */
1064: {
1065: PetscScalar dgdu,dgdv;
1066: dgdu = 2.*du[0]*dpl[0] + dv[1]*dpl[0] + 0.5*(du[1]+dv[0])*dpl[1] + 0.5*du[2]*dpl[2];
1067: dgdv = 2.*dv[1]*dpl[1] + du[0]*dpl[1] + 0.5*(du[1]+dv[0])*dpl[0] + 0.5*dv[2]*dpl[2];
1068: /* Picard part */
1069: Ke[l*2+0][ll*2+0] += dp[0]*jw*eta*4.*dpl[0] + dp[1]*jw*eta*dpl[1] + dp[2]*jw*eta*dpl[2];
1070: Ke[l*2+0][ll*2+1] += dp[0]*jw*eta*2.*dpl[1] + dp[1]*jw*eta*dpl[0];
1071: Ke[l*2+1][ll*2+0] += dp[1]*jw*eta*2.*dpl[0] + dp[0]*jw*eta*dpl[1];
1072: Ke[l*2+1][ll*2+1] += dp[1]*jw*eta*4.*dpl[1] + dp[0]*jw*eta*dpl[0] + dp[2]*jw*eta*dpl[2];
1073: /* extra Newton terms */
1074: Ke[l*2+0][ll*2+0] += dp[0]*jw*deta*dgdu*(4.*du[0]+2.*dv[1]) + dp[1]*jw*deta*dgdu*(du[1]+dv[0]) + dp[2]*jw*deta*dgdu*du[2];
1075: Ke[l*2+0][ll*2+1] += dp[0]*jw*deta*dgdv*(4.*du[0]+2.*dv[1]) + dp[1]*jw*deta*dgdv*(du[1]+dv[0]) + dp[2]*jw*deta*dgdv*du[2];
1076: Ke[l*2+1][ll*2+0] += dp[1]*jw*deta*dgdu*(4.*dv[1]+2.*du[0]) + dp[0]*jw*deta*dgdu*(du[1]+dv[0]) + dp[2]*jw*deta*dgdu*dv[2];
1077: Ke[l*2+1][ll*2+1] += dp[1]*jw*deta*dgdv*(4.*dv[1]+2.*du[0]) + dp[0]*jw*deta*dgdv*(du[1]+dv[0]) + dp[2]*jw*deta*dgdv*dv[2];
1078: }
1079: #else
1080: /* This SSE2 code is an exact replica of above, but uses explicit packed instructions for some speed
1081: * benefit. On my hardware, these intrinsics are almost twice as fast as above, reducing total assembly cost
1082: * by 25 to 30 percent. */
1083: {
1084: __m128d
1085: keu = _mm_loadu_pd(&Ke[l*2+0][ll*2+0]),
1086: kev = _mm_loadu_pd(&Ke[l*2+1][ll*2+0]),
1087: dpl01 = _mm_loadu_pd(&dpl[0]),dpl10 = _mm_shuffle_pd(dpl01,dpl01,_MM_SHUFFLE2(0,1)),dpl2 = _mm_set_sd(dpl[2]),
1088: t0,t3,pdgduv;
1089: keu = _mm_add_pd(keu,_mm_add_pd(_mm_mul_pd(_mm_mul_pd(dp0jweta,p42),dpl01),
1090: _mm_add_pd(_mm_mul_pd(dp1jweta,dpl10),
1091: _mm_mul_pd(dp2jweta,dpl2))));
1092: kev = _mm_add_pd(kev,_mm_add_pd(_mm_mul_pd(_mm_mul_pd(dp1jweta,p24),dpl01),
1093: _mm_add_pd(_mm_mul_pd(dp0jweta,dpl10),
1094: _mm_mul_pd(dp2jweta,_mm_shuffle_pd(dpl2,dpl2,_MM_SHUFFLE2(0,1))))));
1095: pdgduv = _mm_mul_pd(p05,_mm_add_pd(_mm_add_pd(_mm_mul_pd(p42,_mm_mul_pd(du0,dpl01)),
1096: _mm_mul_pd(p24,_mm_mul_pd(dv1,dpl01))),
1097: _mm_add_pd(_mm_mul_pd(du1pdv0,dpl10),
1098: _mm_mul_pd(pdu2dv2,_mm_set1_pd(dpl[2]))))); /* [dgdu, dgdv] */
1099: t0 = _mm_mul_pd(jwdeta,pdgduv); /* jw deta [dgdu, dgdv] */
1100: t3 = _mm_mul_pd(t0,du1pdv0); /* t0 (du1 + dv0) */
1101: _mm_storeu_pd(&Ke[l*2+0][ll*2+0],_mm_add_pd(keu,_mm_add_pd(_mm_mul_pd(t1,t0),
1102: _mm_add_pd(_mm_mul_pd(dp1,t3),
1103: _mm_mul_pd(t0,_mm_mul_pd(dp2,du2))))));
1104: _mm_storeu_pd(&Ke[l*2+1][ll*2+0],_mm_add_pd(kev,_mm_add_pd(_mm_mul_pd(t2,t0),
1105: _mm_add_pd(_mm_mul_pd(dp0,t3),
1106: _mm_mul_pd(t0,_mm_mul_pd(dp2,dv2))))));
1107: }
1108: #endif
1109: }
1110: }
1111: }
1112: if (k == 0) { /* on a bottom face */
1113: if (thi->no_slip) {
1114: const PetscReal hz = PetscRealPart(pn[0].h)/(zm-1);
1115: const PetscScalar diagu = 2*etabase/thi->rhog*(hx*hy/hz + hx*hz/hy + 4*hy*hz/hx),diagv = 2*etabase/thi->rhog*(hx*hy/hz + 4*hx*hz/hy + hy*hz/hx);
1116: Ke[0][0] = thi->dirichlet_scale*diagu;
1117: Ke[1][1] = thi->dirichlet_scale*diagv;
1118: } else {
1119: for (q=0; q<4; q++) {
1120: const PetscReal jw = 0.25*hx*hy/thi->rhog,*phi = QuadQInterp[q];
1121: PetscScalar u=0,v=0,rbeta2=0;
1122: PetscReal beta2,dbeta2;
1123: for (l=0; l<4; l++) {
1124: u += phi[l]*n[l].u;
1125: v += phi[l]*n[l].v;
1126: rbeta2 += phi[l]*pn[l].beta2;
1127: }
1128: THIFriction(thi,PetscRealPart(rbeta2),PetscRealPart(u*u+v*v)/2,&beta2,&dbeta2);
1129: for (l=0; l<4; l++) {
1130: const PetscReal pp = phi[l];
1131: for (ll=0; ll<4; ll++) {
1132: const PetscReal ppl = phi[ll];
1133: Ke[l*2+0][ll*2+0] += pp*jw*beta2*ppl + pp*jw*dbeta2*u*u*ppl;
1134: Ke[l*2+0][ll*2+1] += pp*jw*dbeta2*u*v*ppl;
1135: Ke[l*2+1][ll*2+0] += pp*jw*dbeta2*v*u*ppl;
1136: Ke[l*2+1][ll*2+1] += pp*jw*beta2*ppl + pp*jw*dbeta2*v*v*ppl;
1137: }
1138: }
1139: }
1140: }
1141: }
1142: {
1143: const MatStencil rc[8] = {{i,j,k,0},{i+1,j,k,0},{i+1,j+1,k,0},{i,j+1,k,0},{i,j,k+1,0},{i+1,j,k+1,0},{i+1,j+1,k+1,0},{i,j+1,k+1,0}};
1144: if (amode == THIASSEMBLY_TRIDIAGONAL) {
1145: for (l=0; l<4; l++) { /* Copy out each of the blocks, discarding horizontal coupling */
1146: const PetscInt l4 = l+4;
1147: const MatStencil rcl[2] = {{rc[l].k,rc[l].j,rc[l].i,0},{rc[l4].k,rc[l4].j,rc[l4].i,0}};
1148: #if defined COMPUTE_LOWER_TRIANGULAR
1149: const PetscScalar Kel[4][4] = {{Ke[2*l+0][2*l+0] ,Ke[2*l+0][2*l+1] ,Ke[2*l+0][2*l4+0] ,Ke[2*l+0][2*l4+1]},
1150: {Ke[2*l+1][2*l+0] ,Ke[2*l+1][2*l+1] ,Ke[2*l+1][2*l4+0] ,Ke[2*l+1][2*l4+1]},
1151: {Ke[2*l4+0][2*l+0],Ke[2*l4+0][2*l+1],Ke[2*l4+0][2*l4+0],Ke[2*l4+0][2*l4+1]},
1152: {Ke[2*l4+1][2*l+0],Ke[2*l4+1][2*l+1],Ke[2*l4+1][2*l4+0],Ke[2*l4+1][2*l4+1]}};
1153: #else
1154: /* Same as above except for the lower-left block */
1155: const PetscScalar Kel[4][4] = {{Ke[2*l+0][2*l+0] ,Ke[2*l+0][2*l+1] ,Ke[2*l+0][2*l4+0] ,Ke[2*l+0][2*l4+1]},
1156: {Ke[2*l+1][2*l+0] ,Ke[2*l+1][2*l+1] ,Ke[2*l+1][2*l4+0] ,Ke[2*l+1][2*l4+1]},
1157: {Ke[2*l+0][2*l4+0],Ke[2*l+1][2*l4+0],Ke[2*l4+0][2*l4+0],Ke[2*l4+0][2*l4+1]},
1158: {Ke[2*l+0][2*l4+1],Ke[2*l+1][2*l4+1],Ke[2*l4+1][2*l4+0],Ke[2*l4+1][2*l4+1]}};
1159: #endif
1160: MatSetValuesBlockedStencil(B,2,rcl,2,rcl,&Kel[0][0],ADD_VALUES);
1161: }
1162: } else {
1163: #if !defined COMPUTE_LOWER_TRIANGULAR /* fill in lower-triangular part, this is really cheap compared to computing the entries */
1164: for (l=0; l<8; l++) {
1165: for (ll=l+1; ll<8; ll++) {
1166: Ke[ll*2+0][l*2+0] = Ke[l*2+0][ll*2+0];
1167: Ke[ll*2+1][l*2+0] = Ke[l*2+0][ll*2+1];
1168: Ke[ll*2+0][l*2+1] = Ke[l*2+1][ll*2+0];
1169: Ke[ll*2+1][l*2+1] = Ke[l*2+1][ll*2+1];
1170: }
1171: }
1172: #endif
1173: MatSetValuesBlockedStencil(B,8,rc,8,rc,&Ke[0][0],ADD_VALUES);
1174: }
1175: }
1176: }
1177: }
1178: }
1179: THIDARestorePrm(info->da,&prm);
1181: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
1182: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
1183: MatSetOption(B,MAT_SYMMETRIC,PETSC_TRUE);
1184: if (thi->verbose) {THIMatrixStatistics(thi,B,PETSC_VIEWER_STDOUT_WORLD);}
1185: return(0);
1186: }
1190: static PetscErrorCode THIJacobianLocal_3D_Full(DALocalInfo *info,Node ***x,Mat B,THI thi)
1191: {
1195: THIJacobianLocal_3D(info,x,B,thi,THIASSEMBLY_FULL);
1196: return(0);
1197: }
1201: static PetscErrorCode THIJacobianLocal_3D_Tridiagonal(DALocalInfo *info,Node ***x,Mat B,THI thi)
1202: {
1206: THIJacobianLocal_3D(info,x,B,thi,THIASSEMBLY_TRIDIAGONAL);
1207: return(0);
1208: }
1212: static PetscErrorCode DARefineHierarchy_THI(DA dac0,PetscInt nlevels,DA hierarchy[])
1213: {
1215: THI thi;
1216: PetscInt dim,M,N,m,n,s,dof;
1217: DA dac,daf;
1218: DAStencilType st;
1221: PetscObjectQuery((PetscObject)dac0,"THI",(PetscObject*)&thi);
1222: if (!thi) SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot refine this DA, missing composed THI instance");
1223: if (nlevels > 1) {
1224: DARefineHierarchy(dac0,nlevels-1,hierarchy);
1225: dac = hierarchy[nlevels-2];
1226: } else {
1227: dac = dac0;
1228: }
1229: DAGetInfo(dac,&dim, &N,&M,0, &n,&m,0, &dof,&s,0,&st);
1230: if (dim != 2) SETERRQ(PETSC_ERR_ARG_WRONG,"This function can only refine 2D DAs");
1231: /* Creates a 3D DA with the same map-plane layout as the 2D one, with contiguous columns */
1232: DACreate3d(((PetscObject)dac)->comm,DA_YZPERIODIC,st,thi->zlevels,N,M,1,n,m,dof,s,NULL,NULL,NULL,&daf);
1233: daf->ops->getmatrix = dac->ops->getmatrix;
1234: daf->ops->getinterpolation = dac->ops->getinterpolation;
1235: daf->ops->getcoloring = dac->ops->getcoloring;
1236: daf->interptype = dac->interptype;
1238: DASetFieldName(daf,0,"x-velocity");
1239: DASetFieldName(daf,1,"y-velocity");
1240: hierarchy[nlevels-1] = daf;
1241: return(0);
1242: }
1246: static PetscErrorCode DAGetInterpolation_THI(DA dac,DA daf,Mat *A,Vec *scale)
1247: {
1249: PetscTruth flg,isda2;
1256: PetscTypeCompare((PetscObject)dac,DA2D,&flg);
1257: if (!flg) SETERRQ(PETSC_ERR_ARG_WRONG,"Expected coarse DA to be 2D");
1258: PetscTypeCompare((PetscObject)daf,DA2D,&isda2);
1259: if (isda2) {
1260: /* We are in the 2D problem and use normal DA interpolation */
1261: DAGetInterpolation(dac,daf,A,scale);
1262: } else {
1263: PetscInt i,j,k,xs,ys,zs,xm,ym,zm,mx,my,mz,rstart,cstart;
1264: Mat B;
1266: DAGetInfo(daf,0, &mz,&my,&mx, 0,0,0, 0,0,0,0);
1267: DAGetCorners(daf,&zs,&ys,&xs,&zm,&ym,&xm);
1268: if (zs != 0) SETERRQ(1,"unexpected");
1269: MatCreate(((PetscObject)daf)->comm,&B);
1270: MatSetSizes(B,xm*ym*zm,xm*ym,mx*my*mz,mx*my);
1271:
1272: MatSetType(B,MATAIJ);
1273: MatSeqAIJSetPreallocation(B,1,NULL);
1274: MatMPIAIJSetPreallocation(B,1,NULL,0,NULL);
1275: MatGetOwnershipRange(B,&rstart,NULL);
1276: MatGetOwnershipRangeColumn(B,&cstart,NULL);
1277: for (i=xs; i<xs+xm; i++) {
1278: for (j=ys; j<ys+ym; j++) {
1279: for (k=zs; k<zs+zm; k++) {
1280: PetscInt i2 = i*ym+j,i3 = i2*zm+k;
1281: PetscScalar val = ((k == 0 || k == mz-1) ? 0.5 : 1.) / (mz-1.); /* Integration using trapezoid rule */
1282: MatSetValue(B,cstart+i3,rstart+i2,val,INSERT_VALUES);
1283: }
1284: }
1285: }
1286: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
1287: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
1288: MatCreateMAIJ(B,sizeof(Node)/sizeof(PetscScalar),A);
1289: MatDestroy(B);
1290: }
1291: return(0);
1292: }
1296: static PetscErrorCode DAGetMatrix_THI_Tridiagonal(DA da,const MatType mtype,Mat *J)
1297: {
1299: Mat A;
1300: PetscInt xm,ym,zm,dim,dof = 2,starts[3],dims[3];
1301: ISLocalToGlobalMapping ltog,ltogb;
1304: DAGetInfo(da,&dim, 0,0,0, 0,0,0, 0,0,0,0);
1305: if (dim != 3) SETERRQ(PETSC_ERR_ARG_WRONG,"Expected DA to be 3D");
1306: DAGetCorners(da,0,0,0,&zm,&ym,&xm);
1307: DAGetISLocalToGlobalMapping(da,<og);
1308: DAGetISLocalToGlobalMappingBlck(da,<ogb);
1309: MatCreate(((PetscObject)da)->comm,&A);
1310: MatSetSizes(A,dof*xm*ym*zm,dof*xm*ym*zm,PETSC_DETERMINE,PETSC_DETERMINE);
1311: MatSetType(A,mtype);
1312: MatSetFromOptions(A);
1313: MatSeqAIJSetPreallocation(A,6,PETSC_NULL);
1314: MatMPIAIJSetPreallocation(A,6,PETSC_NULL,0,PETSC_NULL);
1315: MatSeqBAIJSetPreallocation(A,dof,3,PETSC_NULL);
1316: MatMPIBAIJSetPreallocation(A,dof,3,PETSC_NULL,0,PETSC_NULL);
1317: MatSeqSBAIJSetPreallocation(A,dof,2,PETSC_NULL);
1318: MatMPISBAIJSetPreallocation(A,dof,2,PETSC_NULL,0,PETSC_NULL);
1319: MatSetBlockSize(A,dof);
1320: MatSetLocalToGlobalMapping(A,ltog);
1321: MatSetLocalToGlobalMappingBlock(A,ltogb);
1322: DAGetGhostCorners(da,&starts[0],&starts[1],&starts[2],&dims[0],&dims[1],&dims[2]);
1323: MatSetStencil(A,dim,dims,starts,dof);
1324: *J = A;
1325: return(0);
1326: }
1330: static PetscErrorCode THIDAVecView_VTK_XML(THI thi,DA da,Vec X,const char filename[])
1331: {
1332: const PetscInt dof = 2;
1333: Units units = thi->units;
1334: MPI_Comm comm;
1336: PetscViewer viewer;
1337: PetscMPIInt rank,size,tag,nn,nmax;
1338: PetscInt mx,my,mz,r,range[6];
1339: PetscScalar *x;
1342: comm = ((PetscObject)thi)->comm;
1343: DAGetInfo(da,0, &mz,&my,&mx, 0,0,0, 0,0,0,0);
1344: MPI_Comm_size(comm,&size);
1345: MPI_Comm_rank(comm,&rank);
1346: PetscViewerASCIIOpen(comm,filename,&viewer);
1347: PetscViewerASCIIPrintf(viewer,"<VTKFile type=\"StructuredGrid\" version=\"0.1\" byte_order=\"LittleEndian\">\n");
1348: PetscViewerASCIIPrintf(viewer," <StructuredGrid WholeExtent=\"%d %d %d %d %d %d\">\n",0,mz-1,0,my-1,0,mx-1);
1350: DAGetCorners(da,range,range+1,range+2,range+3,range+4,range+5);
1351: nn = PetscMPIIntCast(range[3]*range[4]*range[5]*dof);
1352: MPI_Reduce(&nn,&nmax,1,MPI_INT,MPI_MAX,0,comm);
1353: tag = ((PetscObject) viewer)->tag;
1354: VecGetArray(X,&x);
1355: if (!rank) {
1356: PetscScalar *array;
1357: PetscMalloc(nmax*sizeof(PetscScalar),&array);
1358: for (r=0; r<size; r++) {
1359: PetscInt i,j,k,xs,xm,ys,ym,zs,zm;
1360: PetscScalar *ptr;
1361: MPI_Status status;
1362: if (r) {
1363: MPI_Recv(range,6,MPIU_INT,r,tag,comm,MPI_STATUS_IGNORE);
1364: }
1365: zs = range[0];ys = range[1];xs = range[2];zm = range[3];ym = range[4];xm = range[5];
1366: if (xm*ym*zm*dof > nmax) SETERRQ(1,"should not happen");
1367: if (r) {
1368: MPI_Recv(array,nmax,MPIU_SCALAR,r,tag,comm,&status);
1369: MPI_Get_count(&status,MPIU_SCALAR,&nn);
1370: if (nn != xm*ym*zm*dof) SETERRQ(1,"should not happen");
1371: ptr = array;
1372: } else ptr = x;
1373: PetscViewerASCIIPrintf(viewer," <Piece Extent=\"%d %d %d %d %d %d\">\n",zs,zs+zm-1,ys,ys+ym-1,xs,xs+xm-1);
1375: PetscViewerASCIIPrintf(viewer," <Points>\n");
1376: PetscViewerASCIIPrintf(viewer," <DataArray type=\"Float32\" NumberOfComponents=\"3\" format=\"ascii\">\n");
1377: for (i=xs; i<xs+xm; i++) {
1378: for (j=ys; j<ys+ym; j++) {
1379: for (k=zs; k<zs+zm; k++) {
1380: PrmNode p;
1381: PetscReal xx = thi->Lx*i/mx,yy = thi->Ly*j/my,zz;
1382: thi->initialize(thi,xx,yy,&p);
1383: zz = PetscRealPart(p.b) + PetscRealPart(p.h)*k/(mz-1);
1384: PetscViewerASCIIPrintf(viewer,"%f %f %f\n",xx,yy,zz);
1385: }
1386: }
1387: }
1388: PetscViewerASCIIPrintf(viewer," </DataArray>\n");
1389: PetscViewerASCIIPrintf(viewer," </Points>\n");
1391: PetscViewerASCIIPrintf(viewer," <PointData>\n");
1392: PetscViewerASCIIPrintf(viewer," <DataArray type=\"Float32\" Name=\"velocity\" NumberOfComponents=\"3\" format=\"ascii\">\n");
1393: for (i=0; i<nn; i+=dof) {
1394: PetscViewerASCIIPrintf(viewer,"%f %f %f\n",PetscRealPart(ptr[i])*units->year/units->meter,PetscRealPart(ptr[i+1])*units->year/units->meter,0.0);
1395: }
1396: PetscViewerASCIIPrintf(viewer," </DataArray>\n");
1398: PetscViewerASCIIPrintf(viewer," <DataArray type=\"Int32\" Name=\"rank\" NumberOfComponents=\"1\" format=\"ascii\">\n");
1399: for (i=0; i<nn; i+=dof) {
1400: PetscViewerASCIIPrintf(viewer,"%d\n",r);
1401: }
1402: PetscViewerASCIIPrintf(viewer," </DataArray>\n");
1403: PetscViewerASCIIPrintf(viewer," </PointData>\n");
1405: PetscViewerASCIIPrintf(viewer," </Piece>\n");
1406: }
1407: PetscFree(array);
1408: } else {
1409: MPI_Send(range,6,MPIU_INT,0,tag,comm);
1410: MPI_Send(x,nn,MPIU_SCALAR,0,tag,comm);
1411: }
1412: VecRestoreArray(X,&x);
1413: PetscViewerASCIIPrintf(viewer," </StructuredGrid>\n");
1414: PetscViewerASCIIPrintf(viewer,"</VTKFile>\n");
1415: PetscViewerDestroy(viewer);
1416: return(0);
1417: }
1421: int main(int argc,char *argv[])
1422: {
1423: MPI_Comm comm;
1424: DMMG *dmmg;
1425: THI thi;
1426: PetscInt i;
1428: PetscLogStage stages[3];
1429: PetscTruth repeat_fine_solve = PETSC_FALSE;
1431: PetscInitialize(&argc,&argv,0,help);
1432: comm = PETSC_COMM_WORLD;
1434: /* We define two stages. The first includes all setup costs and solves from a naive initial guess. The second solve
1435: * is more indicative of what might occur during time-stepping. The initial guess is interpolated from the next
1436: * coarser (as in the last step of grid sequencing), and so requires fewer Newton steps. */
1437: PetscOptionsGetTruth(NULL,"-repeat_fine_solve",&repeat_fine_solve,NULL);
1438: PetscLogStageRegister("Full solve",&stages[0]);
1439: if (repeat_fine_solve) {
1440: PetscLogStageRegister("Fine-1 solve",&stages[1]);
1441: PetscLogStageRegister("Fine-only solve",&stages[2]);
1442: }
1444: PetscLogStagePush(stages[0]);
1446: THICreate(comm,&thi);
1447: DMMGCreate(PETSC_COMM_WORLD,thi->nlevels,thi,&dmmg);
1448: {
1449: DA da;
1450: PetscInt M = 3,N = 3,P = 2;
1451: PetscOptionsBegin(comm,NULL,"Grid resolution options","");
1452: {
1453: PetscOptionsInt("-M","Number of elements in x-direction on coarse level","",M,&M,NULL);
1454: N = M;
1455: PetscOptionsInt("-N","Number of elements in y-direction on coarse level (if different from M)","",N,&N,NULL);
1456: if (thi->coarse2d) {
1457: PetscOptionsInt("-zlevels","Number of elements in z-direction on fine level","",thi->zlevels,&thi->zlevels,NULL);
1458: } else {
1459: PetscOptionsInt("-P","Number of elements in z-direction on coarse level","",P,&P,NULL);
1460: }
1461: }
1462: PetscOptionsEnd();
1463: if (thi->coarse2d) {
1464: DACreate2d(comm,DA_XYPERIODIC,DA_STENCIL_BOX,N,M,PETSC_DETERMINE,PETSC_DETERMINE,sizeof(Node)/sizeof(PetscScalar),1,0,0,&da);
1465: da->ops->refinehierarchy = DARefineHierarchy_THI;
1466: da->ops->getinterpolation = DAGetInterpolation_THI;
1467: PetscObjectCompose((PetscObject)da,"THI",(PetscObject)thi);
1468: } else {
1469: DACreate3d(comm,DA_YZPERIODIC,DA_STENCIL_BOX,P,N,M,1,PETSC_DETERMINE,PETSC_DETERMINE,sizeof(Node)/sizeof(PetscScalar),1,0,0,0,&da);
1470: }
1471: DASetFieldName(da,0,"x-velocity");
1472: DASetFieldName(da,1,"y-velocity");
1473: DMMGSetDM(dmmg,(DM)da);
1474: DADestroy(da);
1475: }
1476: if (thi->tridiagonal) {
1477: (DMMGGetDA(dmmg))->ops->getmatrix = DAGetMatrix_THI_Tridiagonal;
1478: }
1479: {
1480: /* Use the user-defined matrix type on all but the coarse level */
1481: DMMGSetMatType(dmmg,thi->mattype);
1482: /* PCREDUNDANT only works with AIJ, and so do the third-party direct solvers. So when running in parallel, we can't
1483: * use the faster (S)BAIJ formats on the coarse level. */
1484: PetscFree(dmmg[0]->mtype);
1485: PetscStrallocpy(MATAIJ,&dmmg[0]->mtype);
1486: }
1487: PetscOptionsSetValue("-dmmg_form_function_ghost","1"); /* Spectacularly ugly API, our function evaluation provides ghost values */
1488: DMMGSetSNESLocal(dmmg,THIFunctionLocal,THIJacobianLocal_3D_Full,0,0);
1489: if (thi->tridiagonal) {
1490: DASetLocalJacobian(DMMGGetDA(dmmg),(DALocalFunction1)THIJacobianLocal_3D_Tridiagonal);
1491: }
1492: if (thi->coarse2d) {
1493: for (i=0; i<DMMGGetLevels(dmmg)-1; i++) {
1494: DASetLocalJacobian((DA)dmmg[i]->dm,(DALocalFunction1)THIJacobianLocal_2D);
1495: }
1496: }
1497: for (i=0; i<DMMGGetLevels(dmmg); i++) {
1498: /* This option is only valid for the SBAIJ format. The matrices we assemble are symmetric, but the SBAIJ assembly
1499: * functions will complain if we provide lower-triangular entries without setting this option. */
1500: Mat B = dmmg[i]->B;
1501: PetscTruth flg1,flg2;
1502: PetscTypeCompare((PetscObject)B,MATSEQSBAIJ,&flg1);
1503: PetscTypeCompare((PetscObject)B,MATMPISBAIJ,&flg2);
1504: if (flg1 || flg2) {
1505: MatSetOption(B,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);
1506: }
1507: }
1508: MatSetOptionsPrefix(DMMGGetB(dmmg),"thi_");
1509: DMMGSetFromOptions(dmmg);
1510: THISetDMMG(thi,dmmg);
1512: DMMGSetInitialGuess(dmmg,THIInitial);
1513: DMMGSolve(dmmg);
1515: PetscLogStagePop();
1516: THISolveStatistics(thi,dmmg,0,"Full");
1517: /* The first solve is complete */
1519: if (repeat_fine_solve && DMMGGetLevels(dmmg) > 1) {
1520: PetscInt nlevels = DMMGGetLevels(dmmg);
1521: DMMG dmmgc = dmmg[nlevels-2],dmmgf = dmmg[nlevels-1];
1522: Vec Xc = dmmgc->x,Xf = dmmgf->x;
1523: MatRestrict(dmmgf->R,Xf,Xc);
1524: VecPointwiseMult(Xc,Xc,dmmgf->Rscale);
1526: /* Solve on the level with one coarsening, this is a more stringent test of latency */
1527: PetscLogStagePush(stages[1]);
1528: (*dmmgc->solve)(dmmg,nlevels-2);
1529: PetscLogStagePop();
1530: THISolveStatistics(thi,dmmg,1,"Fine-1");
1532: MatInterpolate(dmmgf->R,Xc,Xf);
1534: /* Solve again on the finest level, this is representative of what is needed in a time-stepping code */
1535: PetscLogStagePush(stages[2]);
1536: (*dmmgf->solve)(dmmg,nlevels-1);
1537: PetscLogStagePop();
1538: THISolveStatistics(thi,dmmg,0,"Fine");
1539: }
1541: {
1542: PetscTruth flg;
1543: char filename[PETSC_MAX_PATH_LEN] = "";
1544: PetscOptionsGetString(PETSC_NULL,"-o",filename,sizeof(filename),&flg);
1545: if (flg) {
1546: THIDAVecView_VTK_XML(thi,DMMGGetDA(dmmg),DMMGGetx(dmmg),filename);
1547: }
1548: }
1550: DMMGDestroy(dmmg);
1551: THIDestroy(thi);
1552: PetscFinalize();
1553: return 0;
1554: }