netCDF 4.2.1.1
/usr/src/RPM/BUILD/libnetcdf7-seq-4.2.1.1/ncdump/nccopy.c
00001 /*********************************************************************
00002  *   Copyright 2010, University Corporation for Atmospheric Research
00003  *   See netcdf/README file for copying and redistribution conditions.
00004  *   Thanks to Philippe Poilbarbe and Antonio S. Cofiño for 
00005  *   compression additions.
00006  *   $Id: nccopy.c 400 2010-08-27 21:02:52Z russ $
00007  *********************************************************************/
00008 
00009 #include "config.h"             /* for USE_NETCDF4 macro */
00010 #include <stdlib.h>
00011 #ifdef HAVE_GETOPT_H
00012 #include <getopt.h>
00013 #endif
00014 #ifndef _WIN32
00015 #include <unistd.h>
00016 #endif
00017 #include <string.h>
00018 #include <netcdf.h>
00019 #include "nciter.h"
00020 #include "chunkspec.h"
00021 #include "utils.h"
00022 #include "dimmap.h"
00023 
00024 /* default bytes of memory we are willing to allocate for variable
00025  * values during copy */
00026 #define COPY_BUFFER_SIZE (5000000)
00027 #define COPY_CHUNKCACHE_PREEMPTION (1.0f) /* for copying, can eject fully read chunks */
00028 #define SAME_AS_INPUT (-1)      /* default, if kind not specified */
00029 #define CHUNK_THRESHOLD (1024)  /* variables with fewer bytes don't get chunked */
00030 
00031 #ifndef USE_NETCDF4
00032 #define NC_CLASSIC_MODEL 0x0100 /* Enforce classic model if netCDF-4 not available. */
00033 #endif
00034 
00035 /* Global variables for command-line requests */
00036 char *progname;        /* for error messages */
00037 static int option_kind = SAME_AS_INPUT;
00038 static int option_deflate_level = -1;   /* default, compress output only if input compressed */
00039 static int option_shuffle_vars = NC_NOSHUFFLE; /* default, no shuffling on compression */
00040 static int option_fix_unlimdims = 0; /* default, preserve unlimited dimensions */
00041 static char* option_chunkspec = 0;   /* default, no chunk specification */
00042 static size_t option_copy_buffer_size = COPY_BUFFER_SIZE;
00043 static size_t option_chunk_cache_size = CHUNK_CACHE_SIZE; /* default from config.h */
00044 static size_t option_chunk_cache_nelems = CHUNK_CACHE_NELEMS; /* default from config.h */
00045 static int option_compute_chunkcaches = 0; /* default, don't try still flaky estimate of
00046                                             * chunk cache for each variable */
00047 static int option_read_diskless = 0; /* default, don't read input into memory on open */
00048 static int option_write_diskless = 0; /* default, don't write output to diskless file */
00049 
00050 /* get group id in output corresponding to group igrp in input,
00051  * given parent group id (or root group id) parid in output. */
00052 static int
00053 get_grpid(int igrp, int parid, int *ogrpp) {
00054     int stat = NC_NOERR;
00055     int ogid = parid;           /* like igrp but in output file */
00056 #ifdef USE_NETCDF4
00057     int inparid;
00058 
00059     /* if not root group, get corresponding output groupid from group name */
00060     stat = nc_inq_grp_parent(igrp, &inparid);
00061     if(stat == NC_NOERR) {      /* not root group */
00062         char grpname[NC_MAX_NAME + 1];
00063         NC_CHECK(nc_inq_grpname(igrp, grpname));
00064         NC_CHECK(nc_inq_grp_ncid(parid, grpname, &ogid));
00065     } else if(stat == NC_ENOGRP) { /* root group */
00066         stat = NC_NOERR;
00067     } else {
00068         NC_CHECK(stat);
00069     }
00070 #endif  /* USE_NETCDF4 */
00071     *ogrpp = ogid;
00072     return stat;
00073 }
00074 
00075 
00076 #ifdef USE_NETCDF4
00077 /* Get parent id needed to define a new group from its full name in an
00078  * open file identified by ncid.  Assumes all intermediate groups are
00079  * already defined.  */
00080 static int
00081 nc_inq_parid(int ncid, const char *fullname, int *locidp) {
00082     int stat = NC_NOERR;
00083     char *parent = strdup(fullname);
00084     char *slash = "/";          /* groupname separator */
00085     char *last_slash;
00086     if(parent == NULL) {
00087         NC_CHECK(NC_ENOMEM);
00088     } else
00089         last_slash = strrchr(parent, '/');
00090     if(last_slash == parent) {  /* parent is root */
00091         free(parent);
00092         parent = strdup(slash);
00093     } else {
00094         *last_slash = '\0';     /* truncate to get parent name */
00095     }
00096     NC_CHECK(nc_inq_grp_full_ncid(ncid, parent, locidp));
00097        free(parent);
00098     return stat;
00099 }
00100 
00101 /* Return size of chunk in bytes for a variable varid in a group igrp, or 0 if
00102  * layout is contiguous */
00103 static int
00104 inq_var_chunksize(int igrp, int varid, size_t* chunksizep) {
00105     int stat = NC_NOERR;
00106     int ndims;
00107     size_t *chunksizes;
00108     int dim;
00109     int contig = 1;
00110     nc_type vartype;
00111     size_t value_size;
00112     size_t prod;
00113 
00114     NC_CHECK(nc_inq_vartype(igrp, varid, &vartype));
00115     /* from type, get size in memory needed for each value */
00116     NC_CHECK(nc_inq_type(igrp, vartype, NULL, &value_size));
00117     prod = value_size;
00118     NC_CHECK(nc_inq_varndims(igrp, varid, &ndims));
00119     chunksizes = (size_t *) emalloc((ndims + 1) * sizeof(size_t));
00120     if(ndims > 0) {
00121         NC_CHECK(nc_inq_var_chunking(igrp, varid, &contig, NULL));
00122     }
00123     if(contig == 1) {
00124         *chunksizep = 0;
00125     } else {
00126         NC_CHECK(nc_inq_var_chunking(igrp, varid, &contig, chunksizes));
00127         for(dim = 0; dim < ndims; dim++) {
00128             prod *= chunksizes[dim];
00129         }
00130         *chunksizep = prod;
00131     }
00132     free(chunksizes);
00133     return stat;
00134 }
00135 
00136 /* Return estimated number of elems required in chunk cache and
00137  * estimated size of chunk cache adequate to efficiently copy input
00138  * variable ivarid to output variable ovarid, which may have different
00139  * chunk size and shape */
00140 static int
00141 inq_var_chunking_params(int igrp, int ivarid, int ogrp, int ovarid,
00142                         size_t* chunkcache_sizep,
00143                         size_t *chunkcache_nelemsp,
00144                         float * chunkcache_preemptionp)
00145 {
00146     int stat = NC_NOERR;
00147     int ndims;
00148     size_t *ichunksizes, *ochunksizes;
00149     int dim;
00150     int icontig = 1, ocontig = 1;
00151     nc_type vartype;
00152     size_t value_size;
00153     size_t prod, iprod, oprod;
00154     size_t nelems;
00155     *chunkcache_nelemsp = CHUNK_CACHE_NELEMS;
00156     *chunkcache_sizep = CHUNK_CACHE_SIZE;
00157     *chunkcache_preemptionp = COPY_CHUNKCACHE_PREEMPTION;
00158 
00159     NC_CHECK(nc_inq_varndims(igrp, ivarid, &ndims));
00160     if(ndims > 0) {
00161         NC_CHECK(nc_inq_var_chunking(igrp, ivarid, &icontig, NULL));
00162         NC_CHECK(nc_inq_var_chunking(ogrp, ovarid, &ocontig, NULL));
00163     }
00164     if(icontig == 1 && ocontig == 1) { /* no chunking in input or output */
00165         *chunkcache_nelemsp = 0;
00166         *chunkcache_sizep = 0;
00167         *chunkcache_preemptionp = 0;
00168         return stat;
00169     }
00170 
00171     NC_CHECK(nc_inq_vartype(igrp, ivarid, &vartype));
00172     NC_CHECK(nc_inq_type(igrp, vartype, NULL, &value_size));
00173     iprod = value_size;
00174 
00175     if(icontig == 0 && ocontig == 1) { /* chunking only in input */
00176         *chunkcache_nelemsp = 1;       /* read one input chunk at a time */
00177         *chunkcache_sizep = iprod;
00178         *chunkcache_preemptionp = 1.0f;
00179         return stat;
00180     }
00181 
00182     ichunksizes = (size_t *) emalloc((ndims + 1) * sizeof(size_t));
00183     if(icontig == 1) { /* if input contiguous, treat as if chunked on
00184                         * first dimension */
00185         ichunksizes[0] = 1;
00186         for(dim = 1; dim < ndims; dim++) {
00187             ichunksizes[dim] = dim;
00188         }
00189     } else {
00190         NC_CHECK(nc_inq_var_chunking(igrp, ivarid, &icontig, ichunksizes));
00191     }
00192 
00193     /* now can assume chunking in both input and output */
00194     ochunksizes = (size_t *) emalloc((ndims + 1) * sizeof(size_t));
00195     NC_CHECK(nc_inq_var_chunking(ogrp, ovarid, &ocontig, ochunksizes));
00196 
00197     nelems = 1;
00198     oprod = value_size;
00199     for(dim = 0; dim < ndims; dim++) {
00200         nelems += 1 + (ichunksizes[dim] - 1) / ochunksizes[dim];
00201         iprod *= ichunksizes[dim];
00202         oprod *= ochunksizes[dim];
00203     }
00204     prod = iprod + oprod * (nelems - 1);
00205     *chunkcache_nelemsp = nelems;
00206     *chunkcache_sizep = prod;
00207     free(ichunksizes);
00208     free(ochunksizes);
00209     return stat;
00210 }
00211 
00212 /* Forward declaration, because copy_type, copy_vlen_type call each other */
00213 static int copy_type(int igrp, nc_type typeid, int ogrp);
00214 
00215 /* 
00216  * copy a user-defined variable length type in the group igrp to the
00217  * group ogrp
00218  */
00219 static int
00220 copy_vlen_type(int igrp, nc_type itype, int ogrp)
00221 {
00222     int stat = NC_NOERR; 
00223     nc_type ibasetype;
00224     nc_type obasetype;          /* base type in target group */
00225     char name[NC_MAX_NAME];
00226     size_t size;
00227     char basename[NC_MAX_NAME];
00228     size_t basesize;
00229     nc_type vlen_type;
00230 
00231     NC_CHECK(nc_inq_vlen(igrp, itype, name, &size, &ibasetype));
00232     /* to get base type id in target group, use name of base type in
00233      * source group */
00234     NC_CHECK(nc_inq_type(igrp, ibasetype, basename, &basesize));
00235     stat = nc_inq_typeid(ogrp, basename, &obasetype);
00236     /* if no such type, create it now */
00237     if(stat == NC_EBADTYPE) {
00238         NC_CHECK(copy_type(igrp, ibasetype, ogrp));
00239         stat = nc_inq_typeid(ogrp, basename, &obasetype);
00240     }
00241     NC_CHECK(stat);
00242 
00243     /* Now we know base type exists in output and we know its type id */
00244     NC_CHECK(nc_def_vlen(ogrp, name, obasetype, &vlen_type));
00245 
00246     return stat;
00247 }
00248 
00249 /* 
00250  * copy a user-defined opaque type in the group igrp to the group ogrp
00251  */
00252 static int
00253 copy_opaque_type(int igrp, nc_type itype, int ogrp)
00254 {
00255     int stat = NC_NOERR; 
00256     nc_type otype;
00257     char name[NC_MAX_NAME];
00258     size_t size;
00259 
00260     NC_CHECK(nc_inq_opaque(igrp, itype, name, &size));
00261     NC_CHECK(nc_def_opaque(ogrp, size, name, &otype));
00262 
00263     return stat;
00264 }
00265 
00266 /* 
00267  * copy a user-defined enum type in the group igrp to the group ogrp
00268  */
00269 static int
00270 copy_enum_type(int igrp, nc_type itype, int ogrp)
00271 {
00272     int stat = NC_NOERR; 
00273     nc_type otype;
00274     nc_type basetype;
00275     size_t basesize;
00276     size_t nmembers;
00277     char name[NC_MAX_NAME];
00278     int i;
00279 
00280     NC_CHECK(nc_inq_enum(igrp, itype, name, &basetype, &basesize, &nmembers));
00281     NC_CHECK(nc_def_enum(ogrp, basetype, name, &otype));
00282     for(i = 0; i < nmembers; i++) { /* insert enum members */
00283         char ename[NC_MAX_NAME];
00284         long long val;          /* large enough to hold any integer type */
00285         NC_CHECK(nc_inq_enum_member(igrp, itype, i, ename, &val));
00286         NC_CHECK(nc_insert_enum(ogrp, otype, ename, &val));
00287     }
00288     return stat;
00289 }
00290 
00291 /* 
00292  * copy a user-defined compound type in the group igrp to the group ogrp
00293  */
00294 static int
00295 copy_compound_type(int igrp, nc_type itype, int ogrp)
00296 {
00297     int stat = NC_NOERR; 
00298     char name[NC_MAX_NAME];
00299     size_t size;
00300     size_t nfields;
00301     nc_type otype;
00302     int fid;
00303 
00304     NC_CHECK(nc_inq_compound(igrp, itype, name, &size, &nfields));
00305     NC_CHECK(nc_def_compound(ogrp, size, name, &otype));
00306 
00307     for (fid = 0; fid < nfields; fid++) {
00308         char fname[NC_MAX_NAME];
00309         char ftypename[NC_MAX_NAME];
00310         size_t foff;
00311         nc_type iftype, oftype;
00312         int fndims;
00313 
00314         NC_CHECK(nc_inq_compound_field(igrp, itype, fid, fname, &foff, &iftype, &fndims, NULL));
00315         /* type ids in source don't necessarily correspond to same
00316          * typeids in destination, so look up destination typeid by using
00317          * field type name */
00318         NC_CHECK(nc_inq_type(igrp, iftype, ftypename, NULL));
00319         NC_CHECK(nc_inq_typeid(ogrp, ftypename, &oftype));
00320         if(fndims == 0) {
00321             NC_CHECK(nc_insert_compound(ogrp, otype, fname, foff, oftype));
00322         } else {                /* field is array type */
00323             int *fdimsizes;
00324             fdimsizes = (int *) emalloc((fndims + 1) * sizeof(int));
00325             stat = nc_inq_compound_field(igrp, itype, fid, NULL, NULL, NULL, 
00326                                          NULL, fdimsizes);
00327             NC_CHECK(nc_insert_array_compound(ogrp, otype, fname, foff, oftype, fndims, fdimsizes));
00328             free(fdimsizes);
00329         }
00330     }
00331     return stat;
00332 }
00333 
00334 
00335 /* 
00336  * copy a user-defined type in the group igrp to the group ogrp
00337  */
00338 static int
00339 copy_type(int igrp, nc_type typeid, int ogrp)
00340 {
00341     int stat = NC_NOERR; 
00342     nc_type type_class;
00343 
00344     NC_CHECK(nc_inq_user_type(igrp, typeid, NULL, NULL, NULL, NULL, &type_class));
00345 
00346     switch(type_class) {
00347     case NC_VLEN:
00348         NC_CHECK(copy_vlen_type(igrp, typeid, ogrp));
00349         break;
00350     case NC_OPAQUE:
00351         NC_CHECK(copy_opaque_type(igrp, typeid, ogrp));
00352         break;
00353     case NC_ENUM:
00354         NC_CHECK(copy_enum_type(igrp, typeid, ogrp));
00355         break;
00356     case NC_COMPOUND:
00357         NC_CHECK(copy_compound_type(igrp, typeid, ogrp));
00358         break;
00359     default:
00360         NC_CHECK(NC_EBADTYPE);
00361     }
00362     return stat;
00363 }
00364 
00365 /* Copy a group and all its subgroups, recursively, from iroot to
00366  * oroot, the ncids of input file and output file.  This just creates
00367  * all the groups in the destination, but doesn't copy anything that's
00368  * in the groups yet. */
00369 static int
00370 copy_groups(int iroot, int oroot)
00371 {
00372     int stat = NC_NOERR;
00373     int numgrps;
00374     int *grpids;
00375     int i;
00376 
00377     /* get total number of groups and their ids, including all descendants */
00378     NC_CHECK(nc_inq_grps_full(iroot, &numgrps, NULL));
00379     grpids = emalloc(numgrps * sizeof(int));
00380     NC_CHECK(nc_inq_grps_full(iroot, NULL, grpids));
00381     /* create corresponding new groups in ogrp, except for root group */
00382     for(i = 1; i < numgrps; i++) {
00383         char *grpname_full;
00384         char grpname[NC_MAX_NAME];
00385         size_t len_name;
00386         int ogid, oparid;
00387         /* get full group name of input group */
00388         NC_CHECK(nc_inq_grpname_full(grpids[i], &len_name, NULL));
00389         grpname_full = emalloc(len_name + 1);
00390         NC_CHECK(nc_inq_grpname_full(grpids[i], &len_name, grpname_full));
00391         /* get id of parent group of corresponding group in output.
00392          * Note that this exists, because nc_inq_groups returned
00393          * grpids in preorder, so parents are always copied before
00394          * their subgroups */
00395         NC_CHECK(nc_inq_parid(oroot, grpname_full, &oparid));
00396         NC_CHECK(nc_inq_grpname(grpids[i], grpname));
00397         /* define corresponding group in output */
00398         NC_CHECK(nc_def_grp(oparid, grpname, &ogid));
00399         free(grpname_full);
00400     }
00401     free(grpids);
00402     return stat;    
00403 }
00404 
00405 /* 
00406  * Copy the user-defined types in this group (igrp) and all its
00407  * subgroups, recursively, to corresponding group in output (ogrp)
00408  */
00409 static int
00410 copy_types(int igrp, int ogrp)
00411 {
00412     int stat = NC_NOERR; 
00413     int ntypes;
00414     nc_type *types = NULL;
00415     int numgrps;
00416     int *grpids = NULL;
00417     int i;
00418 
00419     NC_CHECK(nc_inq_typeids(igrp, &ntypes, NULL));
00420 
00421     if(ntypes > 0) {
00422         types = (nc_type *) emalloc(ntypes * sizeof(nc_type));
00423         NC_CHECK(nc_inq_typeids(igrp, &ntypes, types));
00424         for (i = 0; i < ntypes; i++) {
00425             NC_CHECK(copy_type(igrp, types[i], ogrp));
00426         }
00427         free(types);
00428     }
00429 
00430     /* Copy types from subgroups */
00431     NC_CHECK(nc_inq_grps(igrp, &numgrps, NULL));
00432     if(numgrps > 0) {
00433         grpids = (int *)emalloc(sizeof(int) * numgrps);
00434         NC_CHECK(nc_inq_grps(igrp, &numgrps, grpids));
00435         for(i = 0; i < numgrps; i++) {
00436             int ogid;
00437             /* get groupid in output corresponding to grpids[i] in
00438              * input, given parent group (or root group) ogrp in
00439              * output */
00440             NC_CHECK(get_grpid(grpids[i], ogrp, &ogid));
00441             NC_CHECK(copy_types(grpids[i], ogid));
00442         }
00443         free(grpids);
00444     }
00445     return stat;
00446 }
00447 
00448 /* Copy all netCDF-4 specific variable properties such as chunking,
00449  * endianness, deflation, checksumming, fill, etc. */
00450 static int
00451 copy_var_specials(int igrp, int varid, int ogrp, int o_varid)
00452 {
00453     int stat = NC_NOERR;
00454     {                           /* handle chunking parameters */
00455         int ndims;
00456         NC_CHECK(nc_inq_varndims(igrp, varid, &ndims));
00457         if (ndims > 0) {                /* no chunking for scalar variables */
00458             int contig = 0;
00459             NC_CHECK(nc_inq_var_chunking(igrp, varid, &contig, NULL));
00460             if(contig == 1) {
00461                 NC_CHECK(nc_def_var_chunking(ogrp, o_varid, NC_CONTIGUOUS, NULL));
00462             } else {
00463                 size_t *chunkp = (size_t *) emalloc(ndims * sizeof(size_t));
00464                 int *dimids = (int *) emalloc(ndims * sizeof(int));
00465                 int idim;
00466                 NC_CHECK(nc_inq_var_chunking(igrp, varid, NULL, chunkp));
00467                 NC_CHECK(nc_inq_vardimid(igrp, varid, dimids));
00468                 for(idim = 0; idim < ndims; idim++) {
00469                     int dimid = dimids[idim];
00470                     size_t chunksize = chunkspec_size(dimid);
00471                     if(chunkspec_size(dimid) > 0) { /* found in chunkspec */
00472                         chunkp[idim] = chunksize;
00473                     }
00474                 }
00475                 /* explicitly set chunking, even if default */
00476                 NC_CHECK(nc_def_var_chunking(ogrp, o_varid, NC_CHUNKED, chunkp));
00477                 free(dimids);
00478                 free(chunkp);
00479             }
00480         }
00481     }
00482     { /* handle compression parameters, copying from input, overriding
00483        * with command-line options */
00484         int shuffle, deflate, deflate_level;
00485         NC_CHECK(nc_inq_var_deflate(igrp, varid, &shuffle, &deflate, &deflate_level));
00486         if(option_deflate_level >= 0) { /* change output compression, if requested */
00487           deflate_level = option_deflate_level;
00488           deflate=1;
00489         }
00490         if(shuffle==0 && option_shuffle_vars != 0) {
00491           shuffle = option_shuffle_vars;
00492         }
00493         if(deflate != 0 || shuffle != 0) {
00494             NC_CHECK(nc_def_var_deflate(ogrp, o_varid, shuffle, deflate_level > 0, deflate_level));
00495         }
00496     }
00497     {                           /* handle checksum parameters */
00498         int fletcher32 = 0;
00499         NC_CHECK(nc_inq_var_fletcher32(igrp, varid, &fletcher32));
00500         if(fletcher32 != 0) {
00501             NC_CHECK(nc_def_var_fletcher32(ogrp, o_varid, fletcher32));
00502         }
00503     }
00504     {                           /* handle endianness */
00505         int endianness = 0;
00506         NC_CHECK(nc_inq_var_endian(igrp, varid, &endianness));
00507         if(endianness != NC_ENDIAN_NATIVE) { /* native is the default */
00508             NC_CHECK(nc_def_var_endian(ogrp, o_varid, endianness));
00509         }
00510     }
00511     return stat;
00512 }
00513 
00514 /* Set output variable o_varid (in group ogrp) to use chunking
00515  * specified on command line, only called for classic format input and
00516  * netCDF-4 format output, so no existing chunk lengths to override. */
00517 static int
00518 set_var_chunked(int ogrp, int o_varid)
00519 {
00520     int stat = NC_NOERR;
00521     int ndims;
00522     int odim;
00523     size_t chunk_threshold = CHUNK_THRESHOLD;
00524 
00525     if(chunkspec_ndims() == 0)  /* no chunking specified on command line */
00526         return stat;
00527     NC_CHECK(nc_inq_varndims(ogrp, o_varid, &ndims));
00528 
00529     if (ndims > 0) {            /* no chunking for scalar variables */
00530         int chunked = 0;
00531         int *dimids = (int *) emalloc(ndims * sizeof(int));
00532         size_t varsize;
00533         nc_type vartype;
00534         size_t value_size;
00535         int is_unlimited = 0;
00536 
00537         NC_CHECK(nc_inq_vardimid (ogrp, o_varid, dimids));
00538         NC_CHECK(nc_inq_vartype(ogrp, o_varid, &vartype));
00539         /* from type, get size in memory needed for each value */
00540         NC_CHECK(nc_inq_type(ogrp, vartype, NULL, &value_size));
00541         varsize = value_size;
00542 
00543         /* Determine if this variable should be chunked.  A variable
00544          * should be chunked if any of its dims are in command-line
00545          * chunk spec. It will also be chunked if any of its
00546          * dims are unlimited. */
00547         for(odim = 0; odim < ndims; odim++) {
00548             int odimid = dimids[odim];
00549             int idimid = dimmap_idimid(odimid); /* corresponding dimid in input file */
00550             if(dimmap_ounlim(odimid))
00551                 is_unlimited = 1;
00552             if(idimid != -1) {
00553                 size_t chunksize = chunkspec_size(idimid); /* from chunkspec */
00554                 size_t dimlen;
00555                 NC_CHECK(nc_inq_dimlen(ogrp, odimid, &dimlen));
00556                 if( (chunksize > 0) || dimmap_ounlim(odimid)) {
00557                     chunked = 1;                    
00558                 }
00559                 varsize *= dimlen;
00560             }
00561         }
00562         /* Don't chunk small variables that don't use an unlimited
00563          * dimension. */
00564         if(varsize < chunk_threshold && !is_unlimited)
00565             chunked = 0;
00566 
00567         if(chunked) {
00568             /* Allocate chunksizes and set defaults to dimsize for any
00569              * dimensions not mentioned in chunkspec. */
00570             size_t *chunkp = (size_t *) emalloc(ndims * sizeof(size_t));
00571             for(odim = 0; odim < ndims; odim++) {
00572                 int odimid = dimids[odim];
00573                 int idimid = dimmap_idimid(odimid);
00574                 size_t chunksize = chunkspec_size(idimid);
00575                 if(chunksize > 0) {
00576                     chunkp[odim] = chunksize;
00577                 } else {
00578                     NC_CHECK(nc_inq_dimlen(ogrp, odimid, &chunkp[odim]));
00579                 }
00580             }
00581             NC_CHECK(nc_def_var_chunking(ogrp, o_varid, NC_CHUNKED, chunkp));
00582             free(chunkp);
00583         }
00584         free(dimids);
00585     }
00586     return stat;
00587 }
00588 
00589 /* Set variable to compression specified on command line */
00590 static int
00591 set_var_compressed(int ogrp, int o_varid)
00592 {
00593     int stat = NC_NOERR;
00594     if (option_deflate_level >= 0) {
00595         int deflate = 1;
00596         NC_CHECK(nc_def_var_deflate(ogrp, o_varid, option_shuffle_vars, deflate, option_deflate_level));
00597     }
00598     return stat;
00599 }
00600 
/* Shrink the chunk cache of variable varid in group grp to a minimal
 * setting (size 1, one element, preemption 0).  Only done for chunked
 * variables in netCDF-4 or netCDF-4 classic model files.  This is not
 * necessary, but will save some memory when processing one variable at
 * a time.  */
#ifdef UNUSED
static int
free_var_chunk_cache(int grp, int varid)
{
    int stat = NC_NOERR;
    int format;
    int storage = 1;            /* 1 means contiguous */

    NC_CHECK(nc_inq_format(grp, &format));
    if(format != NC_FORMAT_NETCDF4 && format != NC_FORMAT_NETCDF4_CLASSIC)
        return stat;            /* no chunk caches in other formats */

    NC_CHECK(nc_inq_var_chunking(grp, varid, &storage, NULL));
    if(storage == 0) {          /* chunked */
        const size_t min_bytes = 1;
        const size_t min_elems = 1;
        const float no_preemption = 0;
        NC_CHECK(nc_set_var_chunk_cache(grp, varid, min_bytes, min_elems, no_preemption));
    }
    return stat;
}
#endif
00624 
00625 #endif /* USE_NETCDF4 */
00626 
00627 /* Copy dimensions from group igrp to group ogrp, also associate input
00628  * dimids with output dimids (they need not match, because the input
00629  * dimensions may have been defined in a different order than we define
00630  * the output dimensions here. */
00631 static int
00632 copy_dims(int igrp, int ogrp)
00633 {
00634     int stat = NC_NOERR;
00635     int ndims;
00636     int dgrp;
00637 #ifdef USE_NETCDF4
00638     int nunlims;
00639     int *dimids;
00640     int *unlimids;
00641 #else
00642     int unlimid;
00643 #endif /* USE_NETCDF4 */    
00644 
00645     NC_CHECK(nc_inq_ndims(igrp, &ndims));
00646 
00647 #ifdef USE_NETCDF4
00648    /* In netCDF-4 files, dimids may not be sequential because they
00649     * may be defined in various groups, and we are only looking at one
00650     * group at a time. */
00651     /* Find the dimension ids in this group, don't include parents. */
00652     dimids = (int *) emalloc((ndims + 1) * sizeof(int));
00653     NC_CHECK(nc_inq_dimids(igrp, NULL, dimids, 0));
00654     /* Find the number of unlimited dimensions and get their IDs */
00655     NC_CHECK(nc_inq_unlimdims(igrp, &nunlims, NULL));
00656     unlimids = (int *) emalloc((nunlims + 1) * sizeof(int));
00657     NC_CHECK(nc_inq_unlimdims(igrp, NULL, unlimids));
00658 #else
00659     NC_CHECK(nc_inq_unlimdim(igrp, &unlimid));
00660 #endif /* USE_NETCDF4 */
00661 
00662     /* Copy each dimension to output, including unlimited dimension(s) */
00663     for (dgrp = 0; dgrp < ndims; dgrp++) {
00664         char name[NC_MAX_NAME];
00665         size_t length;
00666         int i_is_unlim;
00667         int o_is_unlim;
00668         int idimid, odimid;
00669 #ifdef USE_NETCDF4
00670         int uld;
00671 #endif
00672 
00673         i_is_unlim = 0;
00674 #ifdef USE_NETCDF4
00675         idimid = dimids[dgrp];
00676         for (uld = 0; uld < nunlims; uld++) {
00677             if(idimid == unlimids[uld]) {
00678                 i_is_unlim = 1;
00679                 break;
00680             }     
00681         }
00682 #else
00683         idimid = dgrp;
00684         if(unlimid != -1 && (idimid == unlimid)) {
00685             i_is_unlim = 1;
00686         }
00687 #endif /* USE_NETCDF4 */
00688 
00689         stat = nc_inq_dim(igrp, idimid, name, &length);
00690         if (stat == NC_EDIMSIZE && sizeof(size_t) < 8) {
00691             error("dimension \"%s\" requires 64-bit platform", name);
00692         }       
00693         NC_CHECK(stat);
00694         o_is_unlim = i_is_unlim;
00695         if(i_is_unlim && !option_fix_unlimdims) {
00696             NC_CHECK(nc_def_dim(ogrp, name, NC_UNLIMITED, &odimid));
00697         } else {
00698             NC_CHECK(nc_def_dim(ogrp, name, length, &odimid));
00699             o_is_unlim = 0;
00700         }
00701         /* Store (idimid, odimid) mapping for later use, also whether unlimited */
00702         dimmap_store(idimid, odimid, i_is_unlim, o_is_unlim);
00703     }
00704 #ifdef USE_NETCDF4
00705     free(dimids);
00706     free(unlimids);
00707 #endif /* USE_NETCDF4 */    
00708     return stat;
00709 }
00710 
00711 /* Copy the attributes for variable ivar in group igrp to variable
00712  * ovar in group ogrp.  Global (group) attributes are specified by
00713  * using the varid NC_GLOBAL */
00714 static int
00715 copy_atts(int igrp, int ivar, int ogrp, int ovar)
00716 {
00717     int natts;
00718     int iatt;
00719     int stat = NC_NOERR;
00720 
00721     NC_CHECK(nc_inq_varnatts(igrp, ivar, &natts));
00722     
00723     for(iatt = 0; iatt < natts; iatt++) {
00724         char name[NC_MAX_NAME];
00725         NC_CHECK(nc_inq_attname(igrp, ivar, iatt, name));
00726         NC_CHECK(nc_copy_att(igrp, ivar, name, ogrp, ovar));
00727     }
00728     return stat;
00729 }
00730 
00731 /* copy the schema for a single variable in group igrp to group ogrp */
00732 static int
00733 copy_var(int igrp, int varid, int ogrp)
00734 {
00735     int stat = NC_NOERR;
00736     int ndims;
00737     int *idimids;               /* ids of dims for input variable */
00738     int *odimids;               /* ids of dims for output variable */
00739     char name[NC_MAX_NAME];
00740     nc_type typeid, o_typeid;
00741     int natts;
00742     int i;
00743     int o_varid;
00744 
00745     NC_CHECK(nc_inq_varndims(igrp, varid, &ndims));
00746     idimids = (int *) emalloc((ndims + 1) * sizeof(int));
00747     NC_CHECK(nc_inq_var(igrp, varid, name, &typeid, NULL, idimids, &natts));
00748     o_typeid = typeid;
00749 #ifdef USE_NETCDF4
00750     if (typeid > NC_STRING) {   /* user-defined type */
00751         /* type ids in source don't necessarily correspond to same
00752          * typeids in destination, so look up destination typeid by
00753          * using type name */
00754         char type_name[NC_MAX_NAME];
00755         NC_CHECK(nc_inq_type(igrp, typeid, type_name, NULL));
00756         NC_CHECK(nc_inq_typeid(ogrp, type_name, &o_typeid));
00757     }
00758 #endif  /* USE_NETCDF4 */
00759 
00760     /* get the corresponding dimids in the output file */
00761     odimids = (int *) emalloc((ndims + 1) * sizeof(int));
00762     for(i = 0; i < ndims; i++) {
00763         odimids[i] = dimmap_odimid(idimids[i]);
00764         if(odimids[i] == -1) {
00765             error("Oops, no dimension in output associated with input dimid %d", idimids[i]);
00766         }
00767     }
00768 
00769     /* define the output variable */
00770     NC_CHECK(nc_def_var(ogrp, name, o_typeid, ndims, odimids, &o_varid));
00771 
00772     /* attach the variable attributes to the output variable */
00773     NC_CHECK(copy_atts(igrp, varid, ogrp, o_varid));
00774 #ifdef USE_NETCDF4    
00775     {
00776         int inkind;
00777         int outkind;
00778         NC_CHECK(nc_inq_format(igrp, &inkind));
00779         NC_CHECK(nc_inq_format(ogrp, &outkind));
00780         if(outkind == NC_FORMAT_NETCDF4 || outkind == NC_FORMAT_NETCDF4_CLASSIC) {
00781             if((inkind == NC_FORMAT_NETCDF4 || inkind == NC_FORMAT_NETCDF4_CLASSIC)) {
00782                 /* Copy all netCDF-4 specific variable properties such as
00783                  * chunking, endianness, deflation, checksumming, fill, etc. */
00784                 NC_CHECK(copy_var_specials(igrp, varid, ogrp, o_varid));
00785             } else {
00786                 /* Set chunking if specified in command line option */
00787                 NC_CHECK(set_var_chunked(ogrp, o_varid));
00788             }
00789             /* Set compression if specified in command line option */
00790             NC_CHECK(set_var_compressed(ogrp, o_varid));
00791         }
00792     }
00793 #endif  /* USE_NETCDF4 */
00794     free(idimids);
00795     free(odimids);
00796     return stat;
00797 }
00798 
00799 /* copy the schema for all the variables in group igrp to group ogrp */
00800 static int
00801 copy_vars(int igrp, int ogrp)
00802 {
00803     int stat = NC_NOERR;
00804     int nvars;
00805     int varid;
00806     
00807     NC_CHECK(nc_inq_nvars(igrp, &nvars));
00808     for (varid = 0; varid < nvars; varid++) {
00809         NC_CHECK(copy_var(igrp, varid, ogrp));
00810     }
00811     return stat;
00812 }
00813 
00814 /* Copy the schema in a group and all its subgroups, recursively, from
00815  * group igrp in input to parent group ogrp in destination.  Use
00816  * dimmap array to map input dimids to output dimids. */
00817 static int
00818 copy_schema(int igrp, int ogrp) 
00819 {
00820     int stat = NC_NOERR;
00821     int ogid;                   /* like igrp but in output file */
00822 
00823     /* get groupid in output corresponding to group igrp in input,
00824      * given parent group (or root group) ogrp in output */
00825     NC_CHECK(get_grpid(igrp, ogrp, &ogid));
00826 
00827     NC_CHECK(copy_dims(igrp, ogid));
00828     NC_CHECK(copy_atts(igrp, NC_GLOBAL, ogid, NC_GLOBAL));
00829     NC_CHECK(copy_vars(igrp, ogid));
00830 #ifdef USE_NETCDF4    
00831     {
00832         int numgrps;
00833         int *grpids;
00834         int i;
00835         /* Copy schema from subgroups */
00836         stat = nc_inq_grps(igrp, &numgrps, NULL);
00837         grpids = (int *)emalloc((numgrps + 1) * sizeof(int));
00838         NC_CHECK(nc_inq_grps(igrp, &numgrps, grpids));
00839         
00840         for(i = 0; i < numgrps; i++) {
00841             NC_CHECK(copy_schema(grpids[i], ogid));
00842         }
00843         free(grpids);
00844     }
00845 #endif  /* USE_NETCDF4 */
00846     return stat;    
00847 }
00848 
00849 /* Return number of values for a variable varid in a group igrp */
00850 static int
00851 inq_nvals(int igrp, int varid, long long *nvalsp) {
00852     int stat = NC_NOERR;
00853     int ndims;
00854     int *dimids;
00855     int dim;
00856     long long nvals = 1;
00857 
00858     NC_CHECK(nc_inq_varndims(igrp, varid, &ndims));
00859     dimids = (int *) emalloc((ndims + 1) * sizeof(int));
00860     NC_CHECK(nc_inq_vardimid (igrp, varid, dimids));
00861     for(dim = 0; dim < ndims; dim++) {
00862         size_t len;
00863         NC_CHECK(nc_inq_dimlen(igrp, dimids[dim], &len));
00864         nvals *= len;
00865     }
00866     if(nvalsp)
00867         *nvalsp = nvals;
00868     free(dimids);
00869     return stat;
00870 }
00871 
00872 /* Copy data from variable varid in group igrp to corresponding group
00873  * ogrp. */
00874 static int
00875 copy_var_data(int igrp, int varid, int ogrp) {
00876     int stat = NC_NOERR;
00877     nc_type vartype;
00878     long long nvalues;          /* number of values for this variable */
00879     size_t ntoget;              /* number of values to access this iteration */
00880     size_t value_size;          /* size of a single value of this variable */
00881     static void *buf = 0;       /* buffer for the variable values */
00882     char varname[NC_MAX_NAME];
00883     int ovarid;
00884     size_t *start;
00885     size_t *count;
00886     nciter_t *iterp;            /* opaque structure for iteration status */
00887     int do_realloc = 0;
00888 #ifdef USE_NETCDF4    
00889     int okind;
00890     size_t chunksize;
00891 #endif
00892 
00893     NC_CHECK(inq_nvals(igrp, varid, &nvalues));
00894     if(nvalues == 0)
00895         return stat;
00896     /* get corresponding output variable */
00897     NC_CHECK(nc_inq_varname(igrp, varid, varname));
00898     NC_CHECK(nc_inq_varid(ogrp, varname, &ovarid));
00899     NC_CHECK(nc_inq_vartype(igrp, varid, &vartype));
00900     /* from type, get size in memory needed for each value */
00901     NC_CHECK(nc_inq_type(igrp, vartype, NULL, &value_size));
00902     if(value_size > option_copy_buffer_size) {
00903         option_copy_buffer_size = value_size;
00904         do_realloc = 1;
00905     }
00906 #ifdef USE_NETCDF4    
00907     NC_CHECK(nc_inq_format(ogrp, &okind));
00908     if(okind == NC_FORMAT_NETCDF4 || okind == NC_FORMAT_NETCDF4_CLASSIC) {
00909         /* if this variable chunked, set variable chunk cache size */ 
00910         int contig = 1;
00911         NC_CHECK(nc_inq_var_chunking(ogrp, ovarid, &contig, NULL));
00912         if(contig == 0) {       /* chunked */
00913             if(option_compute_chunkcaches) {
00914                 /* Try to estimate variable-specific chunk cache,
00915                  * depending on specific size and shape of this
00916                  * variable's chunks.  This doesn't work yet. */
00917                 size_t chunkcache_size, chunkcache_nelems;
00918                 float chunkcache_preemption;
00919                 NC_CHECK(inq_var_chunking_params(igrp, varid, ogrp, ovarid,
00920                                                  &chunkcache_size, 
00921                                                  &chunkcache_nelems, 
00922                                                  &chunkcache_preemption));
00923                 NC_CHECK(nc_set_var_chunk_cache(ogrp, ovarid, 
00924                                                 chunkcache_size, 
00925                                                 chunkcache_nelems, 
00926                                                 chunkcache_preemption)); 
00927             } else {            
00928                 /* by default, use same chunk cache for all chunked variables */
00929                 NC_CHECK(nc_set_var_chunk_cache(ogrp, ovarid, 
00930                                                 option_chunk_cache_size,
00931                                                 option_chunk_cache_nelems,
00932                                                 COPY_CHUNKCACHE_PREEMPTION));
00933             }
00934         }
00935     }
00936     /* For chunked variables, option_copy_buffer_size must also be at least as large as
00937      * size of a chunk in input, otherwise resize it. */
00938     {
00939         NC_CHECK(inq_var_chunksize(igrp, varid, &chunksize));
00940         if(chunksize > option_copy_buffer_size) {
00941             option_copy_buffer_size = chunksize;
00942             do_realloc = 1;
00943         }
00944     }
00945 #endif  /* USE_NETCDF4 */
00946     if(buf && do_realloc) {
00947         free(buf);
00948         buf = 0;
00949     }
00950     if(buf == 0) {              /* first time or needs to grow */
00951         buf = emalloc(option_copy_buffer_size);
00952         memset((void*)buf,0,option_copy_buffer_size);
00953     }
00954 
00955     /* initialize variable iteration */
00956     NC_CHECK(nc_get_iter(igrp, varid, option_copy_buffer_size, &iterp));
00957 
00958     start = (size_t *) emalloc((iterp->rank + 1) * sizeof(size_t));
00959     count = (size_t *) emalloc((iterp->rank + 1) * sizeof(size_t));
00960     /* nc_next_iter() initializes start and count on first call,
00961      * changes start and count to iterate through whole variable on
00962      * subsequent calls. */
00963     while((ntoget = nc_next_iter(iterp, start, count)) > 0) {
00964         NC_CHECK(nc_get_vara(igrp, varid, start, count, buf));
00965         NC_CHECK(nc_put_vara(ogrp, ovarid, start, count, buf));
00966 #ifdef USE_NETCDF4
00967         /* we have to explicitly free values for strings and vlens */
00968         if(vartype == NC_STRING) {
00969             NC_CHECK(nc_free_string(ntoget, (char **)buf));
00970         } else if(vartype > NC_STRING) { /* user-defined type */
00971             nc_type vclass;
00972             NC_CHECK(nc_inq_user_type(igrp, vartype, NULL, NULL, NULL, NULL, &vclass));
00973             if(vclass == NC_VLEN) {
00974                 NC_CHECK(nc_free_vlens(ntoget, (nc_vlen_t *)buf));
00975             }
00976         }
00977 #endif  /* USE_NETCDF4 */
00978     } /* end main iteration loop */
00979 #ifdef USE_NETCDF4
00980     /* We're all done with this input and output variable, so if
00981      * either variable is chunked, free up its variable chunk cache */
00982     /* NC_CHECK(free_var_chunk_cache(igrp, varid)); */
00983     /* NC_CHECK(free_var_chunk_cache(ogrp, ovarid)); */
00984 #endif  /* USE_NETCDF4 */
00985     free(start);
00986     free(count);
00987     NC_CHECK(nc_free_iter(iterp));
00988     return stat;
00989 }
00990 
00991 /* Copy data from variables in group igrp to variables in
00992  * corresponding group with parent ogrp, and all subgroups
00993  * recursively  */
00994 static int
00995 copy_data(int igrp, int ogrp)
00996 {
00997     int stat = NC_NOERR;
00998     int ogid;
00999     int nvars;
01000     int varid;
01001 #ifdef USE_NETCDF4
01002     int numgrps;
01003     int *grpids;
01004     int i;
01005 #endif
01006 
01007     /* get groupid in output corresponding to group igrp in input,
01008      * given parent group (or root group) ogrp in output */
01009     NC_CHECK(get_grpid(igrp, ogrp, &ogid));
01010     
01011     /* Copy data from this group */
01012     NC_CHECK(nc_inq_nvars(igrp, &nvars));
01013 
01014     for (varid = 0; varid < nvars; varid++) {
01015         NC_CHECK(copy_var_data(igrp, varid, ogid));
01016     }
01017 #ifdef USE_NETCDF4
01018     /* Copy data from subgroups */
01019     stat = nc_inq_grps(igrp, &numgrps, NULL);
01020     grpids = (int *)emalloc((numgrps + 1) * sizeof(int));
01021     NC_CHECK(nc_inq_grps(igrp, &numgrps, grpids));
01022 
01023     for(i = 0; i < numgrps; i++) {
01024         NC_CHECK(copy_data(grpids[i], ogid));
01025     }
01026     free(grpids);
01027 #endif  /* USE_NETCDF4 */
01028     return stat;
01029 }
01030 
/* Count the total number of dimensions in ncid and all its subgroups.
 *
 *   ncid  id of the file or group at which counting starts
 *
 * Returns the sum of the dimension counts that nc_inq_ndims() reports
 * for every group id returned by nc_inq_grps_full().  Every inquiry is
 * passed through NC_CHECK, so a failed call can no longer add an
 * indeterminate count to the total. */
int
count_dims(int ncid) {
    int numgrps;
    int *grpids;
    int igrp;
    int ndims=0;
    /* get total number of groups and their ids, including all descendants */
    NC_CHECK(nc_inq_grps_full(ncid, &numgrps, NULL));
    grpids = emalloc(numgrps * sizeof(int));
    NC_CHECK(nc_inq_grps_full(ncid, NULL, grpids));
    for(igrp = 0; igrp < numgrps; igrp++) {
        int ndims_local = 0;
        NC_CHECK(nc_inq_ndims(grpids[igrp], &ndims_local));
        ndims += ndims_local;
    }
    free(grpids); 
    return ndims;
}
01050 
01051 /* Test if special case: netCDF-3 file with more than one record
01052  * variable.  Performance can be very slow for this case when the disk
01053  * block size is large, there are many record variables, and a
01054  * record's worth of data for some variables is smaller than the disk
01055  * block size.  In this case, copying the record variables a variable
01056  * at a time causes much rereading of record data, so instead we want
01057  * to copy data a record at a time. */
01058 static int
01059 nc3_special_case(int ncid, int kind) {
01060     if (kind == NC_FORMAT_CLASSIC ||  kind == NC_FORMAT_64BIT) {
01061         int recdimid = 0;
01062         NC_CHECK(nc_inq_unlimdim(ncid, &recdimid));
01063         if (recdimid != -1) {   /* we have a record dimension */
01064             int nvars;
01065             int varid;
01066             NC_CHECK(nc_inq_nvars(ncid, &nvars));
01067             for (varid = 0; varid < nvars; varid++) {
01068                 int *dimids = 0;
01069                 int ndims;
01070                 NC_CHECK( nc_inq_varndims(ncid, varid, &ndims) );
01071                 if (ndims > 0) {
01072                     int dimids0;
01073                     dimids = (int *) emalloc((ndims + 1) * sizeof(int));
01074                     NC_CHECK( nc_inq_vardimid(ncid, varid, dimids) );
01075                     dimids0 = dimids[0];
01076                     free(dimids);
01077                     if(dimids0 == recdimid) {
01078                         return 1; /* found a record variable */
01079                     }
01080                 }
01081             }
01082         }
01083     }
01084     return 0;
01085 }
01086 
01087 /* Classify variables in ncid as either fixed-size variables (with no
01088  * unlimited dimension) or as record variables (with an unlimited
01089  * dimension) */
01090 static int
01091 classify_vars(
01092     int ncid,   /* netCDF ID */
01093     size_t *nf, /* for returning number of fixed-size variables */
01094     int **fvars,        /* the array of fixed_size variable IDS, caller should free */
01095     size_t *nr, /* for returning number of record variables */
01096     int **rvars)        /* the array of record variable IDs, caller should free */
01097 {
01098     int varid;
01099     int nvars;
01100     NC_CHECK(nc_inq_nvars(ncid, &nvars));
01101     *nf = 0;
01102     *fvars = (int *) emalloc(nvars * sizeof(int));
01103     *nr = 0;
01104     *rvars = (int *) emalloc(nvars * sizeof(int));
01105     for (varid = 0; varid < nvars; varid++) {
01106         if (isrecvar(ncid, varid)) {
01107             (*rvars)[*nr] = varid;
01108             (*nr)++;
01109         } else {
01110             (*fvars)[*nf] = varid;
01111             (*nf)++;
01112         }
01113     }
01114     return NC_NOERR;
01115 }
01116 
01117 /* Only called for classic format or 64-bit offset format files, to speed up special case */
01118 static int
01119 copy_fixed_size_data(int igrp, int ogrp, size_t nfixed_vars, int *fixed_varids) {
01120     size_t ivar;
01121     /* for each fixed-size variable, copy data */
01122     for (ivar = 0; ivar < nfixed_vars; ivar++) {
01123         int varid = fixed_varids[ivar];
01124         NC_CHECK(copy_var_data(igrp, varid, ogrp));
01125     }
01126     if (fixed_varids)
01127         free(fixed_varids);
01128     return NC_NOERR;
01129 }
01130 
01131 /* copy a record's worth of data for a variable from input to output */
01132 static int
01133 copy_rec_var_data(int ncid,     /* input */
01134                   int ogrp,     /* output */
01135                   int irec,     /* record number */
01136                   int varid,    /* input variable id */
01137                   int ovarid,   /* output variable id */
01138                   size_t *start,   /* start indices for record data */
01139                   size_t *count,   /* edge lengths for record data */
01140                   void *buf        /* buffer large enough to hold data */
01141     ) 
01142 {
01143     NC_CHECK(nc_get_vara(ncid, varid, start, count, buf));
01144     NC_CHECK(nc_put_vara(ogrp, ovarid, start, count, buf));
01145     return NC_NOERR;
01146 }
01147 
01148 /* Only called for classic format or 64-bit offset format files, to speed up special case */
01149 static int
01150 copy_record_data(int ncid, int ogrp, size_t nrec_vars, int *rec_varids) {
01151     int unlimid;
01152     size_t nrecs = 0;           /* how many records? */
01153     size_t irec;
01154     size_t ivar;
01155     void **buf;                 /* space for reading in data for each variable */
01156     int *rec_ovarids;           /* corresponding varids in output */
01157     size_t **start;
01158     size_t **count;
01159     NC_CHECK(nc_inq_unlimdim(ncid, &unlimid));
01160     NC_CHECK(nc_inq_dimlen(ncid, unlimid, &nrecs));
01161     buf = (void **) emalloc(nrec_vars * sizeof(void *));
01162     rec_ovarids = (int *) emalloc(nrec_vars * sizeof(int));
01163     start = (size_t **) emalloc(nrec_vars * sizeof(size_t*));
01164     count = (size_t **) emalloc(nrec_vars * sizeof(size_t*));
01165     /* get space to hold one record's worth of data for each record variable */
01166     for (ivar = 0; ivar < nrec_vars; ivar++) {
01167         int varid;
01168         int ndims;
01169         int *dimids;
01170         nc_type vartype;
01171         size_t value_size;
01172         int dimid;
01173         int ii;
01174         size_t nvals;
01175         char varname[NC_MAX_NAME];
01176         varid = rec_varids[ivar];
01177         NC_CHECK(nc_inq_varndims(ncid, varid, &ndims));
01178         dimids = (int *) emalloc((1 + ndims) * sizeof(int));
01179         start[ivar] = (size_t *) emalloc(ndims * sizeof(size_t));
01180         count[ivar] = (size_t *) emalloc(ndims * sizeof(size_t));
01181         NC_CHECK(nc_inq_vardimid (ncid, varid, dimids));
01182         NC_CHECK(nc_inq_vartype(ncid, varid, &vartype));
01183         NC_CHECK(nc_inq_type(ncid, vartype, NULL, &value_size));
01184         nvals = 1;
01185         for(ii = 1; ii < ndims; ii++) { /* for rec size, don't include first record dimension */
01186             size_t dimlen;
01187             dimid = dimids[ii];
01188             NC_CHECK(nc_inq_dimlen(ncid, dimid, &dimlen));
01189             nvals *= dimlen;
01190             start[ivar][ii] = 0;
01191             count[ivar][ii] = dimlen;
01192         }
01193         start[ivar][0] = 0;     
01194         count[ivar][0] = 1;     /* 1 record */
01195         buf[ivar] = (void *) emalloc(nvals * value_size);
01196         NC_CHECK(nc_inq_varname(ncid, varid, varname));
01197         NC_CHECK(nc_inq_varid(ogrp, varname, &rec_ovarids[ivar]));
01198         if(dimids)
01199             free(dimids);
01200     }
01201 
01202     /* for each record, copy all variable data */
01203     for(irec = 0; irec < nrecs; irec++) {
01204         for (ivar = 0; ivar < nrec_vars; ivar++) {
01205             int varid, ovarid;
01206             varid = rec_varids[ivar];
01207             ovarid = rec_ovarids[ivar];
01208             start[ivar][0] = irec;
01209             NC_CHECK(copy_rec_var_data(ncid, ogrp, irec, varid, ovarid, 
01210                                        start[ivar], count[ivar], buf[ivar]));
01211         }
01212     }
01213     for (ivar = 0; ivar < nrec_vars; ivar++) {
01214         if(start[ivar])
01215             free(start[ivar]);
01216         if(count[ivar])
01217             free(count[ivar]);
01218     }
01219     if(start)
01220         free(start);
01221     if(count)
01222         free(count);
01223     for (ivar = 0; ivar < nrec_vars; ivar++) {
01224         if(buf[ivar]) {
01225             free(buf[ivar]);
01226         }
01227     }
01228     if (rec_varids)
01229         free(rec_varids);
01230     if(buf)
01231         free(buf);
01232     if(rec_ovarids)
01233         free(rec_ovarids);
01234     return NC_NOERR;
01235 }
01236 
01237 /* copy infile to outfile using netCDF API
01238  */
01239 static int
01240 copy(char* infile, char* outfile)
01241 {
01242     int stat = NC_NOERR;
01243     int igrp, ogrp;
01244     int inkind, outkind;
01245     int open_mode = NC_NOWRITE;
01246     int create_mode = NC_CLOBBER;
01247     size_t ndims;
01248 
01249     if(option_read_diskless) {
01250         open_mode |= NC_DISKLESS;
01251     }
01252 
01253     NC_CHECK(nc_open(infile, open_mode, &igrp));
01254 
01255     NC_CHECK(nc_inq_format(igrp, &inkind));
01256 
01257 /* option_kind specifies which netCDF format for output: 
01258  *   -1 -> same as input, 
01259  *    1 -> classic
01260  *    2 -> 64-bit offset
01261  *    3 -> netCDF-4, 
01262  *    4 -> netCDF-4 classic model
01263  *
01264  * However, if compression or shuffling was specified and kind was -1,
01265  * kind is changed to format 4 that supports compression for input of
01266  * type 1 or 2.  
01267  */
01268     outkind = option_kind;
01269     if (option_kind == SAME_AS_INPUT) { /* default, kind not specified */
01270         outkind = inkind;
01271         /* Deduce output kind if netCDF-4 features requested */
01272         if (inkind == NC_FORMAT_CLASSIC || inkind == NC_FORMAT_64BIT) { 
01273             if (option_deflate_level > 0 || 
01274                 option_shuffle_vars == NC_SHUFFLE || 
01275                 option_chunkspec) 
01276             { 
01277                 outkind = NC_FORMAT_NETCDF4_CLASSIC;
01278             }
01279         }
01280     }
01281 
01282 #ifdef USE_NETCDF4
01283     if(option_chunkspec) {
01284         /* Now that input is open, can parse option_chunkspec into binary
01285          * structure. */
01286         NC_CHECK(chunkspec_parse(igrp, option_chunkspec));
01287     }
01288 #endif  /* USE_NETCDF4 */
01289 
01290     if(option_write_diskless)
01291         create_mode |= NC_WRITE | NC_DISKLESS; /* NC_WRITE persists diskless file on close */
01292     switch(outkind) {
01293     case NC_FORMAT_CLASSIC:
01294         /* nothing to do */
01295         break;
01296     case NC_FORMAT_64BIT:
01297         create_mode |= NC_64BIT_OFFSET;
01298         break;
01299 #ifdef USE_NETCDF4
01300     case NC_FORMAT_NETCDF4:
01301         create_mode |= NC_NETCDF4;
01302         break;
01303     case NC_FORMAT_NETCDF4_CLASSIC:
01304         create_mode |= NC_NETCDF4 | NC_CLASSIC_MODEL;
01305         break;
01306 #else
01307     case NC_FORMAT_NETCDF4:
01308     case NC_FORMAT_NETCDF4_CLASSIC:
01309         error("nccopy built with --disable-netcdf4, can't create netCDF-4 files");
01310         break;
01311 #endif  /* USE_NETCDF4 */
01312     default:
01313         error("bad value (%d) for -k option\n", option_kind);
01314         break;
01315     }
01316     NC_CHECK(nc_create(outfile, create_mode, &ogrp));
01317     NC_CHECK(nc_set_fill(ogrp, NC_NOFILL, NULL));
01318 
01319 #ifdef USE_NETCDF4
01320     /* Because types in one group may depend on types in a different
01321      * group, need to create all groups before defining types */
01322     if(inkind == NC_FORMAT_NETCDF4) {
01323         NC_CHECK(copy_groups(igrp, ogrp));
01324         NC_CHECK(copy_types(igrp, ogrp));
01325     }
01326 #endif  /* USE_NETCDF4 */
01327 
01328     ndims = count_dims(igrp);
01329     NC_CHECK(dimmap_init(ndims));
01330     NC_CHECK(copy_schema(igrp, ogrp));
01331     NC_CHECK(nc_enddef(ogrp));
01332 
01333     /* For performance, special case netCDF-3 input or output file with record
01334      * variables, to copy a record-at-a-time instead of a
01335      * variable-at-a-time. */
01336     if(nc3_special_case(igrp, inkind)) {
01337         size_t nfixed_vars, nrec_vars;
01338         int *fixed_varids;
01339         int *rec_varids;
01340         NC_CHECK(classify_vars(igrp, &nfixed_vars, &fixed_varids, &nrec_vars, &rec_varids));
01341         NC_CHECK(copy_fixed_size_data(igrp, ogrp, nfixed_vars, fixed_varids));
01342         NC_CHECK(copy_record_data(igrp, ogrp, nrec_vars, rec_varids));
01343     } else if (nc3_special_case(ogrp, outkind)) {
01344         size_t nfixed_vars, nrec_vars;
01345         int *fixed_varids;
01346         int *rec_varids;
01347         /* classifies output vars, but returns input varids */
01348         NC_CHECK(classify_vars(ogrp, &nfixed_vars, &fixed_varids, &nrec_vars, &rec_varids));
01349         NC_CHECK(copy_fixed_size_data(igrp, ogrp, nfixed_vars, fixed_varids));
01350         NC_CHECK(copy_record_data(igrp, ogrp, nrec_vars, rec_varids));
01351     } else {        
01352         NC_CHECK(copy_data(igrp, ogrp)); /* recursive, to handle nested groups */
01353     }
01354 
01355     NC_CHECK(nc_close(igrp));
01356     NC_CHECK(nc_close(ogrp));
01357     return stat;
01358 }
01359 
01360 static void
01361 usage(void)
01362 {
01363 #define USAGE   "\
01364   [-k n]    specify kind of netCDF format for output file, default same as input\n\
01365             1 classic, 2 64-bit offset, 3 netCDF-4, 4 netCDF-4 classic model\n\
01366   [-d n]    set deflation compression level, default same as input (0=none 9=max)\n\
01367   [-s]      add shuffle option to deflation compression\n\
01368   [-c chunkspec] specify chunking for dimensions, e.g. \"dim1/N1,dim2/N2,...\"\n\
01369   [-u]      convert unlimited dimensions to fixed-size dimensions in output copy\n\
01370   [-w]      write whole output file from diskless netCDF on close\n\
01371   [-m n]    set size in bytes of copy buffer, default is 5000000 bytes\n\
01372   [-h n]    set size in bytes of chunk_cache for chunked variables\n\
01373   [-e n]    set number of elements that chunk_cache can hold\n\
01374   [-r]      read whole input file into diskless file on open (classic or 64-bit offset format only)\n\
01375   infile    name of netCDF input file\n\
01376   outfile   name for netCDF output file\n"
01377 
01378     /* Don't document this flaky option until it works better */
01379     /* [-x]      use experimental computed estimates for variable-specific chunk caches\n\ */
01380 
01381     error("%s [-k n] [-d n] [-s] [-c chunkspec] [-u] [-w] [-m n] [-h n] [-e n] [-r] infile outfile\n%s",
01382           progname, USAGE);
01383 }
01384 
01634 int
01635 main(int argc, char**argv)
01636 {
01637     char* inputfile = NULL;
01638     char* outputfile = NULL;
01639     int c;
01640 
01641 /* table of formats for legal -k values */
01642     struct Kvalues {
01643         char* name;
01644         int kind;
01645     } legalkinds[] = {
01646         {"1", NC_FORMAT_CLASSIC},
01647         {"classic", NC_FORMAT_CLASSIC},
01648         
01649         /* The 64-bit offset kind (2) */
01650         {"2", NC_FORMAT_64BIT},
01651         {"64-bit-offset", NC_FORMAT_64BIT},
01652         {"64-bit offset", NC_FORMAT_64BIT},
01653         
01654         /* NetCDF-4 HDF5 format */
01655         {"3", NC_FORMAT_NETCDF4},
01656         {"hdf5", NC_FORMAT_NETCDF4},
01657         {"netCDF-4", NC_FORMAT_NETCDF4},
01658         {"netCDF4", NC_FORMAT_NETCDF4},
01659         {"enhanced", NC_FORMAT_NETCDF4},
01660 
01661         /* NetCDF-4 HDF5 format, but using only nc3 data model */
01662         {"4", NC_FORMAT_NETCDF4_CLASSIC},
01663         {"hdf5-nc3", NC_FORMAT_NETCDF4_CLASSIC},
01664         {"netCDF-4 classic model", NC_FORMAT_NETCDF4_CLASSIC},
01665         {"netCDF4_classic", NC_FORMAT_NETCDF4_CLASSIC},
01666         {"enhanced-nc3", NC_FORMAT_NETCDF4_CLASSIC},
01667 
01668         /* null terminate*/
01669         {NULL,0}
01670     };
01671 
01672     opterr = 1;
01673     progname = argv[0];
01674 
01675     if (argc <= 1)
01676     {
01677        usage();
01678     }
01679 
01680     while ((c = getopt(argc, argv, "k:d:sum:c:h:e:rwx")) != -1) {
01681         switch(c) {
01682         case 'k': /* for specifying variant of netCDF format to be generated 
01683                      Possible values are:
01684                      1 (=> classic 32 bit)
01685                      2 (=> classic 64 bit offsets)
01686                      3 (=> netCDF-4/HDF5)
01687                      4 (=> classic, but stored in netCDF-4/HDF5 format)
01688                      Also allow string versions of above
01689                      "classic"
01690                      "64-bit-offset"
01691                      "64-bit offset"
01692                      "enhanced" | "hdf5" | "netCDF-4"
01693                      "enhanced-nc3" | "hdf5-nc3" | "netCDF-4 classic model"
01694                    */
01695             {
01696                 struct Kvalues* kvalue;
01697                 char *kind_name = (char *) emalloc(strlen(optarg)+1);
01698                 (void)strcpy(kind_name, optarg);
01699                 for(kvalue=legalkinds;kvalue->name;kvalue++) {
01700                     if(strcmp(kind_name,kvalue->name) == 0) {
01701                         option_kind = kvalue->kind;
01702                         break;
01703                     }
01704                 }
01705                 if(kvalue->name == NULL) {
01706                     error("invalid format: %s", kind_name);
01707                 }
01708             }
01709             break;
01710         case 'd':               /* non-default compression level specified */
01711             option_deflate_level = strtol(optarg, NULL, 10);
01712             if(option_deflate_level < 0 || option_deflate_level > 9) {
01713                 error("invalid deflation level: %d", option_deflate_level);
01714             }
01715             break;
01716         case 's':               /* shuffling, may improve compression */
01717             option_shuffle_vars = NC_SHUFFLE;
01718             break;
01719         case 'u':               /* convert unlimited dimensions to fixed size */
01720             option_fix_unlimdims = 1;
01721             break;
01722         case 'm':               /* non-default size of data copy buffer */
01723         {
01724             double dval;
01725             char *suffix = 0;   /* "K" for kilobytes. "M" for megabytes, ... */
01726             dval = strtod(optarg, &suffix);
01727             if(*suffix) {
01728                 switch (*suffix) {
01729                 case 'k': case 'K':
01730                     dval *= 1000;
01731                     break;
01732                 case 'm': case 'M':
01733                     dval *= 1000000;
01734                     break;
01735                 case 'g': case 'G':
01736                     dval *= 1000000000;
01737                     break;
01738                 case 't': case 'T':
01739                     dval *= 1.0e12;
01740                     break;
01741                 default:
01742                     error("If suffix used for '-m' option value, it must be K, M, G, or T: %c", 
01743                           *suffix);
01744                 }               
01745             }
01746             option_copy_buffer_size = dval;
01747             break;
01748         }
01749         case 'h':               /* non-default size of chunk cache */
01750         {
01751             double dval;
01752             char *suffix = 0;   /* "K" for kilobytes, "M" for megabytes, ... */
01753             dval = strtod(optarg, &suffix);
01754             if(*suffix) {
01755                 switch (*suffix) {
01756                 case 'k': case 'K':
01757                     dval *= 1000;
01758                     break;
01759                 case 'm': case 'M':
01760                     dval *= 1000000;
01761                     break;
01762                 case 'g': case 'G':
01763                     dval *= 1000000000;
01764                     break;
01765                 case 't': case 'T':
01766                     dval *= 1.0e12;
01767                     break;
01768                 default:
01769                     error("If suffix used for '-h' option value, it must be K, M, G, or T: %c", 
01770                           *suffix);
01771                 }               
01772             }
01773             option_chunk_cache_size = dval;
01774             break;
01775             }
01776         case 'e':               /* number of elements chunk cache can hold */
01777             option_chunk_cache_nelems = strtol(optarg, NULL, 10);
01778             if(option_chunk_cache_nelems <= 0) {
01779                 error("invalid value for number of chunk cache elements: %d", option_chunk_cache_nelems);
01780             }
01781             break;
01782         case 'r':
01783             option_read_diskless = 1; /* read into memory on open */
01784             break;
01785         case 'w':
01786             option_write_diskless = 1; /* write to memory, persist on close */
01787             break;
01788         case 'x':               /* use experimental variable-specific chunk caches */
01789             option_compute_chunkcaches = 1;
01790             break;
01791         case 'c':               /* optional chunking spec for each dimension in list */
01792         {
01793             /* save chunkspec string for parsing later, once we know input ncid */
01794             option_chunkspec = strdup(optarg);
01795             break;
01796         }
01797         default: 
01798             usage();
01799         }
01800     }
01801     argc -= optind;
01802     argv += optind;
01803 
01804     if (argc != 2) {
01805         error("one input file and one output file required");
01806     }
01807     inputfile = argv[0];
01808     outputfile = argv[1];
01809 
01810     if(strcmp(inputfile, outputfile) == 0) {
01811         error("output would overwrite input");
01812     }
01813 
01814     if(copy(inputfile, outputfile) != NC_NOERR)
01815         exit(1);
01816     return 0;
01817 }
01818 END_OF_MAIN();
 All Data Structures Files Functions Variables Typedefs Defines

Generated on Tue Aug 6 2013 11:40:56 for netCDF. NetCDF is a Unidata library.