Main Page   Class Hierarchy   Compound List   File List   Compound Members   File Members   Related Pages  

cpl_csv.cpp

00001 /******************************************************************************
00002  * $Id: cpl_csv_cpp-source.html,v 1.11 2002/12/21 19:13:12 warmerda Exp $
00003  *
00004  * Project:  CPL - Common Portability Library
00005  * Purpose:  CSV (comma separated value) file access.
00006  * Author:   Frank Warmerdam, warmerda@home.com
00007  *
00008  ******************************************************************************
00009  * Copyright (c) 1999, Frank Warmerdam
00010  *
00011  * Permission is hereby granted, free of charge, to any person obtaining a
00012  * copy of this software and associated documentation files (the "Software"),
00013  * to deal in the Software without restriction, including without limitation
00014  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
00015  * and/or sell copies of the Software, and to permit persons to whom the
00016  * Software is furnished to do so, subject to the following conditions:
00017  *
00018  * The above copyright notice and this permission notice shall be included
00019  * in all copies or substantial portions of the Software.
00020  *
00021  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
00022  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
00023  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
00024  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
00025  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
00026  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
00027  * DEALINGS IN THE SOFTWARE.
00028  ******************************************************************************
00029  *
00030  * $Log: cpl_csv_cpp-source.html,v $
00030  * Revision 1.11  2002/12/21 19:13:12  warmerda
00030  * updated
00030  *
00031  * Revision 1.6  2002/11/30 16:56:31  warmerda
00032  * fixed up to support quoted newlines properly
00033  *
00034  * Revision 1.5  2002/11/27 19:09:40  warmerda
00035  * implement in-memory caching of whole CSV file
00036  *
00037  * Revision 1.4  2002/09/04 06:16:32  warmerda
00038  * added CPLReadLine(NULL) to cleanup
00039  *
00040  * Revision 1.3  2001/07/18 04:00:49  warmerda
00041  * added CPL_CVSID
00042  *
00043  * Revision 1.2  2001/01/19 21:16:41  warmerda
00044  * expanded tabs
00045  *
00046  * Revision 1.1  2000/10/06 15:20:45  warmerda
00047  * New
00048  *
00049  * Revision 1.2  2000/08/29 21:08:08  warmerda
00050  * fallback to use CPLFindFile()
00051  *
00052  * Revision 1.1  2000/04/05 21:55:59  warmerda
00053  * New
00054  *
00055  */
00056 
00057 #include "cpl_csv.h"
00058 #include "cpl_conv.h"
00059 
00060 CPL_CVSID("$Id: cpl_csv_cpp-source.html,v 1.11 2002/12/21 19:13:12 warmerda Exp $");
00061 
/* ==================================================================== */
/*      CSVTable holds the persistent state for one open CSV file.      */
/*      Tables are chained through psNext.  CSVIngest() loads the       */
/*      whole file into pszRawData, records the start of each line      */
/*      in papszLines, and builds panLineIndex when the leading         */
/*      integer of each line is in ascending order.                     */
/* ==================================================================== */
typedef struct ctb {
    FILE        *fp;               /* open handle; NULL once ingested */
    struct ctb  *psNext;           /* next table in the open table list */
    char        *pszFilename;      /* name the table was opened with */
    char        **papszFieldNames; /* fields of the header record */
    char        **papszRecFields;  /* most recently matched record */
    int         iLastLine;         /* papszLines index of last line used */

    /* Cache for whole file */
    int         nLineCount;        /* number of entries in papszLines */
    char        **papszLines;      /* line starts inside pszRawData */
    int         *panLineIndex;     /* atoi() of each line, NULL if unsorted */
    char        *pszRawData;       /* whole file, zero terminated */
} CSVTable;

/* Head of the list of currently open tables. */
static CSVTable *psCSVTableList = NULL;
00089 
00090 /************************************************************************/
00091 /*                             CSVAccess()                              */
00092 /*                                                                      */
00093 /*      This function will fetch a handle to the requested table.       */
00094 /*      If not found in the ``open table list'' the table will be       */
00095 /*      opened and added to the list.  Eventually this function may     */
00096 /*      become public with an abstracted return type so that            */
00097 /*      applications can set options about the table.  For now this     */
00098 /*      isn't done.                                                     */
00099 /************************************************************************/
00100 
00101 static CSVTable *CSVAccess( const char * pszFilename )
00102 
00103 {
00104     CSVTable    *psTable;
00105     FILE        *fp;
00106 
00107 /* -------------------------------------------------------------------- */
00108 /*      Is the table already in the list.                               */
00109 /* -------------------------------------------------------------------- */
00110     for( psTable = psCSVTableList; psTable != NULL; psTable = psTable->psNext )
00111     {
00112         if( EQUAL(psTable->pszFilename,pszFilename) )
00113         {
00114             /*
00115              * Eventually we should consider promoting to the front of
00116              * the list to accelerate frequently accessed tables.
00117              */
00118             
00119             return( psTable );
00120         }
00121     }
00122 
00123 /* -------------------------------------------------------------------- */
00124 /*      If not, try to open it.                                         */
00125 /* -------------------------------------------------------------------- */
00126     fp = VSIFOpen( pszFilename, "rb" );
00127     if( fp == NULL )
00128         return NULL;
00129 
00130 /* -------------------------------------------------------------------- */
00131 /*      Create an information structure about this table, and add to    */
00132 /*      the front of the list.                                          */
00133 /* -------------------------------------------------------------------- */
00134     psTable = (CSVTable *) CPLCalloc(sizeof(CSVTable),1);
00135 
00136     psTable->fp = fp;
00137     psTable->pszFilename = CPLStrdup( pszFilename );
00138     psTable->psNext = psCSVTableList;
00139     
00140     psCSVTableList = psTable;
00141 
00142 /* -------------------------------------------------------------------- */
00143 /*      Read the table header record containing the field names.        */
00144 /* -------------------------------------------------------------------- */
00145     psTable->papszFieldNames = CSVReadParseLine( fp );
00146 
00147     return( psTable );
00148 }
00149 
00150 /************************************************************************/
00151 /*                            CSVDeaccess()                             */
00152 /************************************************************************/
00153 
00154 void CSVDeaccess( const char * pszFilename )
00155 
00156 {
00157     CSVTable    *psLast, *psTable;
00158     
00159 /* -------------------------------------------------------------------- */
00160 /*      A NULL means deaccess all tables.                               */
00161 /* -------------------------------------------------------------------- */
00162     if( pszFilename == NULL )
00163     {
00164         while( psCSVTableList != NULL )
00165             CSVDeaccess( psCSVTableList->pszFilename );
00166         
00167         return;
00168     }
00169 
00170 /* -------------------------------------------------------------------- */
00171 /*      Find this table.                                                */
00172 /* -------------------------------------------------------------------- */
00173     psLast = NULL;
00174     for( psTable = psCSVTableList;
00175          psTable != NULL && !EQUAL(psTable->pszFilename,pszFilename);
00176          psTable = psTable->psNext )
00177     {
00178         psLast = psTable;
00179     }
00180 
00181     if( psTable == NULL )
00182     {
00183         CPLDebug( "CPL_CSV", "CPLDeaccess( %s ) - no match.", pszFilename );
00184         return;
00185     }
00186 
00187 /* -------------------------------------------------------------------- */
00188 /*      Remove the link from the list.                                  */
00189 /* -------------------------------------------------------------------- */
00190     if( psLast != NULL )
00191         psLast->psNext = psTable->psNext;
00192     else
00193         psCSVTableList = psTable->psNext;
00194 
00195 /* -------------------------------------------------------------------- */
00196 /*      Free the table.                                                 */
00197 /* -------------------------------------------------------------------- */
00198     if( psTable->fp != NULL )
00199         VSIFClose( psTable->fp );
00200 
00201     CSLDestroy( psTable->papszFieldNames );
00202     CSLDestroy( psTable->papszRecFields );
00203     CPLFree( psTable->pszFilename );
00204     CPLFree( psTable->panLineIndex );
00205     CPLFree( psTable->pszRawData );
00206     CPLFree( psTable->papszLines );
00207 
00208     CPLFree( psTable );
00209 
00210     CPLReadLine( NULL );
00211 }
00212 
00213 /************************************************************************/
00214 /*                            CSVSplitLine()                            */
00215 /*                                                                      */
00216 /*      Tokenize a CSV line into fields in the form of a string         */
00217 /*      list.  This is used instead of the CPLTokenizeString()          */
00218 /*      because it provides correct CSV escaping and quoting            */
00219 /*      semantics.                                                      */
00220 /************************************************************************/
00221 
00222 static char **CSVSplitLine( const char *pszString )
00223 
00224 {
00225     char        **papszRetList = NULL;
00226     char        *pszToken;
00227     int         nTokenMax, nTokenLen;
00228 
00229     pszToken = (char *) CPLCalloc(10,1);
00230     nTokenMax = 10;
00231     
00232     while( pszString != NULL && *pszString != '\0' )
00233     {
00234         int     bInString = FALSE;
00235 
00236         nTokenLen = 0;
00237         
00238         /* Try to find the next delimeter, marking end of token */
00239         for( ; *pszString != '\0'; pszString++ )
00240         {
00241 
00242             /* End if this is a delimeter skip it and break. */
00243             if( !bInString && *pszString == ',' )
00244             {
00245                 pszString++;
00246                 break;
00247             }
00248             
00249             if( *pszString == '"' )
00250             {
00251                 if( !bInString || pszString[1] != '"' )
00252                 {
00253                     bInString = !bInString;
00254                     continue;
00255                 }
00256                 else  /* doubled quotes in string resolve to one quote */
00257                 {
00258                     pszString++;
00259                 }
00260             }
00261 
00262             if( nTokenLen >= nTokenMax-2 )
00263             {
00264                 nTokenMax = nTokenMax * 2 + 10;
00265                 pszToken = (char *) CPLRealloc( pszToken, nTokenMax );
00266             }
00267 
00268             pszToken[nTokenLen] = *pszString;
00269             nTokenLen++;
00270         }
00271 
00272         pszToken[nTokenLen] = '\0';
00273         papszRetList = CSLAddString( papszRetList, pszToken );
00274 
00275         /* If the last token is an empty token, then we have to catch
00276          * it now, otherwise we won't reenter the loop and it will be lost. 
00277          */
00278         if ( *pszString == '\0' && *(pszString-1) == ',' )
00279         {
00280             papszRetList = CSLAddString( papszRetList, "" );
00281         }
00282     }
00283 
00284     if( papszRetList == NULL )
00285         papszRetList = (char **) CPLCalloc(sizeof(char *),1);
00286 
00287     CPLFree( pszToken );
00288 
00289     return papszRetList;
00290 }
00291 
/************************************************************************/
/*                          CSVFindNextLine()                           */
/*                                                                      */
/*      Zero terminate the line starting at pszThisLine and return      */
/*      the start of the following line, or NULL if there is            */
/*      none.  CR/LF bytes inside a quoted string (odd number of        */
/*      quotes seen so far) do not end the line; a quote preceded       */
/*      by a backslash is not counted.                                  */
/************************************************************************/

static char *CSVFindNextLine( char *pszThisLine )

{
    char    *pszScan = pszThisLine;
    int     bOddQuotes = 0;     /* non-zero while inside a quoted string */

    for( ; *pszScan != '\0'; pszScan++ )
    {
        if( *pszScan == '\"'
            && (pszScan == pszThisLine || pszScan[-1] != '\\') )
        {
            bOddQuotes = !bOddQuotes;
        }
        else if( (*pszScan == 10 || *pszScan == 13) && !bOddQuotes )
        {
            /* unquoted LF (10) or CR (13): end of this line */
            break;
        }
    }

    /* Wipe out the whole run of terminators, which also skips blank lines. */
    while( *pszScan == 10 || *pszScan == 13 )
        *pszScan++ = '\0';

    return (*pszScan == '\0') ? NULL : pszScan;
}
00325 
00326 /************************************************************************/
00327 /*                             CSVIngest()                              */
00328 /*                                                                      */
00329 /*      Load entire file into memory and setup index if possible.       */
00330 /************************************************************************/
00331 
00332 static void CSVIngest( const char *pszFilename )
00333 
00334 {
00335     CSVTable *psTable = CSVAccess( pszFilename );
00336     int       nFileLen, i, nMaxLineCount, iLine = 0;
00337     char *pszThisLine;
00338 
00339     if( psTable->pszRawData != NULL )
00340         return;
00341 
00342 /* -------------------------------------------------------------------- */
00343 /*      Ingest whole file.                                              */
00344 /* -------------------------------------------------------------------- */
00345     VSIFSeek( psTable->fp, 0, SEEK_END );
00346     nFileLen = VSIFTell( psTable->fp );
00347     VSIRewind( psTable->fp );
00348 
00349     psTable->pszRawData = (char *) CPLMalloc(nFileLen+1);
00350     if( (int) VSIFRead( psTable->pszRawData, 1, nFileLen, psTable->fp ) 
00351         != nFileLen )
00352     {
00353         CPLFree( psTable->pszRawData );
00354         psTable->pszRawData = NULL;
00355 
00356         CPLError( CE_Failure, CPLE_FileIO, "Read of file %s failed.", 
00357                   psTable->pszFilename );
00358         return;
00359     }
00360 
00361     psTable->pszRawData[nFileLen] = '\0';
00362 
00363 /* -------------------------------------------------------------------- */
00364 /*      Get count of newlines so we can allocate line array.            */
00365 /* -------------------------------------------------------------------- */
00366     nMaxLineCount = 0;
00367     for( i = 0; i < nFileLen; i++ )
00368     {
00369         if( psTable->pszRawData[i] == 10 )
00370             nMaxLineCount++;
00371     }
00372 
00373     psTable->papszLines = (char **) CPLCalloc(sizeof(char*),nMaxLineCount);
00374     
00375 /* -------------------------------------------------------------------- */
00376 /*      Build a list of record pointers into the raw data buffer        */
00377 /*      based on line terminators.  Zero terminate the line             */
00378 /*      strings.                                                        */
00379 /* -------------------------------------------------------------------- */
00380     /* skip header line */
00381     pszThisLine = CSVFindNextLine( psTable->pszRawData );
00382 
00383     while( pszThisLine != NULL && iLine < nMaxLineCount )
00384     {
00385         psTable->papszLines[iLine++] = pszThisLine;
00386         pszThisLine = CSVFindNextLine( pszThisLine );
00387     }
00388 
00389     psTable->nLineCount = iLine;
00390 
00391 /* -------------------------------------------------------------------- */
00392 /*      Allocate and populate index array.  Ensure they are in          */
00393 /*      ascending order so that binary searches can be done on the      */
00394 /*      array.                                                          */
00395 /* -------------------------------------------------------------------- */
00396     psTable->panLineIndex = (int *) CPLMalloc(sizeof(int)*psTable->nLineCount);
00397     for( i = 0; i < psTable->nLineCount; i++ )
00398     {
00399         psTable->panLineIndex[i] = atoi(psTable->papszLines[i]);
00400 
00401         if( i > 0 && psTable->panLineIndex[i] < psTable->panLineIndex[i-1] )
00402         {
00403             CPLFree( psTable->panLineIndex );
00404             psTable->panLineIndex = NULL;
00405             break;
00406         }
00407     }
00408 
00409     psTable->iLastLine = -1;
00410 
00411 /* -------------------------------------------------------------------- */
00412 /*      We should never need the file handle against, so close it.      */
00413 /* -------------------------------------------------------------------- */
00414     VSIFClose( psTable->fp );
00415     psTable->fp = NULL;
00416 }
00417 
00418 /************************************************************************/
00419 /*                          CSVReadParseLine()                          */
00420 /*                                                                      */
00421 /*      Read one line, and return split into fields.  The return        */
00422 /*      result is a stringlist, in the sense of the CSL functions.      */
00423 /************************************************************************/
00424 
00425 char **CSVReadParseLine( FILE * fp )
00426 
00427 {
00428     const char  *pszLine;
00429     char        *pszWorkLine;
00430     char        **papszReturn;
00431 
00432     CPLAssert( fp != NULL );
00433     if( fp == NULL )
00434         return( NULL );
00435     
00436     pszLine = CPLReadLine( fp );
00437     if( pszLine == NULL )
00438         return( NULL );
00439 
00440 /* -------------------------------------------------------------------- */
00441 /*      If there are no quotes, then this is the simple case.           */
00442 /*      Parse, and return tokens.                                       */
00443 /* -------------------------------------------------------------------- */
00444     if( strchr(pszLine,'\"') == NULL )
00445         return CSVSplitLine( pszLine );
00446 
00447 /* -------------------------------------------------------------------- */
00448 /*      We must now count the quotes in our working string, and as      */
00449 /*      long as it is odd, keep adding new lines.                       */
00450 /* -------------------------------------------------------------------- */
00451     pszWorkLine = CPLStrdup( pszLine );
00452 
00453     while( TRUE )
00454     {
00455         int             i, nCount = 0;
00456 
00457         for( i = 0; pszWorkLine[i] != '\0'; i++ )
00458         {
00459             if( pszWorkLine[i] == '\"'
00460                 && (i == 0 || pszWorkLine[i-1] != '\\') )
00461                 nCount++;
00462         }
00463 
00464         if( nCount % 2 == 0 )
00465             break;
00466 
00467         pszLine = CPLReadLine( fp );
00468         if( pszLine == NULL )
00469             break;
00470 
00471         pszWorkLine = (char *)
00472             CPLRealloc(pszWorkLine,
00473                        strlen(pszWorkLine) + strlen(pszLine) + 1);
00474         strcat( pszWorkLine, pszLine );
00475     }
00476     
00477     papszReturn = CSVSplitLine( pszWorkLine );
00478 
00479     CPLFree( pszWorkLine );
00480 
00481     return papszReturn;
00482 }
00483 
00484 /************************************************************************/
00485 /*                             CSVCompare()                             */
00486 /*                                                                      */
00487 /*      Compare a field to a search value using a particular            */
00488 /*      criteria.                                                       */
00489 /************************************************************************/
00490 
00491 static int CSVCompare( const char * pszFieldValue, const char * pszTarget,
00492                        CSVCompareCriteria eCriteria )
00493 
00494 {
00495     if( eCriteria == CC_ExactString )
00496     {
00497         return( strcmp( pszFieldValue, pszTarget ) == 0 );
00498     }
00499     else if( eCriteria == CC_ApproxString )
00500     {
00501         return( EQUAL( pszFieldValue, pszTarget ) );
00502     }
00503     else if( eCriteria == CC_Integer )
00504     {
00505         return( atoi(pszFieldValue) == atoi(pszTarget) );
00506     }
00507 
00508     return FALSE;
00509 }
00510 
00511 /************************************************************************/
00512 /*                            CSVScanLines()                            */
00513 /*                                                                      */
00514 /*      Read the file scanline for lines where the key field equals     */
00515 /*      the indicated value with the suggested comparison criteria.     */
00516 /*      Return the first matching line split into fields.               */
00517 /************************************************************************/
00518 
00519 char **CSVScanLines( FILE *fp, int iKeyField, const char * pszValue,
00520                      CSVCompareCriteria eCriteria )
00521 
00522 {
00523     char        **papszFields = NULL;
00524     int         bSelected = FALSE, nTestValue;
00525 
00526     CPLAssert( pszValue != NULL );
00527     CPLAssert( iKeyField >= 0 );
00528     CPLAssert( fp != NULL );
00529     
00530     nTestValue = atoi(pszValue);
00531     
00532     while( !bSelected ) {
00533         papszFields = CSVReadParseLine( fp );
00534         if( papszFields == NULL )
00535             return( NULL );
00536 
00537         if( CSLCount( papszFields ) < iKeyField+1 )
00538         {
00539             /* not selected */
00540         }
00541         else if( eCriteria == CC_Integer
00542                  && atoi(papszFields[iKeyField]) == nTestValue )
00543         {
00544             bSelected = TRUE;
00545         }
00546         else
00547         {
00548             bSelected = CSVCompare( papszFields[iKeyField], pszValue,
00549                                     eCriteria );
00550         }
00551 
00552         if( !bSelected )
00553         {
00554             CSLDestroy( papszFields );
00555             papszFields = NULL;
00556         }
00557     }
00558     
00559     return( papszFields );
00560 }
00561 
00562 /************************************************************************/
00563 /*                        CSVScanLinesIndexed()                         */
00564 /*                                                                      */
00565 /*      Read the file scanline for lines where the key field equals     */
00566 /*      the indicated value with the suggested comparison criteria.     */
00567 /*      Return the first matching line split into fields.               */
00568 /************************************************************************/
00569 
00570 static char **
00571 CSVScanLinesIndexed( CSVTable *psTable, int nKeyValue )
00572 
00573 {
00574     int         iTop, iBottom, iMiddle, iResult = -1;
00575 
00576     CPLAssert( psTable->panLineIndex != NULL );
00577 
00578 /* -------------------------------------------------------------------- */
00579 /*      Find target record with binary search.                          */
00580 /* -------------------------------------------------------------------- */
00581     iTop = psTable->nLineCount-1;
00582     iBottom = 0;
00583 
00584     while( iTop >= iBottom )
00585     {
00586         iMiddle = (iTop + iBottom) / 2;
00587         if( psTable->panLineIndex[iMiddle] > nKeyValue )
00588             iTop = iMiddle - 1;
00589         else if( psTable->panLineIndex[iMiddle] < nKeyValue )
00590             iBottom = iMiddle + 1;
00591         else
00592         {
00593             iResult = iMiddle;
00594             break;
00595         }
00596     }
00597 
00598     if( iResult == -1 )
00599         return NULL;
00600 
00601 /* -------------------------------------------------------------------- */
00602 /*      Parse target line, and update iLastLine indicator.              */
00603 /* -------------------------------------------------------------------- */
00604     psTable->iLastLine = iResult;
00605     
00606     return CSVSplitLine( psTable->papszLines[iResult] );
00607 }
00608 
00609 /************************************************************************/
00610 /*                        CSVScanLinesIngested()                        */
00611 /*                                                                      */
00612 /*      Read the file scanline for lines where the key field equals     */
00613 /*      the indicated value with the suggested comparison criteria.     */
00614 /*      Return the first matching line split into fields.               */
00615 /************************************************************************/
00616 
00617 static char **
00618 CSVScanLinesIngested( CSVTable *psTable, int iKeyField, const char * pszValue,
00619                       CSVCompareCriteria eCriteria )
00620 
00621 {
00622     char        **papszFields = NULL;
00623     int         bSelected = FALSE, nTestValue;
00624 
00625     CPLAssert( pszValue != NULL );
00626     CPLAssert( iKeyField >= 0 );
00627 
00628     nTestValue = atoi(pszValue);
00629     
00630 /* -------------------------------------------------------------------- */
00631 /*      Short cut for indexed files.                                    */
00632 /* -------------------------------------------------------------------- */
00633     if( iKeyField == 0 && eCriteria == CC_Integer 
00634         && psTable->panLineIndex != NULL )
00635         return CSVScanLinesIndexed( psTable, nTestValue );
00636     
00637 /* -------------------------------------------------------------------- */
00638 /*      Scan from in-core lines.                                        */
00639 /* -------------------------------------------------------------------- */
00640     while( !bSelected && psTable->iLastLine+1 < psTable->nLineCount ) {
00641         psTable->iLastLine++;
00642         papszFields = CSVSplitLine( psTable->papszLines[psTable->iLastLine] );
00643 
00644         if( CSLCount( papszFields ) < iKeyField+1 )
00645         {
00646             /* not selected */
00647         }
00648         else if( eCriteria == CC_Integer
00649                  && atoi(papszFields[iKeyField]) == nTestValue )
00650         {
00651             bSelected = TRUE;
00652         }
00653         else
00654         {
00655             bSelected = CSVCompare( papszFields[iKeyField], pszValue,
00656                                     eCriteria );
00657         }
00658 
00659         if( !bSelected )
00660         {
00661             CSLDestroy( papszFields );
00662             papszFields = NULL;
00663         }
00664     }
00665     
00666     return( papszFields );
00667 }
00668 
00669 /************************************************************************/
00670 /*                            CSVScanFile()                             */
00671 /*                                                                      */
00672 /*      Scan a whole file using criteria similar to above, but also     */
00673 /*      taking care of file opening and closing.                        */
00674 /************************************************************************/
00675 
00676 char **CSVScanFile( const char * pszFilename, int iKeyField,
00677                     const char * pszValue, CSVCompareCriteria eCriteria )
00678 
00679 {
00680     CSVTable    *psTable;
00681 
00682 /* -------------------------------------------------------------------- */
00683 /*      Get access to the table.                                        */
00684 /* -------------------------------------------------------------------- */
00685     CPLAssert( pszFilename != NULL );
00686 
00687     if( iKeyField < 0 )
00688         return NULL;
00689 
00690     psTable = CSVAccess( pszFilename );
00691     if( psTable == NULL )
00692         return NULL;
00693     
00694     CSVIngest( pszFilename );
00695 
00696 /* -------------------------------------------------------------------- */
00697 /*      Does the current record match the criteria?  If so, just        */
00698 /*      return it again.                                                */
00699 /* -------------------------------------------------------------------- */
00700     if( iKeyField >= 0
00701         && iKeyField < CSLCount(psTable->papszRecFields)
00702         && CSVCompare(pszValue,psTable->papszRecFields[iKeyField],eCriteria) )
00703     {
00704         return psTable->papszRecFields;
00705     }
00706 
00707 /* -------------------------------------------------------------------- */
00708 /*      Scan the file from the beginning, replacing the ``current       */
00709 /*      record'' in our structure with the one that is found.           */
00710 /* -------------------------------------------------------------------- */
00711     psTable->iLastLine = -1;
00712     CSLDestroy( psTable->papszRecFields );
00713 
00714     if( psTable->pszRawData != NULL )
00715         psTable->papszRecFields = 
00716             CSVScanLinesIngested( psTable, iKeyField, pszValue, eCriteria );
00717     else
00718     {
00719         VSIRewind( psTable->fp );
00720         CPLReadLine( psTable->fp );         /* throw away the header line */
00721     
00722         psTable->papszRecFields =
00723             CSVScanLines( psTable->fp, iKeyField, pszValue, eCriteria );
00724     }
00725 
00726     return( psTable->papszRecFields );
00727 }
00728 
/************************************************************************/
/*                           CSVGetFieldId()                            */
/*                                                                      */
/*      Rewind fp, parse the first record, and return the index         */
/*      of the field named pszFieldName (EQUAL() comparison), or        */
/*      -1 if there is no such field.  The file is left                 */
/*      positioned just after the first record.                         */
/************************************************************************/

int CSVGetFieldId( FILE * fp, const char * pszFieldName )

{
    char        **papszHeader;
    int         iField, iFound = -1;

    CPLAssert( fp != NULL && pszFieldName != NULL );

    VSIRewind( fp );

    papszHeader = CSVReadParseLine( fp );

    /* A NULL header simply produces no iterations. */
    for( iField = 0;
         papszHeader != NULL && papszHeader[iField] != NULL;
         iField++ )
    {
        if( EQUAL(papszHeader[iField],pszFieldName) )
        {
            iFound = iField;
            break;
        }
    }

    CSLDestroy( papszHeader );

    return iFound;
}
00764 
00765 /************************************************************************/
00766 /*                         CSVGetFileFieldId()                          */
00767 /*                                                                      */
00768 /*      Same as CPLGetFieldId(), except that we get the file based      */
00769 /*      on filename, rather than having an existing handle.             */
00770 /************************************************************************/
00771 
00772 int CSVGetFileFieldId( const char * pszFilename, const char * pszFieldName )
00773 
00774 {
00775     CSVTable    *psTable;
00776     int         i;
00777     
00778 /* -------------------------------------------------------------------- */
00779 /*      Get access to the table.                                        */
00780 /* -------------------------------------------------------------------- */
00781     CPLAssert( pszFilename != NULL );
00782 
00783     psTable = CSVAccess( pszFilename );
00784     if( psTable == NULL )
00785         return -1;
00786 
00787 /* -------------------------------------------------------------------- */
00788 /*      Find the requested field.                                       */
00789 /* -------------------------------------------------------------------- */
00790     for( i = 0;
00791          psTable->papszFieldNames != NULL
00792              && psTable->papszFieldNames[i] != NULL;
00793          i++ )
00794     {
00795         if( EQUAL(psTable->papszFieldNames[i],pszFieldName) )
00796         {
00797             return i;
00798         }
00799     }
00800 
00801     return -1;
00802 }
00803 
00804 
00805 /************************************************************************/
00806 /*                         CSVScanFileByName()                          */
00807 /*                                                                      */
00808 /*      Same as CSVScanFile(), but using a field name instead of a      */
00809 /*      field number.                                                   */
00810 /************************************************************************/
00811 
00812 char **CSVScanFileByName( const char * pszFilename,
00813                           const char * pszKeyFieldName,
00814                           const char * pszValue, CSVCompareCriteria eCriteria )
00815 
00816 {
00817     int         iKeyField;
00818 
00819     iKeyField = CSVGetFileFieldId( pszFilename, pszKeyFieldName );
00820     if( iKeyField == -1 )
00821         return NULL;
00822 
00823     return( CSVScanFile( pszFilename, iKeyField, pszValue, eCriteria ) );
00824 }
00825 
00826 /************************************************************************/
00827 /*                            CSVGetField()                             */
00828 /*                                                                      */
00829 /*      The all-in-one function to fetch a particular field value       */
00830 /*      from a CSV file.  Note this function will return an empty       */
00831 /*      string, rather than NULL if it fails to find the desired        */
00832 /*      value for some reason.  The caller can't establish that the     */
00833 /*      fetch failed.                                                   */
00834 /************************************************************************/
00835 
00836 const char *CSVGetField( const char * pszFilename,
00837                          const char * pszKeyFieldName,
00838                          const char * pszKeyFieldValue,
00839                          CSVCompareCriteria eCriteria,
00840                          const char * pszTargetField )
00841 
00842 {
00843     CSVTable    *psTable;
00844     char        **papszRecord;
00845     int         iTargetField;
00846     
00847 /* -------------------------------------------------------------------- */
00848 /*      Find the table.                                                 */
00849 /* -------------------------------------------------------------------- */
00850     psTable = CSVAccess( pszFilename );
00851     if( psTable == NULL )
00852         return "";
00853 
00854 /* -------------------------------------------------------------------- */
00855 /*      Find the correct record.                                        */
00856 /* -------------------------------------------------------------------- */
00857     papszRecord = CSVScanFileByName( pszFilename, pszKeyFieldName,
00858                                      pszKeyFieldValue, eCriteria );
00859 
00860     if( papszRecord == NULL )
00861         return "";
00862 
00863 /* -------------------------------------------------------------------- */
00864 /*      Figure out which field we want out of this.                     */
00865 /* -------------------------------------------------------------------- */
00866     iTargetField = CSVGetFileFieldId( pszFilename, pszTargetField );
00867     if( iTargetField < 0 )
00868         return "";
00869 
00870     if( iTargetField >= CSLCount( papszRecord ) )
00871         return "";
00872 
00873     return( papszRecord[iTargetField] );
00874 }
00875 
/************************************************************************/
/*                            CSVFilename()                             */
/*                                                                      */
/*      Return the full path to a particular CSV file.  If a hook       */
/*      has been installed with SetCSVFilenameHook() it is used;        */
/*      otherwise the following are tried in order:                     */
/*        1) CPLFindFile( "epsg_csv", pszBasename )                     */
/*        2) the directory named by the GEOTIFF_CSV environment         */
/*           variable                                                   */
/*        3) csv/, if csv/horiz_cs.csv can be opened                    */
/*        4) /usr/local/share/epsg_csv                                  */
/*                                                                      */
/*      Paths built here are stored in a static buffer that is          */
/*      overwritten by the next call, and are truncated (never          */
/*      overflowed) if they do not fit.                                 */
/************************************************************************/

static const char *(*pfnCSVFilenameHook)(const char *) = NULL;

const char * CSVFilename( const char *pszBasename )

{
    static char         szPath[512];
    const char          *pszResult;
    const char          *pszEnvDir;
    FILE                *fp;

    /* An application supplied hook overrides all of the built-in logic. */
    if( pfnCSVFilenameHook != NULL )
        return( pfnCSVFilenameHook( pszBasename ) );

    pszResult = CPLFindFile( "epsg_csv", pszBasename );
    if( pszResult != NULL )
        return pszResult;

    /* Fetch the environment variable once, and bound every write into  */
    /* szPath: the directory and basename are caller/environment        */
    /* controlled and could otherwise overrun the fixed size buffer.    */
    pszEnvDir = getenv( "GEOTIFF_CSV" );
    if( pszEnvDir != NULL )
    {
        snprintf( szPath, sizeof(szPath), "%s/%s", pszEnvDir, pszBasename );
    }
    else if( (fp = fopen( "csv/horiz_cs.csv", "rt" )) != NULL )
    {
        /* The file was only opened to probe for a local csv directory. */
        fclose( fp );
        snprintf( szPath, sizeof(szPath), "csv/%s", pszBasename );
    }
    else
    {
        snprintf( szPath, sizeof(szPath),
                  "/usr/local/share/epsg_csv/%s", pszBasename );
    }

    return( szPath );
}
00919 
00920 /************************************************************************/
00921 /*                         SetCSVFilenameHook()                         */
00922 /*                                                                      */
00923 /*      Applications can use this to set a function that will           */
00924 /*      massage CSV filenames.                                          */
00925 /************************************************************************/
00926 
00971 void SetCSVFilenameHook( const char *(*pfnNewHook)( const char * ) )
00972 
00973 {
00974     pfnCSVFilenameHook = pfnNewHook;
00975 }

Generated at Sat Dec 21 14:01:57 2002 for GDAL by doxygen1.2.3-20001105 written by Dimitri van Heesch, © 1997-2000