00001 /****************************************************************************** 00002 * $Id: cpl_csv_cpp-source.html,v 1.11 2002/12/21 19:13:12 warmerda Exp $ 00003 * 00004 * Project: CPL - Common Portability Library 00005 * Purpose: CSV (comma separated value) file access. 00006 * Author: Frank Warmerdam, warmerda@home.com 00007 * 00008 ****************************************************************************** 00009 * Copyright (c) 1999, Frank Warmerdam 00010 * 00011 * Permission is hereby granted, free of charge, to any person obtaining a 00012 * copy of this software and associated documentation files (the "Software"), 00013 * to deal in the Software without restriction, including without limitation 00014 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 00015 * and/or sell copies of the Software, and to permit persons to whom the 00016 * Software is furnished to do so, subject to the following conditions: 00017 * 00018 * The above copyright notice and this permission notice shall be included 00019 * in all copies or substantial portions of the Software. 00020 * 00021 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 00022 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 00023 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 00024 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 00025 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 00026 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 00027 * DEALINGS IN THE SOFTWARE. 00028 ****************************************************************************** 00029 * 00030 * $Log: cpl_csv_cpp-source.html,v $ 00030 * Revision 1.11 2002/12/21 19:13:12 warmerda 00030 * updated 00030 * 00031 * Revision 1.6 2002/11/30 16:56:31 warmerda 00032 * fixed up to support quoted newlines properly 00033 * 00034 * Revision 1.5 2002/11/27 19:09:40 warmerda 00035 * implement in-memory caching of whole CSV file 00036 * 00037 * Revision 1.4 2002/09/04 06:16:32 warmerda 00038 * added CPLReadLine(NULL) to cleanup 00039 * 00040 * Revision 1.3 2001/07/18 04:00:49 warmerda 00041 * added CPL_CVSID 00042 * 00043 * Revision 1.2 2001/01/19 21:16:41 warmerda 00044 * expanded tabs 00045 * 00046 * Revision 1.1 2000/10/06 15:20:45 warmerda 00047 * New 00048 * 00049 * Revision 1.2 2000/08/29 21:08:08 warmerda 00050 * fallback to use CPLFindFile() 00051 * 00052 * Revision 1.1 2000/04/05 21:55:59 warmerda 00053 * New 00054 * 00055 */ 00056 00057 #include "cpl_csv.h" 00058 #include "cpl_conv.h" 00059 00060 CPL_CVSID("$Id: cpl_csv_cpp-source.html,v 1.11 2002/12/21 19:13:12 warmerda Exp $"); 00061 00062 /* ==================================================================== */ 00063 /* The CSVTable is a persistant set of info about an open CSV */ 00064 /* table. While it doesn't currently maintain a record index, */ 00065 /* or in-memory copy of the table, it could be changed to do so */ 00066 /* in the future. */ 00067 /* ==================================================================== */ 00068 typedef struct ctb { 00069 FILE *fp; 00070 00071 struct ctb *psNext; 00072 00073 char *pszFilename; 00074 00075 char **papszFieldNames; 00076 00077 char **papszRecFields; 00078 00079 int iLastLine; 00080 00081 /* Cache for whole file */ 00082 int nLineCount; 00083 char **papszLines; 00084 int *panLineIndex; 00085 char *pszRawData; 00086 } CSVTable; 00087 00088 static CSVTable *psCSVTableList = NULL; 00089 00090 /************************************************************************/ 00091 /* CSVAccess() */ 00092 /* */ 00093 /* This function will fetch a handle to the requested table. */ 00094 /* If not found in the ``open table list'' the table will be */ 00095 /* opened and added to the list. Eventually this function may */ 00096 /* become public with an abstracted return type so that */ 00097 /* applications can set options about the table. For now this */ 00098 /* isn't done. */ 00099 /************************************************************************/ 00100 00101 static CSVTable *CSVAccess( const char * pszFilename ) 00102 00103 { 00104 CSVTable *psTable; 00105 FILE *fp; 00106 00107 /* -------------------------------------------------------------------- */ 00108 /* Is the table already in the list. */ 00109 /* -------------------------------------------------------------------- */ 00110 for( psTable = psCSVTableList; psTable != NULL; psTable = psTable->psNext ) 00111 { 00112 if( EQUAL(psTable->pszFilename,pszFilename) ) 00113 { 00114 /* 00115 * Eventually we should consider promoting to the front of 00116 * the list to accelerate frequently accessed tables. 00117 */ 00118 00119 return( psTable ); 00120 } 00121 } 00122 00123 /* -------------------------------------------------------------------- */ 00124 /* If not, try to open it. */ 00125 /* -------------------------------------------------------------------- */ 00126 fp = VSIFOpen( pszFilename, "rb" ); 00127 if( fp == NULL ) 00128 return NULL; 00129 00130 /* -------------------------------------------------------------------- */ 00131 /* Create an information structure about this table, and add to */ 00132 /* the front of the list. */ 00133 /* -------------------------------------------------------------------- */ 00134 psTable = (CSVTable *) CPLCalloc(sizeof(CSVTable),1); 00135 00136 psTable->fp = fp; 00137 psTable->pszFilename = CPLStrdup( pszFilename ); 00138 psTable->psNext = psCSVTableList; 00139 00140 psCSVTableList = psTable; 00141 00142 /* -------------------------------------------------------------------- */ 00143 /* Read the table header record containing the field names. */ 00144 /* -------------------------------------------------------------------- */ 00145 psTable->papszFieldNames = CSVReadParseLine( fp ); 00146 00147 return( psTable ); 00148 } 00149 00150 /************************************************************************/ 00151 /* CSVDeaccess() */ 00152 /************************************************************************/ 00153 00154 void CSVDeaccess( const char * pszFilename ) 00155 00156 { 00157 CSVTable *psLast, *psTable; 00158 00159 /* -------------------------------------------------------------------- */ 00160 /* A NULL means deaccess all tables. */ 00161 /* -------------------------------------------------------------------- */ 00162 if( pszFilename == NULL ) 00163 { 00164 while( psCSVTableList != NULL ) 00165 CSVDeaccess( psCSVTableList->pszFilename ); 00166 00167 return; 00168 } 00169 00170 /* -------------------------------------------------------------------- */ 00171 /* Find this table. */ 00172 /* -------------------------------------------------------------------- */ 00173 psLast = NULL; 00174 for( psTable = psCSVTableList; 00175 psTable != NULL && !EQUAL(psTable->pszFilename,pszFilename); 00176 psTable = psTable->psNext ) 00177 { 00178 psLast = psTable; 00179 } 00180 00181 if( psTable == NULL ) 00182 { 00183 CPLDebug( "CPL_CSV", "CPLDeaccess( %s ) - no match.", pszFilename ); 00184 return; 00185 } 00186 00187 /* -------------------------------------------------------------------- */ 00188 /* Remove the link from the list. */ 00189 /* -------------------------------------------------------------------- */ 00190 if( psLast != NULL ) 00191 psLast->psNext = psTable->psNext; 00192 else 00193 psCSVTableList = psTable->psNext; 00194 00195 /* -------------------------------------------------------------------- */ 00196 /* Free the table. */ 00197 /* -------------------------------------------------------------------- */ 00198 if( psTable->fp != NULL ) 00199 VSIFClose( psTable->fp ); 00200 00201 CSLDestroy( psTable->papszFieldNames ); 00202 CSLDestroy( psTable->papszRecFields ); 00203 CPLFree( psTable->pszFilename ); 00204 CPLFree( psTable->panLineIndex ); 00205 CPLFree( psTable->pszRawData ); 00206 CPLFree( psTable->papszLines ); 00207 00208 CPLFree( psTable ); 00209 00210 CPLReadLine( NULL ); 00211 } 00212 00213 /************************************************************************/ 00214 /* CSVSplitLine() */ 00215 /* */ 00216 /* Tokenize a CSV line into fields in the form of a string */ 00217 /* list. This is used instead of the CPLTokenizeString() */ 00218 /* because it provides correct CSV escaping and quoting */ 00219 /* semantics. */ 00220 /************************************************************************/ 00221 00222 static char **CSVSplitLine( const char *pszString ) 00223 00224 { 00225 char **papszRetList = NULL; 00226 char *pszToken; 00227 int nTokenMax, nTokenLen; 00228 00229 pszToken = (char *) CPLCalloc(10,1); 00230 nTokenMax = 10; 00231 00232 while( pszString != NULL && *pszString != '\0' ) 00233 { 00234 int bInString = FALSE; 00235 00236 nTokenLen = 0; 00237 00238 /* Try to find the next delimeter, marking end of token */ 00239 for( ; *pszString != '\0'; pszString++ ) 00240 { 00241 00242 /* End if this is a delimeter skip it and break. */ 00243 if( !bInString && *pszString == ',' ) 00244 { 00245 pszString++; 00246 break; 00247 } 00248 00249 if( *pszString == '"' ) 00250 { 00251 if( !bInString || pszString[1] != '"' ) 00252 { 00253 bInString = !bInString; 00254 continue; 00255 } 00256 else /* doubled quotes in string resolve to one quote */ 00257 { 00258 pszString++; 00259 } 00260 } 00261 00262 if( nTokenLen >= nTokenMax-2 ) 00263 { 00264 nTokenMax = nTokenMax * 2 + 10; 00265 pszToken = (char *) CPLRealloc( pszToken, nTokenMax ); 00266 } 00267 00268 pszToken[nTokenLen] = *pszString; 00269 nTokenLen++; 00270 } 00271 00272 pszToken[nTokenLen] = '\0'; 00273 papszRetList = CSLAddString( papszRetList, pszToken ); 00274 00275 /* If the last token is an empty token, then we have to catch 00276 * it now, otherwise we won't reenter the loop and it will be lost. 00277 */ 00278 if ( *pszString == '\0' && *(pszString-1) == ',' ) 00279 { 00280 papszRetList = CSLAddString( papszRetList, "" ); 00281 } 00282 } 00283 00284 if( papszRetList == NULL ) 00285 papszRetList = (char **) CPLCalloc(sizeof(char *),1); 00286 00287 CPLFree( pszToken ); 00288 00289 return papszRetList; 00290 } 00291 00292 /************************************************************************/ 00293 /* CSVFindNextLine() */ 00294 /* */ 00295 /* Find the start of the next line, while at the same time zero */ 00296 /* terminating this line. Take into account that there may be */ 00297 /* newline indicators within quoted strings, and that quotes */ 00298 /* can be escaped with a backslash. */ 00299 /************************************************************************/ 00300 00301 static char *CSVFindNextLine( char *pszThisLine ) 00302 00303 { 00304 int nQuoteCount = 0, i; 00305 00306 for( i = 0; pszThisLine[i] != '\0'; i++ ) 00307 { 00308 if( pszThisLine[i] == '\"' 00309 && (i == 0 || pszThisLine[i-1] != '\\') ) 00310 nQuoteCount++; 00311 00312 if( (pszThisLine[i] == 10 || pszThisLine[i] == 13) 00313 && (nQuoteCount % 2) == 0 ) 00314 break; 00315 } 00316 00317 while( pszThisLine[i] == 10 || pszThisLine[i] == 13 ) 00318 pszThisLine[i++] = '\0'; 00319 00320 if( pszThisLine[i] == '\0' ) 00321 return NULL; 00322 else 00323 return pszThisLine + i; 00324 } 00325 00326 /************************************************************************/ 00327 /* CSVIngest() */ 00328 /* */ 00329 /* Load entire file into memory and setup index if possible. */ 00330 /************************************************************************/ 00331 00332 static void CSVIngest( const char *pszFilename ) 00333 00334 { 00335 CSVTable *psTable = CSVAccess( pszFilename ); 00336 int nFileLen, i, nMaxLineCount, iLine = 0; 00337 char *pszThisLine; 00338 00339 if( psTable->pszRawData != NULL ) 00340 return; 00341 00342 /* -------------------------------------------------------------------- */ 00343 /* Ingest whole file. */ 00344 /* -------------------------------------------------------------------- */ 00345 VSIFSeek( psTable->fp, 0, SEEK_END ); 00346 nFileLen = VSIFTell( psTable->fp ); 00347 VSIRewind( psTable->fp ); 00348 00349 psTable->pszRawData = (char *) CPLMalloc(nFileLen+1); 00350 if( (int) VSIFRead( psTable->pszRawData, 1, nFileLen, psTable->fp ) 00351 != nFileLen ) 00352 { 00353 CPLFree( psTable->pszRawData ); 00354 psTable->pszRawData = NULL; 00355 00356 CPLError( CE_Failure, CPLE_FileIO, "Read of file %s failed.", 00357 psTable->pszFilename ); 00358 return; 00359 } 00360 00361 psTable->pszRawData[nFileLen] = '\0'; 00362 00363 /* -------------------------------------------------------------------- */ 00364 /* Get count of newlines so we can allocate line array. */ 00365 /* -------------------------------------------------------------------- */ 00366 nMaxLineCount = 0; 00367 for( i = 0; i < nFileLen; i++ ) 00368 { 00369 if( psTable->pszRawData[i] == 10 ) 00370 nMaxLineCount++; 00371 } 00372 00373 psTable->papszLines = (char **) CPLCalloc(sizeof(char*),nMaxLineCount); 00374 00375 /* -------------------------------------------------------------------- */ 00376 /* Build a list of record pointers into the raw data buffer */ 00377 /* based on line terminators. Zero terminate the line */ 00378 /* strings. */ 00379 /* -------------------------------------------------------------------- */ 00380 /* skip header line */ 00381 pszThisLine = CSVFindNextLine( psTable->pszRawData ); 00382 00383 while( pszThisLine != NULL && iLine < nMaxLineCount ) 00384 { 00385 psTable->papszLines[iLine++] = pszThisLine; 00386 pszThisLine = CSVFindNextLine( pszThisLine ); 00387 } 00388 00389 psTable->nLineCount = iLine; 00390 00391 /* -------------------------------------------------------------------- */ 00392 /* Allocate and populate index array. Ensure they are in */ 00393 /* ascending order so that binary searches can be done on the */ 00394 /* array. */ 00395 /* -------------------------------------------------------------------- */ 00396 psTable->panLineIndex = (int *) CPLMalloc(sizeof(int)*psTable->nLineCount); 00397 for( i = 0; i < psTable->nLineCount; i++ ) 00398 { 00399 psTable->panLineIndex[i] = atoi(psTable->papszLines[i]); 00400 00401 if( i > 0 && psTable->panLineIndex[i] < psTable->panLineIndex[i-1] ) 00402 { 00403 CPLFree( psTable->panLineIndex ); 00404 psTable->panLineIndex = NULL; 00405 break; 00406 } 00407 } 00408 00409 psTable->iLastLine = -1; 00410 00411 /* -------------------------------------------------------------------- */ 00412 /* We should never need the file handle against, so close it. */ 00413 /* -------------------------------------------------------------------- */ 00414 VSIFClose( psTable->fp ); 00415 psTable->fp = NULL; 00416 } 00417 00418 /************************************************************************/ 00419 /* CSVReadParseLine() */ 00420 /* */ 00421 /* Read one line, and return split into fields. The return */ 00422 /* result is a stringlist, in the sense of the CSL functions. */ 00423 /************************************************************************/ 00424 00425 char **CSVReadParseLine( FILE * fp ) 00426 00427 { 00428 const char *pszLine; 00429 char *pszWorkLine; 00430 char **papszReturn; 00431 00432 CPLAssert( fp != NULL ); 00433 if( fp == NULL ) 00434 return( NULL ); 00435 00436 pszLine = CPLReadLine( fp ); 00437 if( pszLine == NULL ) 00438 return( NULL ); 00439 00440 /* -------------------------------------------------------------------- */ 00441 /* If there are no quotes, then this is the simple case. */ 00442 /* Parse, and return tokens. */ 00443 /* -------------------------------------------------------------------- */ 00444 if( strchr(pszLine,'\"') == NULL ) 00445 return CSVSplitLine( pszLine ); 00446 00447 /* -------------------------------------------------------------------- */ 00448 /* We must now count the quotes in our working string, and as */ 00449 /* long as it is odd, keep adding new lines. */ 00450 /* -------------------------------------------------------------------- */ 00451 pszWorkLine = CPLStrdup( pszLine ); 00452 00453 while( TRUE ) 00454 { 00455 int i, nCount = 0; 00456 00457 for( i = 0; pszWorkLine[i] != '\0'; i++ ) 00458 { 00459 if( pszWorkLine[i] == '\"' 00460 && (i == 0 || pszWorkLine[i-1] != '\\') ) 00461 nCount++; 00462 } 00463 00464 if( nCount % 2 == 0 ) 00465 break; 00466 00467 pszLine = CPLReadLine( fp ); 00468 if( pszLine == NULL ) 00469 break; 00470 00471 pszWorkLine = (char *) 00472 CPLRealloc(pszWorkLine, 00473 strlen(pszWorkLine) + strlen(pszLine) + 1); 00474 strcat( pszWorkLine, pszLine ); 00475 } 00476 00477 papszReturn = CSVSplitLine( pszWorkLine ); 00478 00479 CPLFree( pszWorkLine ); 00480 00481 return papszReturn; 00482 } 00483 00484 /************************************************************************/ 00485 /* CSVCompare() */ 00486 /* */ 00487 /* Compare a field to a search value using a particular */ 00488 /* criteria. */ 00489 /************************************************************************/ 00490 00491 static int CSVCompare( const char * pszFieldValue, const char * pszTarget, 00492 CSVCompareCriteria eCriteria ) 00493 00494 { 00495 if( eCriteria == CC_ExactString ) 00496 { 00497 return( strcmp( pszFieldValue, pszTarget ) == 0 ); 00498 } 00499 else if( eCriteria == CC_ApproxString ) 00500 { 00501 return( EQUAL( pszFieldValue, pszTarget ) ); 00502 } 00503 else if( eCriteria == CC_Integer ) 00504 { 00505 return( atoi(pszFieldValue) == atoi(pszTarget) ); 00506 } 00507 00508 return FALSE; 00509 } 00510 00511 /************************************************************************/ 00512 /* CSVScanLines() */ 00513 /* */ 00514 /* Read the file scanline for lines where the key field equals */ 00515 /* the indicated value with the suggested comparison criteria. */ 00516 /* Return the first matching line split into fields. */ 00517 /************************************************************************/ 00518 00519 char **CSVScanLines( FILE *fp, int iKeyField, const char * pszValue, 00520 CSVCompareCriteria eCriteria ) 00521 00522 { 00523 char **papszFields = NULL; 00524 int bSelected = FALSE, nTestValue; 00525 00526 CPLAssert( pszValue != NULL ); 00527 CPLAssert( iKeyField >= 0 ); 00528 CPLAssert( fp != NULL ); 00529 00530 nTestValue = atoi(pszValue); 00531 00532 while( !bSelected ) { 00533 papszFields = CSVReadParseLine( fp ); 00534 if( papszFields == NULL ) 00535 return( NULL ); 00536 00537 if( CSLCount( papszFields ) < iKeyField+1 ) 00538 { 00539 /* not selected */ 00540 } 00541 else if( eCriteria == CC_Integer 00542 && atoi(papszFields[iKeyField]) == nTestValue ) 00543 { 00544 bSelected = TRUE; 00545 } 00546 else 00547 { 00548 bSelected = CSVCompare( papszFields[iKeyField], pszValue, 00549 eCriteria ); 00550 } 00551 00552 if( !bSelected ) 00553 { 00554 CSLDestroy( papszFields ); 00555 papszFields = NULL; 00556 } 00557 } 00558 00559 return( papszFields ); 00560 } 00561 00562 /************************************************************************/ 00563 /* CSVScanLinesIndexed() */ 00564 /* */ 00565 /* Read the file scanline for lines where the key field equals */ 00566 /* the indicated value with the suggested comparison criteria. */ 00567 /* Return the first matching line split into fields. */ 00568 /************************************************************************/ 00569 00570 static char ** 00571 CSVScanLinesIndexed( CSVTable *psTable, int nKeyValue ) 00572 00573 { 00574 int iTop, iBottom, iMiddle, iResult = -1; 00575 00576 CPLAssert( psTable->panLineIndex != NULL ); 00577 00578 /* -------------------------------------------------------------------- */ 00579 /* Find target record with binary search. */ 00580 /* -------------------------------------------------------------------- */ 00581 iTop = psTable->nLineCount-1; 00582 iBottom = 0; 00583 00584 while( iTop >= iBottom ) 00585 { 00586 iMiddle = (iTop + iBottom) / 2; 00587 if( psTable->panLineIndex[iMiddle] > nKeyValue ) 00588 iTop = iMiddle - 1; 00589 else if( psTable->panLineIndex[iMiddle] < nKeyValue ) 00590 iBottom = iMiddle + 1; 00591 else 00592 { 00593 iResult = iMiddle; 00594 break; 00595 } 00596 } 00597 00598 if( iResult == -1 ) 00599 return NULL; 00600 00601 /* -------------------------------------------------------------------- */ 00602 /* Parse target line, and update iLastLine indicator. */ 00603 /* -------------------------------------------------------------------- */ 00604 psTable->iLastLine = iResult; 00605 00606 return CSVSplitLine( psTable->papszLines[iResult] ); 00607 } 00608 00609 /************************************************************************/ 00610 /* CSVScanLinesIngested() */ 00611 /* */ 00612 /* Read the file scanline for lines where the key field equals */ 00613 /* the indicated value with the suggested comparison criteria. */ 00614 /* Return the first matching line split into fields. */ 00615 /************************************************************************/ 00616 00617 static char ** 00618 CSVScanLinesIngested( CSVTable *psTable, int iKeyField, const char * pszValue, 00619 CSVCompareCriteria eCriteria ) 00620 00621 { 00622 char **papszFields = NULL; 00623 int bSelected = FALSE, nTestValue; 00624 00625 CPLAssert( pszValue != NULL ); 00626 CPLAssert( iKeyField >= 0 ); 00627 00628 nTestValue = atoi(pszValue); 00629 00630 /* -------------------------------------------------------------------- */ 00631 /* Short cut for indexed files. */ 00632 /* -------------------------------------------------------------------- */ 00633 if( iKeyField == 0 && eCriteria == CC_Integer 00634 && psTable->panLineIndex != NULL ) 00635 return CSVScanLinesIndexed( psTable, nTestValue ); 00636 00637 /* -------------------------------------------------------------------- */ 00638 /* Scan from in-core lines. */ 00639 /* -------------------------------------------------------------------- */ 00640 while( !bSelected && psTable->iLastLine+1 < psTable->nLineCount ) { 00641 psTable->iLastLine++; 00642 papszFields = CSVSplitLine( psTable->papszLines[psTable->iLastLine] ); 00643 00644 if( CSLCount( papszFields ) < iKeyField+1 ) 00645 { 00646 /* not selected */ 00647 } 00648 else if( eCriteria == CC_Integer 00649 && atoi(papszFields[iKeyField]) == nTestValue ) 00650 { 00651 bSelected = TRUE; 00652 } 00653 else 00654 { 00655 bSelected = CSVCompare( papszFields[iKeyField], pszValue, 00656 eCriteria ); 00657 } 00658 00659 if( !bSelected ) 00660 { 00661 CSLDestroy( papszFields ); 00662 papszFields = NULL; 00663 } 00664 } 00665 00666 return( papszFields ); 00667 } 00668 00669 /************************************************************************/ 00670 /* CSVScanFile() */ 00671 /* */ 00672 /* Scan a whole file using criteria similar to above, but also */ 00673 /* taking care of file opening and closing. */ 00674 /************************************************************************/ 00675 00676 char **CSVScanFile( const char * pszFilename, int iKeyField, 00677 const char * pszValue, CSVCompareCriteria eCriteria ) 00678 00679 { 00680 CSVTable *psTable; 00681 00682 /* -------------------------------------------------------------------- */ 00683 /* Get access to the table. */ 00684 /* -------------------------------------------------------------------- */ 00685 CPLAssert( pszFilename != NULL ); 00686 00687 if( iKeyField < 0 ) 00688 return NULL; 00689 00690 psTable = CSVAccess( pszFilename ); 00691 if( psTable == NULL ) 00692 return NULL; 00693 00694 CSVIngest( pszFilename ); 00695 00696 /* -------------------------------------------------------------------- */ 00697 /* Does the current record match the criteria? If so, just */ 00698 /* return it again. */ 00699 /* -------------------------------------------------------------------- */ 00700 if( iKeyField >= 0 00701 && iKeyField < CSLCount(psTable->papszRecFields) 00702 && CSVCompare(pszValue,psTable->papszRecFields[iKeyField],eCriteria) ) 00703 { 00704 return psTable->papszRecFields; 00705 } 00706 00707 /* -------------------------------------------------------------------- */ 00708 /* Scan the file from the beginning, replacing the ``current */ 00709 /* record'' in our structure with the one that is found. */ 00710 /* -------------------------------------------------------------------- */ 00711 psTable->iLastLine = -1; 00712 CSLDestroy( psTable->papszRecFields ); 00713 00714 if( psTable->pszRawData != NULL ) 00715 psTable->papszRecFields = 00716 CSVScanLinesIngested( psTable, iKeyField, pszValue, eCriteria ); 00717 else 00718 { 00719 VSIRewind( psTable->fp ); 00720 CPLReadLine( psTable->fp ); /* throw away the header line */ 00721 00722 psTable->papszRecFields = 00723 CSVScanLines( psTable->fp, iKeyField, pszValue, eCriteria ); 00724 } 00725 00726 return( psTable->papszRecFields ); 00727 } 00728 00729 /************************************************************************/ 00730 /* CPLGetFieldId() */ 00731 /* */ 00732 /* Read the first record of a CSV file (rewinding to be sure), */ 00733 /* and find the field with the indicated name. Returns -1 if */ 00734 /* it fails to find the field name. Comparison is case */ 00735 /* insensitive, but otherwise exact. After this function has */ 00736 /* been called the file pointer will be positioned just after */ 00737 /* the first record. */ 00738 /************************************************************************/ 00739 00740 int CSVGetFieldId( FILE * fp, const char * pszFieldName ) 00741 00742 { 00743 char **papszFields; 00744 int i; 00745 00746 CPLAssert( fp != NULL && pszFieldName != NULL ); 00747 00748 VSIRewind( fp ); 00749 00750 papszFields = CSVReadParseLine( fp ); 00751 for( i = 0; papszFields != NULL && papszFields[i] != NULL; i++ ) 00752 { 00753 if( EQUAL(papszFields[i],pszFieldName) ) 00754 { 00755 CSLDestroy( papszFields ); 00756 return i; 00757 } 00758 } 00759 00760 CSLDestroy( papszFields ); 00761 00762 return -1; 00763 } 00764 00765 /************************************************************************/ 00766 /* CSVGetFileFieldId() */ 00767 /* */ 00768 /* Same as CPLGetFieldId(), except that we get the file based */ 00769 /* on filename, rather than having an existing handle. */ 00770 /************************************************************************/ 00771 00772 int CSVGetFileFieldId( const char * pszFilename, const char * pszFieldName ) 00773 00774 { 00775 CSVTable *psTable; 00776 int i; 00777 00778 /* -------------------------------------------------------------------- */ 00779 /* Get access to the table. */ 00780 /* -------------------------------------------------------------------- */ 00781 CPLAssert( pszFilename != NULL ); 00782 00783 psTable = CSVAccess( pszFilename ); 00784 if( psTable == NULL ) 00785 return -1; 00786 00787 /* -------------------------------------------------------------------- */ 00788 /* Find the requested field. */ 00789 /* -------------------------------------------------------------------- */ 00790 for( i = 0; 00791 psTable->papszFieldNames != NULL 00792 && psTable->papszFieldNames[i] != NULL; 00793 i++ ) 00794 { 00795 if( EQUAL(psTable->papszFieldNames[i],pszFieldName) ) 00796 { 00797 return i; 00798 } 00799 } 00800 00801 return -1; 00802 } 00803 00804 00805 /************************************************************************/ 00806 /* CSVScanFileByName() */ 00807 /* */ 00808 /* Same as CSVScanFile(), but using a field name instead of a */ 00809 /* field number. */ 00810 /************************************************************************/ 00811 00812 char **CSVScanFileByName( const char * pszFilename, 00813 const char * pszKeyFieldName, 00814 const char * pszValue, CSVCompareCriteria eCriteria ) 00815 00816 { 00817 int iKeyField; 00818 00819 iKeyField = CSVGetFileFieldId( pszFilename, pszKeyFieldName ); 00820 if( iKeyField == -1 ) 00821 return NULL; 00822 00823 return( CSVScanFile( pszFilename, iKeyField, pszValue, eCriteria ) ); 00824 } 00825 00826 /************************************************************************/ 00827 /* CSVGetField() */ 00828 /* */ 00829 /* The all-in-one function to fetch a particular field value */ 00830 /* from a CSV file. Note this function will return an empty */ 00831 /* string, rather than NULL if it fails to find the desired */ 00832 /* value for some reason. The caller can't establish that the */ 00833 /* fetch failed. */ 00834 /************************************************************************/ 00835 00836 const char *CSVGetField( const char * pszFilename, 00837 const char * pszKeyFieldName, 00838 const char * pszKeyFieldValue, 00839 CSVCompareCriteria eCriteria, 00840 const char * pszTargetField ) 00841 00842 { 00843 CSVTable *psTable; 00844 char **papszRecord; 00845 int iTargetField; 00846 00847 /* -------------------------------------------------------------------- */ 00848 /* Find the table. */ 00849 /* -------------------------------------------------------------------- */ 00850 psTable = CSVAccess( pszFilename ); 00851 if( psTable == NULL ) 00852 return ""; 00853 00854 /* -------------------------------------------------------------------- */ 00855 /* Find the correct record. */ 00856 /* -------------------------------------------------------------------- */ 00857 papszRecord = CSVScanFileByName( pszFilename, pszKeyFieldName, 00858 pszKeyFieldValue, eCriteria ); 00859 00860 if( papszRecord == NULL ) 00861 return ""; 00862 00863 /* -------------------------------------------------------------------- */ 00864 /* Figure out which field we want out of this. */ 00865 /* -------------------------------------------------------------------- */ 00866 iTargetField = CSVGetFileFieldId( pszFilename, pszTargetField ); 00867 if( iTargetField < 0 ) 00868 return ""; 00869 00870 if( iTargetField >= CSLCount( papszRecord ) ) 00871 return ""; 00872 00873 return( papszRecord[iTargetField] ); 00874 } 00875 00876 /************************************************************************/ 00877 /* CSVFilename() */ 00878 /* */ 00879 /* Return the full path to a particular CSV file. This will */ 00880 /* eventually be something the application can override. */ 00881 /************************************************************************/ 00882 00883 static const char *(*pfnCSVFilenameHook)(const char *) = NULL; 00884 00885 const char * CSVFilename( const char *pszBasename ) 00886 00887 { 00888 static char szPath[512]; 00889 00890 if( pfnCSVFilenameHook == NULL ) 00891 { 00892 FILE *fp = NULL; 00893 const char *pszResult = CPLFindFile( "epsg_csv", pszBasename ); 00894 00895 if( pszResult != NULL ) 00896 return pszResult; 00897 00898 if( getenv("GEOTIFF_CSV") != NULL ) 00899 { 00900 sprintf( szPath, "%s/%s", getenv("GEOTIFF_CSV"), pszBasename ); 00901 } 00902 else if( (fp = fopen( "csv/horiz_cs.csv", "rt" )) != NULL ) 00903 { 00904 sprintf( szPath, "csv/%s", pszBasename ); 00905 } 00906 else 00907 { 00908 sprintf( szPath, "/usr/local/share/epsg_csv/%s", pszBasename ); 00909 } 00910 00911 if( fp != NULL ) 00912 fclose( fp ); 00913 00914 return( szPath ); 00915 } 00916 else 00917 return( pfnCSVFilenameHook( pszBasename ) ); 00918 } 00919 00920 /************************************************************************/ 00921 /* SetCSVFilenameHook() */ 00922 /* */ 00923 /* Applications can use this to set a function that will */ 00924 /* massage CSV filenames. */ 00925 /************************************************************************/ 00926 00971 void SetCSVFilenameHook( const char *(*pfnNewHook)( const char * ) ) 00972 00973 { 00974 pfnCSVFilenameHook = pfnNewHook; 00975 }