lookup.cpp

00001 /* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
00002 /* kspell2 - adopted from enchant
00003  * Copyright (C) 2003 Dom Lachowicz
00004  * Copyright (C) 2004 Zack Rusin <zack@kde.org>
00005  *
00006  * This library is free software; you can redistribute it and/or
00007  * modify it under the terms of the GNU Lesser General Public
00008  * License as published by the Free Software Foundation; either
00009  * version 2.1 of the License, or (at your option) any later version.
00010  *
00011  * This library is distributed in the hope that it will be useful,
00012  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014  * Lesser General Public License for more details.
00015  *
00016  * You should have received a copy of the GNU Lesser General Public
00017  * License along with this library; if not, write to the
00018  * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
00019  * Boston, MA 02110-1301, USA.
00020  *
00021  * In addition, as a special exception, Dom Lachowicz
00022  * gives permission to link the code of this program with
00023  * non-LGPL Spelling Provider libraries (eg: a MSFT Office
00024  * spell checker backend) and distribute linked combinations including
00025  * the two.  You must obey the GNU General Public License in all
00026  * respects for all of the code used other than said providers.  If you modify
00027  * this file, you may extend this exception to your version of the
00028  * file, but you are not obligated to do so.  If you do not wish to
00029  * do so, delete this exception statement from your version.
00030  */
00031 
00032 /*
00033  * lookup.c - see if a word appears in the dictionary
00034  *
00035  * Pace Willisson, 1983
00036  *
00037  * Copyright 1987, 1988, 1989, 1992, 1993, Geoff Kuenning, Granada Hills, CA
00038  * All rights reserved.
00039  *
00040  * Redistribution and use in source and binary forms, with or without
00041  * modification, are permitted provided that the following conditions
00042  * are met:
00043  *
00044  * 1. Redistributions of source code must retain the above copyright
00045  *    notice, this list of conditions and the following disclaimer.
00046  * 2. Redistributions in binary form must reproduce the above copyright
00047  *    notice, this list of conditions and the following disclaimer in the
00048  *    documentation and/or other materials provided with the distribution.
00049  * 3. All modifications to the source code must be clearly marked as
00050  *    such.  Binary redistributions based on modified source code
00051  *    must be clearly marked as modified versions in the documentation
00052  *    and/or other materials provided with the distribution.
00053  * 4. All advertising materials mentioning features or use of this software
00054  *    must display the following acknowledgment:
00055  *      This product includes software developed by Geoff Kuenning and
00056  *      other unpaid contributors.
00057  * 5. The name of Geoff Kuenning may not be used to endorse or promote
00058  *    products derived from this software without specific prior
00059  *    written permission.
00060  *
00061  * THIS SOFTWARE IS PROVIDED BY GEOFF KUENNING AND CONTRIBUTORS ``AS IS'' AND
00062  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00063  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00064  * ARE DISCLAIMED.  IN NO EVENT SHALL GEOFF KUENNING OR CONTRIBUTORS BE LIABLE
00065  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
00066  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
00067  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
00068  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00069  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
00070  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
00071  * SUCH DAMAGE.
00072  */
00073 
00074 /*
00075  * $Log$
00076  * Revision 1.1  2004/01/31 16:44:12  zrusin
00077  * ISpell plugin.
00078  *
00079  * Revision 1.7  2003/09/25 02:44:48  dom
00080  * bug 5813
00081  *
00082  * Revision 1.6  2003/08/26 13:20:40  dom
00083  * ispell crasher fix, implement enchant_dictionary_release
00084  *
00085  * Revision 1.5  2003/08/26 13:08:03  uwog
00086  * Fix segfault when the requested dictionary couldn't be found.
00087  *
00088  * Revision 1.4  2003/08/14 16:27:36  dom
00089  * update some documentation
00090  *
00091  * Revision 1.3  2003/07/28 20:40:27  dom
00092  * fix up the license clause, further win32-registry proof some directory getting functions
00093  *
00094  * Revision 1.2  2003/07/16 22:52:47  dom
00095  * LGPL + exception license
00096  *
00097  * Revision 1.1  2003/07/15 01:15:07  dom
00098  * ispell enchant backend
00099  *
00100  * Revision 1.3  2003/01/29 05:50:12  hippietrail
00101  *
00102  * Fixed my mess in EncodingManager.
00103  * Changed many C casts to C++ casts.
00104  *
00105  * Revision 1.2  2003/01/25 03:16:05  hippietrail
00106  *
00107  * An UT_ICONV_INVALID fix which escaped the last commit.
00108  *
00109  * Revision 1.1  2003/01/24 05:52:34  hippietrail
00110  *
00111  * Refactored ispell code. Old ispell global variables had been put into
00112  * an allocated structure, a pointer to which was passed to many functions.
00113  * I have now made all such functions and variables private members of the
00114  * ISpellChecker class. It was C OO, now it's C++ OO.
00115  *
00116  * I've fixed the makefiles and tested compilation but am unable to test
00117  * operation. Please back out my changes if they cause problems which
00118  * are not obvious or easy to fix.
00119  *
00120  * Revision 1.12  2003/01/06 18:48:39  dom
00121  * ispell cleanup, start of using new 'add' save features
00122  *
00123  * Revision 1.11  2002/09/19 05:31:17  hippietrail
00124  *
00125  * More Ispell cleanup.  Conditional globals and DEREF macros are removed.
00126  * K&R function declarations removed, converted to Doxygen style comments
00127  * where possible.  No code has been changed (I hope).  Compiles for me but
00128  * unable to test.
00129  *
00130  * Revision 1.10  2002/09/17 03:03:30  hippietrail
00131  *
00132  * After seeking permission on the developer list I've reformatted all the
00133  * spelling source which seemed to have parts which used 2, 3, 4, and 8
00134  * spaces for tabs.  It should all look good with our standard 4-space
00135  * tabs now.
00136  * I've concentrated just on indentation in the actual code.  More prettying
00137  * could be done.
00138  * * NO code changes were made *
00139  *
00140  * Revision 1.9  2002/09/13 17:20:13  mpritchett
00141  * Fix more warnings for Linux build
00142  *
00143  * Revision 1.8  2002/05/03 09:49:43  fjfranklin
00144  * o hash downloader update (Gabriel Gerhardsson)
00145  * - Comment out the "Can't open <dictionary>" printf.
00146  * - Make the progressbar more clean at the beginning of the download.
00147  * - Add support for tarballs that doesn't have the full path included
00148  * - Fix copyright headers on the newly added files (*HashDownloader.*)
00149  *
00150  * Revision 1.7  2001/08/27 19:06:30  dom
00151  * Lots of compilation fixes
00152  *
00153  * Revision 1.6  2001/08/10 18:32:40  dom
00154  * Spelling and iconv updates. god, i hate iconv
00155  *
00156  * Revision 1.5  2001/08/10 09:57:49  hub
00157  * Patch by sobomax@FreeBSD.org
00158  * #include "iconv.h" directive is missed from src/other/spell/xp/lookup.c and
00159  * src/wp/impexp/xp/ie_imp_RTF.cpp.
00160  * See bug 1823
00161  *
00162  * Revision 1.4  2001/07/18 17:46:01  dom
00163  * Module changes, and fix compiler warnings
00164  *
00165  * Revision 1.3  2001/06/12 21:32:49  dom
00166  * More ispell work...
00167  *
00168  * Revision 1.2  2001/05/12 16:05:42  thomasf
00169  * Big pseudo changes to ispell to make it pass around a structure rather
00170  * than rely on all sorts of globals willy nilly here and there.  Also
00171  * fixed our spelling class to work with accepting suggestions once more.
00172  * This code is dirty, gross and ugly (not to mention still not supporting
00173  * multiple hash sized just yet) but it works on my machine and will no
00174  * doubt break other machines.
00175  *
00176  * Revision 1.1  2001/04/15 16:01:24  tomas_f
00177  * moving to spell/xp
00178  *
00179  * Revision 1.7  1999/09/29 23:33:32  justin
00180  * Updates to the underlying ispell-based code to support suggested corrections.
00181  *
00182  * Revision 1.6  1999/04/13 17:12:51  jeff
00183  * Applied "Darren O. Benham" <gecko@benham.net> spell check changes.
00184  * Fixed crash on Win32 with the new code.
00185  *
00186  * Revision 1.5  1999/01/07 01:07:48  paul
00187  * Fixed spell leaks.
00188  *
00189  * Revision 1.5  1999/01/07 01:07:48  paul
00190  * Fixed spell leaks.
00191  *
00192  * Revision 1.4  1998/12/29 14:55:33  eric
00193  *
00194  * I've doctored the ispell code pretty extensively here.  It is now
00195  * warning-free on Win32.  It also *works* on Win32 now, since I
00196  * replaced all the I/O calls with ANSI standard ones.
00197  *
00198  * Revision 1.3  1998/12/28 23:11:30  eric
00199  *
00200  * modified spell code and integration to build on Windows.
00201  * This is still a hack.
00202  *
00203  * Actually, it doesn't yet WORK on Windows.  It just builds.
00204  * SpellCheckInit is failing for some reason.
00205  *
00206  * Revision 1.2  1998/12/28 22:16:22  eric
00207  *
00208  * These changes begin to incorporate the spell checker into AbiWord.  Most
00209  * of this is a hack.
00210  *
00211  * 1.  added other/spell to the -I list in config/abi_defs
00212  * 2.  replaced other/spell/Makefile with one which is more like
00213  *  our build system.
00214  * 3.  added other/spell to other/Makefile so that the build will now
00215  *  dive down and build the spell check library.
00216  * 4.  added the AbiSpell library to the Makefiles in wp/main
00217  * 5.  added a call to SpellCheckInit in wp/main/unix/UnixMain.cpp.
00218  *  This call is a HACK and should be replaced with something
00219  *  proper later.
00220  * 6.  added code to fv_View.cpp as follows:
00221  *  whenever you double-click on a word, the spell checker
00222  *  verifies that word and prints its status to stdout.
00223  *
00224  * Caveats:
00225  * 1.  This will break the Windows build.  I'm going to work on fixing it
00226  *  now.
00227  * 2.  This only works if your dictionary is in /usr/lib/ispell/american.hash.
00228  *  The dictionary location is currently hard-coded.  This will be
00229  *  fixed as well.
00230  *
00231  * Anyway, such as it is, it works.
00232  *
00233  * Revision 1.1  1998/12/28 18:04:43  davet
00234  * Spell checker code stripped from ispell.  At this point, there are
00235  * two external routines...  the Init routine, and a check-a-word routine
00236  * which returns a boolean value, and takes a 16 bit char string.
00237  * The code resembles the ispell code as much as possible still.
00238  *
00239  * Revision 1.42  1995/01/08  23:23:42  geoff
00240  * Support MSDOS_BINARY_OPEN when opening the hash file to read it in.
00241  *
00242  * Revision 1.41  1994/01/25  07:11:51  geoff
00243  * Get rid of all old RCS log lines in preparation for the 3.1 release.
00244  *
00245  */
00246 
00247 #include <stdlib.h>
00248 #include <string.h>
00249 #include <ctype.h>
00250 
00251 #include "ispell_checker.h"
00252 #include "msgs.h"
00253 
#ifdef INDEXDUMP
/*
 * Debug-only helper, defined further down in this file.  Declared with a
 * plain prototype instead of the old K&R compatibility macro P((...)),
 * which is not defined anywhere in this file.
 */
static void dumpindex (struct flagptr * indexp, int depth);
#endif /* INDEXDUMP */

/*
 * File-scope global with external linkage, initialised to 64.
 * NOTE(review): presumably the number of affix-flag mask bits the loaded
 * hash file is expected to use -- it is not referenced in this file, so
 * confirm against its users.
 */
int     gnMaskBits = 64;
00259 
00265 int ISpellChecker::linit (char *hashname)
00266 {
00267     FILE*   fpHash;
00268         
00269     register int    i;
00270     register struct dent * dp;
00271     struct flagent *    entry;
00272     struct flagptr *    ind;
00273     int         nextchar, x;
00274     int         viazero;
00275     register ichar_t *  cp;
00276 
00277     if ((fpHash = fopen (hashname, "rb")) == NULL)
00278     {
00279         return (-1);
00280     }
00281 
00282     m_hashsize = fread (reinterpret_cast<char *>(&m_hashheader), 1, sizeof m_hashheader, fpHash);
00283     if (m_hashsize < static_cast<int>(sizeof(m_hashheader)))
00284     {
00285         if (m_hashsize < 0)
00286             fprintf (stderr, LOOKUP_C_CANT_READ, hashname);
00287         else if (m_hashsize == 0)
00288             fprintf (stderr, LOOKUP_C_NULL_HASH, hashname);
00289         else
00290             fprintf (stderr,
00291               LOOKUP_C_SHORT_HASH (m_hashname, m_hashsize,
00292                 static_cast<int>(sizeof m_hashheader)));
00293         return (-1);
00294     }
00295     else if (m_hashheader.magic != MAGIC)
00296     {
00297         fprintf (stderr,
00298           LOOKUP_C_BAD_MAGIC (hashname, static_cast<unsigned int>(MAGIC),
00299             static_cast<unsigned int>(m_hashheader.magic)));
00300         return (-1);
00301     }
00302     else if (m_hashheader.magic2 != MAGIC)
00303     {
00304         fprintf (stderr,
00305           LOOKUP_C_BAD_MAGIC2 (hashname, static_cast<unsigned int>(MAGIC),
00306             static_cast<unsigned int>(m_hashheader.magic2)));
00307         return (-1);
00308     }
00309 /*  else if (hashheader.compileoptions != COMPILEOPTIONS*/
00310     else if ( 1 != 1
00311       ||  m_hashheader.maxstringchars != MAXSTRINGCHARS
00312       ||  m_hashheader.maxstringcharlen != MAXSTRINGCHARLEN)
00313     {
00314         fprintf (stderr,
00315           LOOKUP_C_BAD_OPTIONS (static_cast<unsigned int>(m_hashheader.compileoptions),
00316             m_hashheader.maxstringchars, m_hashheader.maxstringcharlen,
00317             static_cast<unsigned int>(COMPILEOPTIONS), MAXSTRINGCHARS, MAXSTRINGCHARLEN));
00318         return (-1);
00319     }
00320 
00321     {
00322         m_hashtbl =
00323          (struct dent *)
00324             calloc (static_cast<unsigned>(m_hashheader.tblsize), sizeof (struct dent));
00325         m_hashsize = m_hashheader.tblsize;
00326         m_hashstrings = static_cast<char *>(malloc(static_cast<unsigned>(m_hashheader.stringsize)));
00327     }
00328     m_numsflags = m_hashheader.stblsize;
00329     m_numpflags = m_hashheader.ptblsize;
00330     m_sflaglist = (struct flagent *)
00331       malloc ((m_numsflags + m_numpflags) * sizeof (struct flagent));
00332     if (m_hashtbl == NULL  ||  m_hashstrings == NULL  ||  m_sflaglist == NULL)
00333     {
00334         fprintf (stderr, LOOKUP_C_NO_HASH_SPACE);
00335         return (-1);
00336     }
00337     m_pflaglist = m_sflaglist + m_numsflags;
00338 
00339     {
00340         if( fread ( m_hashstrings, 1, static_cast<unsigned>(m_hashheader.stringsize), fpHash) 
00341             != static_cast<size_t>(m_hashheader.stringsize) )
00342         {
00343             fprintf (stderr, LOOKUP_C_BAD_FORMAT);
00344             fprintf (stderr, "stringsize err\n" );
00345             return (-1);
00346         }
00347         if ( m_hashheader.compileoptions & 0x04 )
00348         {
00349             if(  fread (reinterpret_cast<char *>(m_hashtbl), 1, static_cast<unsigned>(m_hashheader.tblsize) * sizeof(struct dent), fpHash)
00350                 != (static_cast<size_t>(m_hashheader.tblsize * sizeof (struct dent))))
00351             {
00352                 fprintf (stderr, LOOKUP_C_BAD_FORMAT);
00353                 return (-1);
00354             }
00355         }
00356         else
00357         {
00358             for( x=0; x<m_hashheader.tblsize; x++ )
00359             {
00360                 if(  fread ( reinterpret_cast<char*>(m_hashtbl+x), sizeof( struct dent)-sizeof( MASKTYPE ), 1, fpHash)
00361                     != 1)
00362                 {
00363                     fprintf (stderr, LOOKUP_C_BAD_FORMAT);
00364                     return (-1);
00365                 }
00366             }   /*for*/
00367         }   /*else*/
00368     }
00369     if (fread (reinterpret_cast<char *>(m_sflaglist), 1,
00370     static_cast<unsigned>(m_numsflags+ m_numpflags) * sizeof (struct flagent), fpHash)
00371       != (m_numsflags + m_numpflags) * sizeof (struct flagent))
00372     {
00373         fprintf (stderr, LOOKUP_C_BAD_FORMAT);
00374         return (-1);
00375     }
00376     fclose (fpHash);
00377 
00378     {
00379         for (i = m_hashsize, dp = m_hashtbl;  --i >= 0;  dp++)
00380         {
00381             if (dp->word == (char *) -1)
00382                 dp->word = NULL;
00383             else
00384                 dp->word = &m_hashstrings [ reinterpret_cast<size_t>(dp->word) ];
00385             if (dp->next == (struct dent *) -1)
00386                 dp->next = NULL;
00387             else
00388                 dp->next = &m_hashtbl [ reinterpret_cast<size_t>(dp->next) ];
00389         }
00390     }
00391 
00392     for (i = m_numsflags + m_numpflags, entry = m_sflaglist; --i >= 0; entry++)
00393     {
00394         if (entry->stripl)
00395             entry->strip = reinterpret_cast<ichar_t *>(&m_hashstrings[reinterpret_cast<size_t>(entry->strip)]);
00396         else
00397             entry->strip = NULL;
00398         if (entry->affl)
00399             entry->affix = reinterpret_cast<ichar_t *>(&m_hashstrings[reinterpret_cast<size_t>(entry->affix)]);
00400         else
00401             entry->affix = NULL;
00402     }
00403     /*
00404     ** Warning - 'entry' and 'i' are reset in the body of the loop
00405     ** below.  Don't try to optimize it by (e.g.) moving the decrement
00406     ** of i into the loop condition.
00407     */
00408     for (i = m_numsflags, entry = m_sflaglist;  i > 0;  i--, entry++)
00409     {
00410         if (entry->affl == 0)
00411         {
00412             cp = NULL;
00413             ind = &m_sflagindex[0];
00414             viazero = 1;
00415         }
00416         else
00417         {
00418             cp = entry->affix + entry->affl - 1;
00419             ind = &m_sflagindex[*cp];
00420             viazero = 0;
00421             while (ind->numents == 0  &&  ind->pu.fp != NULL)
00422             {
00423                 if (cp == entry->affix)
00424                 {
00425                     ind = &ind->pu.fp[0];
00426                     viazero = 1;
00427                 }
00428                 else
00429                 {
00430                     ind = &ind->pu.fp[*--cp];
00431                     viazero = 0;
00432                 }
00433             }
00434         }
00435         if (ind->numents == 0)
00436             ind->pu.ent = entry;
00437         ind->numents++;
00438         /*
00439         ** If this index entry has more than MAXSEARCH flags in
00440         ** it, we will split it into subentries to reduce the
00441         ** searching.  However, the split doesn't make sense in
00442         ** two cases:  (a) if we are already at the end of the
00443         ** current affix, or (b) if all the entries in the list
00444         ** have identical affixes.  Since the list is sorted, (b)
00445         ** is true if the first and last affixes in the list
00446         ** are identical.
00447         */
00448         if (!viazero  &&  ind->numents >= MAXSEARCH
00449           &&  icharcmp (entry->affix, ind->pu.ent->affix) != 0)
00450         {
00451             /* Sneaky trick:  back up and reprocess */
00452             entry = ind->pu.ent - 1; /* -1 is for entry++ in loop */
00453             i = m_numsflags - (entry - m_sflaglist);
00454             ind->pu.fp =
00455               (struct flagptr *)
00456             calloc (static_cast<unsigned>(SET_SIZE + m_hashheader.nstrchars),
00457               sizeof (struct flagptr));
00458             if (ind->pu.fp == NULL)
00459             {
00460                 fprintf (stderr, LOOKUP_C_NO_LANG_SPACE);
00461                 return (-1);
00462             }
00463             ind->numents = 0;
00464         }
00465     }
00466     /*
00467     ** Warning - 'entry' and 'i' are reset in the body of the loop
00468     ** below.  Don't try to optimize it by (e.g.) moving the decrement
00469     ** of i into the loop condition.
00470     */
00471     for (i = m_numpflags, entry = m_pflaglist;  i > 0;  i--, entry++)
00472     {
00473         if (entry->affl == 0)
00474         {
00475             cp = NULL;
00476             ind = &m_pflagindex[0];
00477             viazero = 1;
00478         }
00479         else
00480         {
00481             cp = entry->affix;
00482             ind = &m_pflagindex[*cp++];
00483             viazero = 0;
00484             while (ind->numents == 0  &&  ind->pu.fp != NULL)
00485             {
00486                 if (*cp == 0)
00487                 {
00488                     ind = &ind->pu.fp[0];
00489                     viazero = 1;
00490                 }
00491                 else
00492                 {
00493                     ind = &ind->pu.fp[*cp++];
00494                     viazero = 0;
00495                 }
00496             }
00497         }
00498         if (ind->numents == 0)
00499             ind->pu.ent = entry;
00500         ind->numents++;
00501         /*
00502         ** If this index entry has more than MAXSEARCH flags in
00503         ** it, we will split it into subentries to reduce the
00504         ** searching.  However, the split doesn't make sense in
00505         ** two cases:  (a) if we are already at the end of the
00506         ** current affix, or (b) if all the entries in the list
00507         ** have identical affixes.  Since the list is sorted, (b)
00508         ** is true if the first and last affixes in the list
00509         ** are identical.
00510         */
00511         if (!viazero  &&  ind->numents >= MAXSEARCH
00512           &&  icharcmp (entry->affix, ind->pu.ent->affix) != 0)
00513         {
00514             /* Sneaky trick:  back up and reprocess */
00515             entry = ind->pu.ent - 1; /* -1 is for entry++ in loop */
00516             i = m_numpflags - (entry - m_pflaglist);
00517             ind->pu.fp =
00518               static_cast<struct flagptr *>(calloc(SET_SIZE + m_hashheader.nstrchars,
00519                 sizeof (struct flagptr)));
00520             if (ind->pu.fp == NULL)
00521             {
00522                 fprintf (stderr, LOOKUP_C_NO_LANG_SPACE);
00523                 return (-1);
00524             }
00525             ind->numents = 0;
00526         }
00527     }
00528 #ifdef INDEXDUMP
00529     fprintf (stderr, "Prefix index table:\n");
00530     dumpindex (m_pflagindex, 0);
00531     fprintf (stderr, "Suffix index table:\n");
00532     dumpindex (m_sflagindex, 0);
00533 #endif
00534     if (m_hashheader.nstrchartype == 0)
00535         m_chartypes = NULL;
00536     else
00537     {
00538         m_chartypes = (struct strchartype *)
00539           malloc (m_hashheader.nstrchartype * sizeof (struct strchartype));
00540         if (m_chartypes == NULL)
00541         {
00542             fprintf (stderr, LOOKUP_C_NO_LANG_SPACE);
00543             return (-1);
00544         }
00545         for (i = 0, nextchar = m_hashheader.strtypestart;
00546           i < m_hashheader.nstrchartype;
00547           i++)
00548         {
00549             m_chartypes[i].name = &m_hashstrings[nextchar];
00550             nextchar += strlen (m_chartypes[i].name) + 1;
00551             m_chartypes[i].deformatter = &m_hashstrings[nextchar];
00552             nextchar += strlen (m_chartypes[i].deformatter) + 1;
00553             m_chartypes[i].suffixes = &m_hashstrings[nextchar];
00554             while (m_hashstrings[nextchar] != '\0')
00555                 nextchar += strlen (&m_hashstrings[nextchar]) + 1;
00556             nextchar++;
00557         }
00558     }
00559 
00560     initckch(NULL);   
00561    
00562     return (0);
00563 }
00564 
#ifndef FREEP
/*
 * Free the pointer `p`.  free(NULL) is defined to do nothing, so no NULL
 * guard is needed; dropping it also means the argument is evaluated exactly
 * once, which makes FREEP(a[i++]) safe.  The pointer is NOT reset to NULL.
 */
#define FREEP(p)    do { free(p); } while (0)
#endif
00568 
00572 void ISpellChecker::initckch (char *wchars)
00573 {
00574     register ichar_t    c;
00575     char                num[4];
00576 
00577     for (c = 0; c < static_cast<ichar_t>(SET_SIZE+ m_hashheader.nstrchars); ++c)
00578     {
00579         if (iswordch (c))
00580         {
00581             if (!mylower (c))
00582             {
00583                 m_Try[m_Trynum] = c;
00584                 ++m_Trynum;
00585             }
00586         }
00587         else if (isboundarych (c))
00588         {
00589             m_Try[m_Trynum] = c;
00590             ++m_Trynum;
00591         }
00592     }
00593     if (wchars != NULL)
00594     {
00595         while (m_Trynum < SET_SIZE  &&  *wchars != '\0')
00596         {
00597             if (*wchars != 'n'  &&  *wchars != '\\')
00598             {
00599                 c = *wchars;
00600                 ++wchars;
00601             }
00602             else
00603             {
00604                 ++wchars;
00605                 num[0] = '\0';
00606                 num[1] = '\0';
00607                 num[2] = '\0';
00608                 num[3] = '\0';
00609                 if (isdigit (wchars[0]))
00610                 {
00611                     num[0] = wchars[0];
00612                     if (isdigit (wchars[1]))
00613                     {
00614                         num[1] = wchars[1];
00615                         if (isdigit (wchars[2]))
00616                             num[2] = wchars[2];
00617                     }
00618                 }
00619                 if (wchars[-1] == 'n')
00620                 {
00621                     wchars += strlen (num);
00622                     c = atoi (num);
00623                 }
00624                 else
00625                 {
00626                     wchars += strlen (num);
00627                     c = 0;
00628                     if (num[0])
00629                         c = num[0] - '0';
00630                     if (num[1])
00631                     {
00632                         c <<= 3;
00633                         c += num[1] - '0';
00634                     }
00635                     if (num[2])
00636                     {
00637                         c <<= 3;
00638                         c += num[2] - '0';
00639                     }
00640                 }
00641             }
00642 /*          c &= NOPARITY;*/
00643             if (!m_hashheader.wordchars[c])
00644             {
00645                 m_hashheader.wordchars[c] = 1;
00646                 m_hashheader.sortorder[c] = m_hashheader.sortval++;
00647                 m_Try[m_Trynum] = c;
00648                 ++m_Trynum;
00649             }
00650         }
00651     }
00652 }
00653 
00654 /*
00655  * \param indexp
00656  */
00657 void ISpellChecker::clearindex (struct flagptr *indexp)
00658 {
00659     register int        i;
00660     for (i = 0;  i < SET_SIZE + m_hashheader.nstrchars;  i++, indexp++)
00661     {
00662         if (indexp->numents == 0 && indexp->pu.fp != NULL)
00663         {
00664             clearindex(indexp->pu.fp);
00665             free(indexp->pu.fp);
00666         }
00667     }
00668 }
00669     
#ifdef INDEXDUMP
/*
 * Debug helper: print an affix index table to stderr, indenting nested
 * sub-tables by one space per level.
 *
 * For every slot that points at a sub-table, the slot's character (or its
 * hex code if not printable ASCII) is printed and the sub-table is dumped
 * recursively.  For every slot holding entries, the entry count and one
 * line per entry (index, strip string if any, affix) are printed.
 *
 * indexp - first slot of the table to dump
 * depth  - current nesting level, used only for indentation
 *
 * NOTE(review): this function still refers to `hashheader` and `sflaglist`,
 * which are not declared in this file (the rest of the file uses the
 * members m_hashheader / m_sflaglist).  It presumably needs access to an
 * ISpellChecker instance before it can build with INDEXDUMP defined --
 * confirm before enabling.
 */
static void dumpindex (struct flagptr *indexp, int depth)
{
    int     i;
    int     j;
    int     k;
    char    stripbuf[INPUTWORDLEN + 4 * MAXAFFIXLEN + 4];

    for (i = 0;  i < SET_SIZE + hashheader.nstrchars;  i++, indexp++)
    {
        if (indexp->numents == 0  &&  indexp->pu.fp != NULL)
        {
            /* Slot leads to a nested table: label it, then recurse. */
            for (j = depth;  --j >= 0;  )
                putc (' ', stderr);
            if (i >= ' '  &&  i <= '~')
                putc (i, stderr);
            else
                fprintf (stderr, "0x%x", i);
            putc ('\n', stderr);
            dumpindex (indexp->pu.fp, depth + 1);
        }
        else if (indexp->numents)
        {
            /* Slot holds a run of flag entries: list each of them. */
            for (j = depth;  --j >= 0;  )
                putc (' ', stderr);
            if (i >= ' '  &&  i <= '~')
                putc (i, stderr);
            else
                fprintf (stderr, "0x%x", i);
            fprintf (stderr, " -> %d entries\n", indexp->numents);
            for (k = 0;  k < indexp->numents;  k++)
            {
                for (j = depth;  --j >= 0;  )
                    putc (' ', stderr);
                if (indexp->pu.ent[k].stripl)
                {
                    ichartostr (stripbuf, indexp->pu.ent[k].strip,
                      sizeof stripbuf, 1);
                    /* %d needs an int; a pointer difference is ptrdiff_t. */
                    fprintf (stderr, "     entry %d (-%s,%s)\n",
                      static_cast<int>(&indexp->pu.ent[k] - sflaglist),
                      stripbuf,
                      indexp->pu.ent[k].affl
                        ? ichartosstr (indexp->pu.ent[k].affix, 1) : "-");
                }
                else
                    fprintf (stderr, "     entry %d (%s)\n",
                      static_cast<int>(&indexp->pu.ent[k] - sflaglist),
                      ichartosstr (indexp->pu.ent[k].affix, 1));
            }
        }
    }
}
#endif
00725 
00726 /* n is length of s */
00727 
00728 /*
00729  * \param s
00730  * \param dotree
00731  *
00732  * \return
00733  */
00734 struct dent * ISpellChecker::ispell_lookup (ichar_t *s, int dotree)
00735 {
00736     register struct dent *  dp;
00737     register char *     s1;
00738     char            schar[INPUTWORDLEN + MAXAFFIXLEN];
00739 
00740     dp = &m_hashtbl[hash (s, m_hashsize)];
00741     if (ichartostr (schar, s, sizeof schar, 1))
00742         fprintf (stderr, WORD_TOO_LONG (schar));
00743     for (  ;  dp != NULL;  dp = dp->next)
00744     {
00745         /* quick strcmp, but only for equality */
00746         s1 = dp->word;
00747         if (s1  &&  s1[0] == schar[0]  &&  strcmp (s1 + 1, schar + 1) == 0)
00748             return dp;
00749 #ifndef NO_CAPITALIZATION_SUPPORT
00750         while (dp->flagfield & MOREVARIANTS)    /* Skip variations */
00751             dp = dp->next;
00752 #endif
00753     }
00754     return NULL;
00755 }
00756 
00757 void ISpellChecker::alloc_ispell_struct()
00758 {
00759     m_translate_in = 0;
00760 }
00761 
00762 void ISpellChecker::free_ispell_struct()
00763 {
00764 }
KDE Home | KDE Accessibility Home | Description of Access Keys