tgood.cpp

00001 /* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
00002 /* enchant
00003  * Copyright (C) 2003 Dom Lachowicz
00004  *
00005  * This library is free software; you can redistribute it and/or
00006  * modify it under the terms of the GNU Lesser General Public
00007  * License as published by the Free Software Foundation; either
00008  * version 2.1 of the License, or (at your option) any later version.
00009  *
00010  * This library is distributed in the hope that it will be useful,
00011  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00012  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00013  * Lesser General Public License for more details.
00014  *
00015  * You should have received a copy of the GNU Lesser General Public
00016  * License along with this library; if not, write to the
00017  * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
00018  * Boston, MA 02110-1301, USA.
00019  *
00020  * In addition, as a special exception, Dom Lachowicz
00021  * gives permission to link the code of this program with
00022  * non-LGPL Spelling Provider libraries (eg: a MSFT Office
00023  * spell checker backend) and distribute linked combinations including
00024  * the two.  You must obey the GNU Lesser General Public License in all
00025  * respects for all of the code used other than said providers.  If you modify
00026  * this file, you may extend this exception to your version of the
00027  * file, but you are not obligated to do so.  If you do not wish to
00028  * do so, delete this exception statement from your version.
00029  */
00030 
00031 /*
00032  * Copyright 1987, 1988, 1989, 1992, 1993, Geoff Kuenning, Granada Hills, CA
00033  * All rights reserved.
00034  *
00035  * Redistribution and use in source and binary forms, with or without
00036  * modification, are permitted provided that the following conditions
00037  * are met:
00038  *
00039  * 1. Redistributions of source code must retain the above copyright
00040  *    notice, this list of conditions and the following disclaimer.
00041  * 2. Redistributions in binary form must reproduce the above copyright
00042  *    notice, this list of conditions and the following disclaimer in the
00043  *    documentation and/or other materials provided with the distribution.
00044  * 3. All modifications to the source code must be clearly marked as
00045  *    such.  Binary redistributions based on modified source code
00046  *    must be clearly marked as modified versions in the documentation
00047  *    and/or other materials provided with the distribution.
00048  * 4. All advertising materials mentioning features or use of this software
00049  *    must display the following acknowledgment:
00050  *      This product includes software developed by Geoff Kuenning and
00051  *      other unpaid contributors.
00052  * 5. The name of Geoff Kuenning may not be used to endorse or promote
00053  *    products derived from this software without specific prior
00054  *    written permission.
00055  *
00056  * THIS SOFTWARE IS PROVIDED BY GEOFF KUENNING AND CONTRIBUTORS ``AS IS'' AND
00057  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00058  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00059  * ARE DISCLAIMED.  IN NO EVENT SHALL GEOFF KUENNING OR CONTRIBUTORS BE LIABLE
00060  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
00061  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
00062  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
00063  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00064  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
00065  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
00066  * SUCH DAMAGE.
00067  */
00068 
00069 /*
00070  * Table-driven version of good.c.
00071  *
00072  * Geoff Kuenning, July 1987
00073  */
00074 
00075 /*
00076  * $Log$
00077  * Revision 1.1  2004/01/31 16:44:12  zrusin
00078  * ISpell plugin.
00079  *
00080  * Revision 1.4  2003/08/14 17:51:29  dom
00081  * update license - exception clause should be Lesser GPL
00082  *
00083  * Revision 1.3  2003/07/28 20:40:28  dom
00084  * fix up the license clause, further win32-registry proof some directory getting functions
00085  *
00086  * Revision 1.2  2003/07/16 22:52:56  dom
00087  * LGPL + exception license
00088  *
00089  * Revision 1.1  2003/07/15 01:15:09  dom
00090  * ispell enchant backend
00091  *
00092  * Revision 1.2  2003/01/29 05:50:12  hippietrail
00093  *
00094  * Fixed my mess in EncodingManager.
00095  * Changed many C casts to C++ casts.
00096  *
00097  * Revision 1.1  2003/01/24 05:52:36  hippietrail
00098  *
00099  * Refactored ispell code. Old ispell global variables had been put into
00100  * an allocated structure, a pointer to which was passed to many functions.
00101  * I have now made all such functions and variables private members of the
00102  * ISpellChecker class. It was C OO, now it's C++ OO.
00103  *
00104  * I've fixed the makefiles and tested compilation but am unable to test
00105  * operation. Please back out my changes if they cause problems which
00106  * are not obvious or easy to fix.
00107  *
00108  * Revision 1.6  2003/01/06 18:48:42  dom
00109  * ispell cleanup, start of using new 'add' save features
00110  *
00111  * Revision 1.5  2002/09/19 05:31:20  hippietrail
00112  *
00113  * More Ispell cleanup.  Conditional globals and DEREF macros are removed.
00114  * K&R function declarations removed, converted to Doxygen style comments
00115  * where possible.  No code has been changed (I hope).  Compiles for me but
00116  * unable to test.
00117  *
00118  * Revision 1.4  2002/09/17 03:03:31  hippietrail
00119  *
00120  * After seeking permission on the developer list I've reformatted all the
00121  * spelling source which seemed to have parts which used 2, 3, 4, and 8
00122  * spaces for tabs.  It should all look good with our standard 4-space
00123  * tabs now.
00124  * I've concentrated just on indentation in the actual code.  More prettying
00125  * could be done.
00126  * * NO code changes were made *
00127  *
00128  * Revision 1.3  2002/09/13 17:20:14  mpritchett
00129  * Fix more warnings for Linux build
00130  *
00131  * Revision 1.2  2001/05/12 16:05:42  thomasf
00132  * Big pseudo changes to ispell to make it pass around a structure rather
00133  * than rely on all sorts of globals willy nilly here and there.  Also
00134  * fixed our spelling class to work with accepting suggestions once more.
00135  * This code is dirty, gross and ugly (not to mention still not supporting
00136  * multiple hash sized just yet) but it works on my machine and will no
00137  * doubt break other machines.
00138  *
00139  * Revision 1.1  2001/04/15 16:01:24  tomas_f
00140  * moving to spell/xp
00141  *
00142  * Revision 1.7  1999/10/20 06:03:56  sterwill
00143  * Changed C++-style comments to C-style comments in C code.
00144  *
00145  * Revision 1.6  1999/10/20 03:19:35  paul
00146  * Hacked ispell code to ignore any characters that don't fit in the lookup tables loaded from the dictionary.  It ain't pretty, but at least we don't crash there any more.
00147  *
00148  * Revision 1.5  1999/04/13 17:12:51  jeff
00149  * Applied "Darren O. Benham" <gecko@benham.net> spell check changes.
00150  * Fixed crash on Win32 with the new code.
00151  *
00152  * Revision 1.4  1998/12/29 14:55:33  eric
00153  *
00154  * I've doctored the ispell code pretty extensively here.  It is now
00155  * warning-free on Win32.  It also *works* on Win32 now, since I
00156  * replaced all the I/O calls with ANSI standard ones.
00157  *
00158  * Revision 1.4  1998/12/29 14:55:33  eric
00159  *
00160  * I've doctored the ispell code pretty extensively here.  It is now
00161  * warning-free on Win32.  It also *works* on Win32 now, since I
00162  * replaced all the I/O calls with ANSI standard ones.
00163  *
00164  * Revision 1.3  1998/12/28 23:11:30  eric
00165  *
00166  * modified spell code and integration to build on Windows.
00167  * This is still a hack.
00168  *
00169  * Actually, it doesn't yet WORK on Windows.  It just builds.
00170  * SpellCheckInit is failing for some reason.
00171  *
00172  * Revision 1.2  1998/12/28 22:16:22  eric
00173  *
00174  * These changes begin to incorporate the spell checker into AbiWord.  Most
00175  * of this is a hack.
00176  *
00177  * 1.  added other/spell to the -I list in config/abi_defs
00178  * 2.  replaced other/spell/Makefile with one which is more like
00179  *  our build system.
00180  * 3.  added other/spell to other/Makefile so that the build will now
00181  *  dive down and build the spell check library.
00182  * 4.  added the AbiSpell library to the Makefiles in wp/main
00183  * 5.  added a call to SpellCheckInit in wp/main/unix/UnixMain.cpp.
00184  *  This call is a HACK and should be replaced with something
00185  *  proper later.
00186  * 6.  added code to fv_View.cpp as follows:
00187  *  whenever you double-click on a word, the spell checker
00188  *  verifies that word and prints its status to stdout.
00189  *
00190  * Caveats:
00191  * 1.  This will break the Windows build.  I'm going to work on fixing it
00192  *  now.
00193  * 2.  This only works if your dictionary is in /usr/lib/ispell/american.hash.
00194  *  The dictionary location is currently hard-coded.  This will be
00195  *  fixed as well.
00196  *
00197  * Anyway, such as it is, it works.
00198  *
00199  * Revision 1.1  1998/12/28 18:04:43  davet
00200  * Spell checker code stripped from ispell.  At this point, there are
00201  * two external routines...  the Init routine, and a check-a-word routine
00202  * which returns a boolean value, and takes a 16 bit char string.
00203  * The code resembles the ispell code as much as possible still.
00204  *
00205  * Revision 1.32  1994/11/02  06:56:16  geoff
00206  * Remove the anyword feature, which I've decided is a bad idea.
00207  *
00208  * Revision 1.31  1994/10/25  05:46:25  geoff
00209  * Add support for the FF_ANYWORD (affix applies to all words, even if
00210  * flag bit isn't set) flag option.
00211  *
00212  * Revision 1.30  1994/05/24  06:23:08  geoff
00213  * Don't create a hit if "allhits" is clear and capitalization
00214  * mismatches.  This cures a bug where a word could be in the dictionary
00215  * and yet not found.
00216  *
00217  * Revision 1.29  1994/05/17  06:44:21  geoff
00218  * Add support for controlled compound formation and the COMPOUNDONLY
00219  * option to affix flags.
00220  *
00221  * Revision 1.28  1994/01/25  07:12:13  geoff
00222  * Get rid of all old RCS log lines in preparation for the 3.1 release.
00223  *
00224  */
00225 
00226 #include <ctype.h>
00227 #include <stdlib.h>
00228 #include <string.h>
00229 
00230 #include "ispell_checker.h"
00231 
00243 void ISpellChecker::chk_aff (ichar_t *word, ichar_t *ucword, 
00244               int len, int ignoreflagbits, int allhits, int pfxopts, int sfxopts)
00245 {
00246     register ichar_t *  cp;     /* Pointer to char to index on */
00247     struct flagptr *    ind;        /* Flag index table to test */
00248 
00249     pfx_list_chk (word, ucword, len, pfxopts, sfxopts, &m_pflagindex[0],
00250       ignoreflagbits, allhits);
00251     cp = ucword;
00252     /* HACK: bail on unrecognized chars */
00253     if (*cp >= (SET_SIZE + MAXSTRINGCHARS))
00254         return;
00255     ind = &m_pflagindex[*cp++];
00256     while (ind->numents == 0  &&  ind->pu.fp != NULL)
00257     {
00258         if (*cp == 0)
00259             return;
00260         if (ind->pu.fp[0].numents)
00261         {
00262             pfx_list_chk (word, ucword, len, pfxopts, sfxopts, &ind->pu.fp[0],
00263               ignoreflagbits, allhits);
00264             if (m_numhits  &&  !allhits  &&  /* !cflag  && */  !ignoreflagbits)
00265                 return;
00266         }
00267         /* HACK: bail on unrecognized chars */
00268         if (*cp >= (SET_SIZE + MAXSTRINGCHARS))
00269             return;
00270         ind = &ind->pu.fp[*cp++];
00271     }
00272     pfx_list_chk (word, ucword, len, pfxopts, sfxopts, ind, ignoreflagbits,
00273       allhits);
00274     if (m_numhits  &&  !allhits  &&  /* !cflag  &&*/  !ignoreflagbits)
00275         return;
00276     chk_suf (word, ucword, len, sfxopts, static_cast<struct flagent *>(NULL),
00277       ignoreflagbits, allhits);
00278 }
00279 
00292 void ISpellChecker::pfx_list_chk (ichar_t *word, ichar_t *ucword, int len, int optflags, 
00293                     int sfxopts, struct flagptr * ind, int ignoreflagbits, int allhits)
00294 {
00295     int         cond;       /* Condition number */
00296     register ichar_t *  cp;     /* Pointer into end of ucword */
00297     struct dent *   dent;       /* Dictionary entry we found */
00298     int         entcount;   /* Number of entries to process */
00299     register struct flagent *
00300             flent;      /* Current table entry */
00301     int         preadd;     /* Length added to tword2 as prefix */
00302     register int    tlen;       /* Length of tword */
00303     ichar_t     tword[INPUTWORDLEN + 4 * MAXAFFIXLEN + 4]; /* Tmp cpy */
00304     ichar_t     tword2[sizeof tword]; /* 2nd copy for ins_root_cap */
00305 
00306     for (flent = ind->pu.ent, entcount = ind->numents;
00307       entcount > 0;
00308       flent++, entcount--)
00309     {
00310         /*
00311          * If this is a compound-only affix, ignore it unless we're
00312          * looking for that specific thing.
00313          */
00314         if ((flent->flagflags & FF_COMPOUNDONLY) != 0
00315           &&  (optflags & FF_COMPOUNDONLY) == 0)
00316             continue;
00317 
00318         /*
00319          * See if the prefix matches.
00320          */
00321         tlen = len - flent->affl;
00322         if (tlen > 0
00323           &&  (flent->affl == 0
00324             ||  icharncmp (flent->affix, ucword, flent->affl) == 0)
00325           &&  tlen + flent->stripl >= flent->numconds)
00326         {
00327             /*
00328              * The prefix matches.  Remove it, replace it by the "strip"
00329              * string (if any), and check the original conditions.
00330              */
00331             if (flent->stripl)
00332                 icharcpy (tword, flent->strip);
00333             icharcpy (tword + flent->stripl, ucword + flent->affl);
00334             cp = tword;
00335             for (cond = 0;  cond < flent->numconds;  cond++)
00336             {
00337                 if ((flent->conds[*cp++] & (1 << cond)) == 0)
00338                     break;
00339             }
00340             if (cond >= flent->numconds)
00341             {
00342                 /*
00343                  * The conditions match.  See if the word is in the
00344                  * dictionary.
00345                  */
00346                 tlen += flent->stripl;
00347 
00348                 if (ignoreflagbits)
00349                 {
00350                     if ((dent = ispell_lookup (tword, 1)) != NULL)
00351                     {
00352                         cp = tword2;
00353                         if (flent->affl)
00354                         {
00355                             icharcpy (cp, flent->affix);
00356                             cp += flent->affl;
00357                             *cp++ = '+';
00358                         }
00359                         preadd = cp - tword2;
00360                         icharcpy (cp, tword);
00361                         cp += tlen;
00362                         if (flent->stripl)
00363                         {
00364                             *cp++ = '-';
00365                             icharcpy (cp, flent->strip);
00366                         }
00367                     }
00368                 }
00369                 else if ((dent = ispell_lookup (tword, 1)) != NULL
00370                   &&  TSTMASKBIT (dent->mask, flent->flagbit))
00371                 {
00372                     if (m_numhits < MAX_HITS)
00373                     {
00374                         m_hits[m_numhits].dictent = dent;
00375                         m_hits[m_numhits].prefix = flent;
00376                         m_hits[m_numhits].suffix = NULL;
00377                         m_numhits++;
00378                     }
00379                     if (!allhits)
00380                     {
00381 #ifndef NO_CAPITALIZATION_SUPPORT
00382                         if (cap_ok (word, &m_hits[0], len))
00383                             return;
00384                         m_numhits = 0;
00385 #else /* NO_CAPITALIZATION_SUPPORT */
00386                         return;
00387 #endif /* NO_CAPITALIZATION_SUPPORT */
00388                     }
00389                 }
00390                 /*
00391                  * Handle cross-products.
00392                  */
00393                 if (flent->flagflags & FF_CROSSPRODUCT)
00394                         chk_suf (word, tword, tlen, sfxopts | FF_CROSSPRODUCT,
00395                     flent, ignoreflagbits, allhits);
00396             }
00397         }
00398     }
00399 }
00400 
00412 void
00413 ISpellChecker::chk_suf (ichar_t *word, ichar_t *ucword, 
00414                     int len, int optflags, struct flagent *pfxent, 
00415                     int ignoreflagbits, int allhits)
00416 {
00417     register ichar_t *  cp;     /* Pointer to char to index on */
00418     struct flagptr *    ind;        /* Flag index table to test */
00419 
00420     suf_list_chk (word, ucword, len, &m_sflagindex[0], optflags, pfxent,
00421       ignoreflagbits, allhits);
00422     cp = ucword + len - 1;
00423     /* HACK: bail on unrecognized chars */
00424     if (*cp >= (SET_SIZE + MAXSTRINGCHARS))
00425         return;
00426     ind = &m_sflagindex[*cp];
00427     while (ind->numents == 0  &&  ind->pu.fp != NULL)
00428     {
00429         if (cp == ucword)
00430             return;
00431         if (ind->pu.fp[0].numents)
00432         {
00433             suf_list_chk (word, ucword, len, &ind->pu.fp[0],
00434               optflags, pfxent, ignoreflagbits, allhits);
00435             if (m_numhits != 0  &&  !allhits  &&  /* !cflag  && */  !ignoreflagbits)
00436                 return;
00437         }
00438         /* HACK: bail on unrecognized chars */
00439         if (*(cp-1) >= (SET_SIZE + MAXSTRINGCHARS))
00440             return;
00441         ind = &ind->pu.fp[*--cp];
00442     }
00443     suf_list_chk (word, ucword, len, ind, optflags, pfxent,
00444       ignoreflagbits, allhits);
00445 }
00446     
00457 void ISpellChecker::suf_list_chk (ichar_t *word, ichar_t *ucword, 
00458                           int len, struct flagptr *ind, int optflags, 
00459                           struct flagent *pfxent, int ignoreflagbits, int allhits)
00460 {
00461     register ichar_t *  cp;     /* Pointer into end of ucword */
00462     int         cond;       /* Condition number */
00463     struct dent *   dent;       /* Dictionary entry we found */
00464     int         entcount;   /* Number of entries to process */
00465     register struct flagent *
00466             flent;      /* Current table entry */
00467     int         preadd;     /* Length added to tword2 as prefix */
00468     register int    tlen;       /* Length of tword */
00469     ichar_t     tword[INPUTWORDLEN + 4 * MAXAFFIXLEN + 4]; /* Tmp cpy */
00470     ichar_t     tword2[sizeof tword]; /* 2nd copy for ins_root_cap */
00471 
00472     icharcpy (tword, ucword);
00473     for (flent = ind->pu.ent, entcount = ind->numents;
00474       entcount > 0;
00475       flent++, entcount--)
00476     {
00477         if ((optflags & FF_CROSSPRODUCT) != 0
00478           &&  (flent->flagflags & FF_CROSSPRODUCT) == 0)
00479             continue;
00480         /*
00481          * If this is a compound-only affix, ignore it unless we're
00482          * looking for that specific thing.
00483          */
00484         if ((flent->flagflags & FF_COMPOUNDONLY) != 0
00485           &&  (optflags & FF_COMPOUNDONLY) == 0)
00486             continue;
00487 
00488         /*
00489          * See if the suffix matches.
00490          */
00491         tlen = len - flent->affl;
00492         if (tlen > 0
00493           &&  (flent->affl == 0
00494             ||  icharcmp (flent->affix, ucword + tlen) == 0)
00495           &&  tlen + flent->stripl >= flent->numconds)
00496         {
00497             /*
00498              * The suffix matches.  Remove it, replace it by the "strip"
00499              * string (if any), and check the original conditions.
00500              */
00501             icharcpy (tword, ucword);
00502             cp = tword + tlen;
00503             if (flent->stripl)
00504             {
00505                 icharcpy (cp, flent->strip);
00506                 tlen += flent->stripl;
00507                 cp = tword + tlen;
00508             }
00509             else
00510                 *cp = '\0';
00511             for (cond = flent->numconds;  --cond >= 0;  )
00512             {
00513                 if ((flent->conds[*--cp] & (1 << cond)) == 0)
00514                     break;
00515             }
00516             if (cond < 0)
00517             {
00518                 /*
00519                  * The conditions match.  See if the word is in the
00520                  * dictionary.
00521                  */
00522                 if (ignoreflagbits)
00523                 {
00524                     if ((dent = ispell_lookup (tword, 1)) != NULL)
00525                     {
00526                         cp = tword2;
00527                         if ((optflags & FF_CROSSPRODUCT)
00528                           &&  pfxent->affl != 0)
00529                         {
00530                             icharcpy (cp, pfxent->affix);
00531                             cp += pfxent->affl;
00532                             *cp++ = '+';
00533                         }
00534                         preadd = cp - tword2;
00535                         icharcpy (cp, tword);
00536                         cp += tlen;
00537                         if ((optflags & FF_CROSSPRODUCT)
00538                           &&  pfxent->stripl != 0)
00539                         {
00540                             *cp++ = '-';
00541                             icharcpy (cp, pfxent->strip);
00542                             cp += pfxent->stripl;
00543                         }
00544                         if (flent->stripl)
00545                         {
00546                             *cp++ = '-';
00547                             icharcpy (cp, flent->strip);
00548                             cp += flent->stripl;
00549                         }
00550                         if (flent->affl)
00551                         {
00552                             *cp++ = '+';
00553                             icharcpy (cp, flent->affix);
00554                             cp += flent->affl;
00555                         }
00556                     }
00557                 }
00558                 else if ((dent = ispell_lookup (tword, 1)) != NULL
00559                   &&  TSTMASKBIT (dent->mask, flent->flagbit)
00560                   &&  ((optflags & FF_CROSSPRODUCT) == 0
00561                     || TSTMASKBIT (dent->mask, pfxent->flagbit)))
00562                 {
00563                     if (m_numhits < MAX_HITS)
00564                     {
00565                         m_hits[m_numhits].dictent = dent;
00566                         m_hits[m_numhits].prefix = pfxent;
00567                         m_hits[m_numhits].suffix = flent;
00568                         m_numhits++;
00569                     }
00570                     if (!allhits)
00571                     {
00572 #ifndef NO_CAPITALIZATION_SUPPORT
00573                         if (cap_ok (word, &m_hits[0], len))
00574                             return;
00575                         m_numhits = 0;
00576 #else /* NO_CAPITALIZATION_SUPPORT */
00577                         return;
00578 #endif /* NO_CAPITALIZATION_SUPPORT */
00579                     }
00580                 }
00581             }
00582         }
00583     }
00584 }
00585 
00597 int ISpellChecker::expand_pre (char *croot, ichar_t *rootword, MASKTYPE mask[], 
00598                 int option, char *extra)
00599 {
00600     int             entcount;   /* No. of entries to process */
00601     int             explength;  /* Length of expansions */
00602     register struct flagent *
00603                 flent;      /* Current table entry */
00604 
00605     for (flent = m_pflaglist, entcount = m_numpflags, explength = 0;
00606       entcount > 0;
00607       flent++, entcount--)
00608     {
00609         if (TSTMASKBIT (mask, flent->flagbit))
00610             explength +=
00611               pr_pre_expansion (croot, rootword, flent, mask, option, extra);
00612     }
00613     return explength;
00614 }
00615 
00628 int ISpellChecker::pr_pre_expansion ( char *croot, ichar_t *rootword, 
00629                             struct flagent *flent, MASKTYPE mask[], int option, 
00630                             char *extra)
00631 {
00632     int             cond;       /* Current condition number */
00633     register ichar_t *      nextc;      /* Next case choice */
00634     int             tlen;       /* Length of tword */
00635     ichar_t         tword[INPUTWORDLEN + MAXAFFIXLEN]; /* Temp */
00636 
00637     tlen = icharlen (rootword);
00638     if (flent->numconds > tlen)
00639         return 0;
00640     tlen -= flent->stripl;
00641     if (tlen <= 0)
00642         return 0;
00643     tlen += flent->affl;
00644     for (cond = 0, nextc = rootword;  cond < flent->numconds;  cond++)
00645     {
00646         if ((flent->conds[mytoupper (*nextc++)] & (1 << cond)) == 0)
00647             return 0;
00648     }
00649     /*
00650      * The conditions are satisfied.  Copy the word, add the prefix,
00651      * and make it the proper case.   This code is carefully written
00652      * to match that ins_cap and cap_ok.  Note that the affix, as
00653      * inserted, is uppercase.
00654      *
00655      * There is a tricky bit here:  if the root is capitalized, we
00656      * want a capitalized result.  If the root is followcase, however,
00657      * we want to duplicate the case of the first remaining letter
00658      * of the root.  In other words, "Loved/U" should generate "Unloved",
00659      * but "LOved/U" should generate "UNLOved" and "lOved/U" should
00660      * produce "unlOved".
00661      */
00662     if (flent->affl)
00663     {
00664         icharcpy (tword, flent->affix);
00665         nextc = tword + flent->affl;
00666     }
00667     icharcpy (nextc, rootword + flent->stripl);
00668     if (myupper (rootword[0]))
00669     {
00670         /* We must distinguish followcase from capitalized and all-upper */
00671         for (nextc = rootword + 1;  *nextc;  nextc++)
00672         {
00673             if (!myupper (*nextc))
00674                 break;
00675         }
00676         if (*nextc)
00677         {
00678             /* It's a followcase or capitalized word.  Figure out which. */
00679             for (  ;  *nextc;  nextc++)
00680             {
00681                 if (myupper (*nextc))
00682                     break;
00683             }
00684             if (*nextc)
00685             {
00686                 /* It's followcase. */
00687                 if (!myupper (tword[flent->affl]))
00688                     forcelc (tword, flent->affl);
00689             }
00690             else
00691             {
00692                 /* It's capitalized */
00693                 forcelc (tword + 1, tlen - 1);
00694             }
00695         }
00696     }
00697     else
00698     {
00699         /* Followcase or all-lower, we don't care which */
00700         if (!myupper (*nextc))
00701             forcelc (tword, flent->affl);
00702     }
00703     if (option == 3)
00704         printf ("\n%s", croot);
00705     if (option != 4)
00706         printf (" %s%s", ichartosstr (tword, 1), extra);
00707     if (flent->flagflags & FF_CROSSPRODUCT)
00708         return tlen
00709           + expand_suf (croot, tword, mask, FF_CROSSPRODUCT, option, extra);
00710     else
00711         return tlen;
00712 }
00713 
00726 int ISpellChecker::expand_suf (char *croot, ichar_t *rootword, MASKTYPE mask[], 
00727                 int optflags, int option, char *extra)
00728 {
00729     int             entcount;   /* No. of entries to process */
00730     int             explength;  /* Length of expansions */
00731     register struct flagent *
00732                 flent;      /* Current table entry */
00733 
00734     for (flent = m_sflaglist, entcount = m_numsflags, explength = 0;
00735       entcount > 0;
00736       flent++, entcount--)
00737     {
00738         if (TSTMASKBIT (mask, flent->flagbit))
00739         {
00740             if ((optflags & FF_CROSSPRODUCT) == 0
00741               ||  (flent->flagflags & FF_CROSSPRODUCT))
00742             explength +=
00743               pr_suf_expansion (croot, rootword, flent, option, extra);
00744         }
00745     }
00746     return explength;
00747 }
00748 
00760 int ISpellChecker::pr_suf_expansion (char *croot, ichar_t *rootword, 
00761                             struct flagent *flent, int option, char *extra)
00762 {
00763     int             cond;       /* Current condition number */
00764     register ichar_t *      nextc;      /* Next case choice */
00765     int             tlen;       /* Length of tword */
00766     ichar_t         tword[INPUTWORDLEN + MAXAFFIXLEN]; /* Temp */
00767 
00768     tlen = icharlen (rootword);
00769     cond = flent->numconds;
00770     if (cond > tlen)
00771         return 0;
00772     if (tlen - flent->stripl <= 0)
00773         return 0;
00774     for (nextc = rootword + tlen;  --cond >= 0;  )
00775     {
00776         if ((flent->conds[mytoupper (*--nextc)] & (1 << cond)) == 0)
00777             return 0;
00778     }
00779     /*
00780      * The conditions are satisfied.  Copy the word, add the suffix,
00781      * and make it match the case of the last remaining character of the
00782      * root.  Again, this code carefully matches ins_cap and cap_ok.
00783      */
00784     icharcpy (tword, rootword);
00785     nextc = tword + tlen - flent->stripl;
00786     if (flent->affl)
00787     {
00788         icharcpy (nextc, flent->affix);
00789         if (!myupper (nextc[-1]))
00790             forcelc (nextc, flent->affl);
00791     }
00792     else
00793         *nextc = 0;
00794     if (option == 3)
00795         printf ("\n%s", croot);
00796     if (option != 4)
00797         printf (" %s%s", ichartosstr (tword, 1), extra);
00798     return tlen + flent->affl - flent->stripl;
00799 }
00800 
00805 void ISpellChecker::forcelc (ichar_t *dst, int len)         /* Force to lowercase */
00806 {
00807 
00808     for (  ;  --len >= 0;  dst++)
00809         *dst = mytolower (*dst);
00810 }
KDE Home | KDE Accessibility Home | Description of Access Keys