good.cpp

00001 /* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
00002 /* enchant
00003  * Copyright (C) 2003 Dom Lachowicz
00004  *
00005  * This library is free software; you can redistribute it and/or
00006  * modify it under the terms of the GNU Lesser General Public
00007  * License as published by the Free Software Foundation; either
00008  * version 2.1 of the License, or (at your option) any later version.
00009  *
00010  * This library is distributed in the hope that it will be useful,
00011  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00012  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00013  * Lesser General Public License for more details.
00014  *
00015  * You should have received a copy of the GNU Lesser General Public
00016  * License along with this library; if not, write to the
00017  * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
00018  * Boston, MA 02110-1301, USA.
00019  *
00020  * In addition, as a special exception, Dom Lachowicz
00021  * gives permission to link the code of this program with
00022  * non-LGPL Spelling Provider libraries (eg: a MSFT Office
00023  * spell checker backend) and distribute linked combinations including
00024  * the two.  You must obey the GNU Lesser General Public License in all
00025  * respects for all of the code used other than said providers.  If you modify
00026  * this file, you may extend this exception to your version of the
00027  * file, but you are not obligated to do so.  If you do not wish to
00028  * do so, delete this exception statement from your version.
00029  */
00030 
00031 /*
00032  * good.c - see if a word or its root word
00033  * is in the dictionary.
00034  *
00035  * Pace Willisson, 1983
00036  *
00037  * Copyright 1992, 1993, Geoff Kuenning, Granada Hills, CA
00038  * All rights reserved.
00039  *
00040  * Redistribution and use in source and binary forms, with or without
00041  * modification, are permitted provided that the following conditions
00042  * are met:
00043  *
00044  * 1. Redistributions of source code must retain the above copyright
00045  *    notice, this list of conditions and the following disclaimer.
00046  * 2. Redistributions in binary form must reproduce the above copyright
00047  *    notice, this list of conditions and the following disclaimer in the
00048  *    documentation and/or other materials provided with the distribution.
00049  * 3. All modifications to the source code must be clearly marked as
00050  *    such.  Binary redistributions based on modified source code
00051  *    must be clearly marked as modified versions in the documentation
00052  *    and/or other materials provided with the distribution.
00053  * 4. All advertising materials mentioning features or use of this software
00054  *    must display the following acknowledgment:
00055  *      This product includes software developed by Geoff Kuenning and
00056  *      other unpaid contributors.
00057  * 5. The name of Geoff Kuenning may not be used to endorse or promote
00058  *    products derived from this software without specific prior
00059  *    written permission.
00060  *
00061  * THIS SOFTWARE IS PROVIDED BY GEOFF KUENNING AND CONTRIBUTORS ``AS IS'' AND
00062  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00063  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00064  * ARE DISCLAIMED.  IN NO EVENT SHALL GEOFF KUENNING OR CONTRIBUTORS BE LIABLE
00065  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
00066  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
00067  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
00068  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
00069  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
00070  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
00071  * SUCH DAMAGE.
00072  */
00073 
00074 /*
00075  * $Log$
00076  * Revision 1.1  2004/01/31 16:44:12  zrusin
00077  * ISpell plugin.
00078  *
00079  * Revision 1.4  2003/08/14 17:51:26  dom
00080  * update license - exception clause should be Lesser GPL
00081  *
00082  * Revision 1.3  2003/07/28 20:40:25  dom
00083  * fix up the license clause, further win32-registry proof some directory getting functions
00084  *
00085  * Revision 1.2  2003/07/16 22:52:37  dom
00086  * LGPL + exception license
00087  *
00088  * Revision 1.1  2003/07/15 01:15:04  dom
00089  * ispell enchant backend
00090  *
00091  * Revision 1.2  2003/01/29 05:50:11  hippietrail
00092  *
00093  * Fixed my mess in EncodingManager.
00094  * Changed many C casts to C++ casts.
00095  *
00096  * Revision 1.1  2003/01/24 05:52:32  hippietrail
00097  *
00098  * Refactored ispell code. Old ispell global variables had been put into
00099  * an allocated structure, a pointer to which was passed to many functions.
00100  * I have now made all such functions and variables private members of the
00101  * ISpellChecker class. It was C OO, now it's C++ OO.
00102  *
00103  * I've fixed the makefiles and tested compilation but am unable to test
00104  * operation. Please back out my changes if they cause problems which
00105  * are not obvious or easy to fix.
00106  *
00107  * Revision 1.6  2003/01/06 18:48:38  dom
00108  * ispell cleanup, start of using new 'add' save features
00109  *
00110  * Revision 1.5  2002/09/19 05:31:15  hippietrail
00111  *
00112  * More Ispell cleanup.  Conditional globals and DEREF macros are removed.
00113  * K&R function declarations removed, converted to Doxygen style comments
00114  * where possible.  No code has been changed (I hope).  Compiles for me but
00115  * unable to test.
00116  *
00117  * Revision 1.4  2002/09/17 03:03:29  hippietrail
00118  *
00119  * After seeking permission on the developer list I've reformatted all the
00120  * spelling source which seemed to have parts which used 2, 3, 4, and 8
00121  * spaces for tabs.  It should all look good with our standard 4-space
00122  * tabs now.
00123  * I've concentrated just on indentation in the actual code.  More prettying
00124  * could be done.
00125  * * NO code changes were made *
00126  *
00127  * Revision 1.3  2002/09/13 17:20:12  mpritchett
00128  * Fix more warnings for Linux build
00129  *
00130  * Revision 1.2  2001/05/12 16:05:42  thomasf
00131  * Big pseudo changes to ispell to make it pass around a structure rather
00132  * than rely on all sorts of globals willy nilly here and there.  Also
00133  * fixed our spelling class to work with accepting suggestions once more.
00134  * This code is dirty, gross and ugly (not to mention still not supporting
00135  * multiple hash sized just yet) but it works on my machine and will no
00136  * doubt break other machines.
00137  *
00138  * Revision 1.1  2001/04/15 16:01:24  tomas_f
00139  * moving to spell/xp
00140  *
00141  * Revision 1.5  2000/02/09 22:35:25  sterwill
00142  * Clean up some warnings
00143  *
00144  * Revision 1.4  1998/12/29 14:55:32  eric
00145  *
00146  * I've doctored the ispell code pretty extensively here.  It is now
00147  * warning-free on Win32.  It also *works* on Win32 now, since I
00148  * replaced all the I/O calls with ANSI standard ones.
00149  *
00150  * Revision 1.3  1998/12/28 23:11:30  eric
00151  *
00152  * modified spell code and integration to build on Windows.
00153  * This is still a hack.
00154  *
00155  * Actually, it doesn't yet WORK on Windows.  It just builds.
00156  * SpellCheckInit is failing for some reason.
00157  *
00158  * Revision 1.2  1998/12/28 22:16:22  eric
00159  *
00160  * These changes begin to incorporate the spell checker into AbiWord.  Most
00161  * of this is a hack.
00162  *
00163  * 1.  added other/spell to the -I list in config/abi_defs
00164  * 2.  replaced other/spell/Makefile with one which is more like
00165  *  our build system.
00166  * 3.  added other/spell to other/Makefile so that the build will now
00167  *  dive down and build the spell check library.
00168  * 4.  added the AbiSpell library to the Makefiles in wp/main
00169  * 5.  added a call to SpellCheckInit in wp/main/unix/UnixMain.cpp.
00170  *  This call is a HACK and should be replaced with something
00171  *  proper later.
00172  * 6.  added code to fv_View.cpp as follows:
00173  *  whenever you double-click on a word, the spell checker
00174  *  verifies that word and prints its status to stdout.
00175  *
00176  * Caveats:
00177  * 1.  This will break the Windows build.  I'm going to work on fixing it
00178  *  now.
00179  * 2.  This only works if your dictionary is in /usr/lib/ispell/american.hash.
00180  *  The dictionary location is currently hard-coded.  This will be
00181  *  fixed as well.
00182  *
00183  * Anyway, such as it is, it works.
00184  *
00185  * Revision 1.1  1998/12/28 18:04:43  davet
00186  * Spell checker code stripped from ispell.  At this point, there are
00187  * two external routines...  the Init routine, and a check-a-word routine
00188  * which returns a boolean value, and takes a 16 bit char string.
00189  * The code resembles the ispell code as much as possible still.
00190  *
00191  * Revision 1.43  1994/11/02  06:56:05  geoff
00192  * Remove the anyword feature, which I've decided is a bad idea.
00193  *
00194  * Revision 1.42  1994/10/25  05:45:59  geoff
00195  * Add support for an affix that will work with any word, even if there's
00196  * no explicit flag.
00197  *
00198  * Revision 1.41  1994/05/24  06:23:06  geoff
00199  * Let tgood decide capitalization questions, rather than doing it ourselves.
00200  *
00201  * Revision 1.40  1994/05/17  06:44:10  geoff
00202  * Add support for controlled compound formation and the COMPOUNDONLY
00203  * option to affix flags.
00204  *
00205  * Revision 1.39  1994/01/25  07:11:31  geoff
00206  * Get rid of all old RCS log lines in preparation for the 3.1 release.
00207  *
00208  */
00209 
00210 #include <ctype.h>
00211 #include <stdio.h>
00212 #include <stdlib.h>
00213 #include <string.h>
00214 
00215 #include "ispell_checker.h"
00216 
00217 
/*
 * Forward declaration of good().  The parameter list is wrapped in the P(())
 * macro, which is not defined in this file -- presumably the usual
 * prototype-compatibility macro from the ispell headers (TODO confirm).
 * NOTE(review): the definition further down in this file is the member
 * function ISpellChecker::good, so this free-function declaration looks
 * like a leftover; verify before relying on it.
 */
00218 int     good P ((ichar_t * word, int ignoreflagbits, int allhits,
00219              int pfxopts, int sfxopts));
00220 
00221 #ifndef NO_CAPITALIZATION_SUPPORT
00222 
00232 static int entryhasaffixes (struct dent *dent, struct success *hit)
00233 {
00234     if (hit->prefix  &&  !TSTMASKBIT (dent->mask, hit->prefix->flagbit))
00235         return 0;
00236     if (hit->suffix  &&  !TSTMASKBIT (dent->mask, hit->suffix->flagbit))
00237         return 0;
00238     return 1;           /* Yes, these affixes are legal */
00239 }
00240 
00241 /*
00242  * \param word
00243  * \param hit
00244  * \param len
00245  *
00246  * \return
00247  */
00248 int ISpellChecker::cap_ok (ichar_t *word, struct success *hit, int len)
00249 {
00250     register ichar_t *      dword;
00251     register ichar_t *      w;
00252     register struct dent *  dent;
00253     ichar_t         dentword[INPUTWORDLEN + MAXAFFIXLEN];
00254     int             preadd;
00255     int             prestrip;
00256     int             sufadd;
00257     ichar_t *       limit;
00258     long            thiscap;
00259     long            dentcap;
00260 
00261     thiscap = whatcap (word);
00262     /*
00263     ** All caps is always legal, regardless of affixes.
00264     */
00265     preadd = prestrip = sufadd = 0;
00266     if (thiscap == ALLCAPS)
00267         return 1;
00268     else if (thiscap == FOLLOWCASE)
00269     {
00270         /* Set up some constants for the while(1) loop below */
00271         if (hit->prefix)
00272         {
00273             preadd = hit->prefix->affl;
00274             prestrip = hit->prefix->stripl;
00275         }
00276         else
00277             preadd = prestrip = 0;
00278         sufadd = hit->suffix ? hit->suffix->affl : 0;
00279     }
00280     /*
00281     ** Search the variants for one that matches what we have.  Note
00282     ** that thiscap can't be ALLCAPS, since we already returned
00283     ** for that case.
00284     */
00285     dent = hit->dictent;
00286     for (  ;  ;  )
00287     {
00288         dentcap = captype (dent->flagfield);
00289         if (dentcap != thiscap)
00290         {
00291             if (dentcap == ANYCASE  &&  thiscap == CAPITALIZED
00292              &&  entryhasaffixes (dent, hit))
00293                 return 1;
00294         }
00295         else                /* captypes match */
00296         {
00297             if (thiscap != FOLLOWCASE)
00298             {
00299                 if (entryhasaffixes (dent, hit))
00300                     return 1;
00301             }
00302             else
00303             {
00304                 /*
00305                 ** Make sure followcase matches exactly.
00306                 ** Life is made more difficult by the
00307                 ** possibility of affixes.  Start with
00308                 ** the prefix.
00309                 */
00310                 strtoichar (dentword, dent->word, INPUTWORDLEN, 1);
00311                 dword = dentword;
00312                 limit = word + preadd;
00313                 if (myupper (dword[prestrip]))
00314                 {
00315                     for (w = word;  w < limit;  w++)
00316                     {
00317                         if (mylower (*w))
00318                             goto doublecontinue;
00319                     }
00320                 }
00321                 else
00322                 {
00323                     for (w = word;  w < limit;  w++)
00324                     {
00325                         if (myupper (*w))
00326                             goto doublecontinue;
00327                     }
00328                 }
00329                 dword += prestrip;
00330                 /* Do root part of word */
00331                 limit = dword + len - preadd - sufadd;
00332                 while (dword < limit)
00333                 {
00334                     if (*dword++ != *w++)
00335                         goto doublecontinue;
00336                 }
00337                 /* Do suffix */
00338                 dword = limit - 1;
00339                 if (myupper (*dword))
00340                 {
00341                     for (  ;  *w;  w++)
00342                     {
00343                         if (mylower (*w))
00344                             goto doublecontinue;
00345                     }
00346                 }
00347                 else
00348                 {
00349                     for (  ;  *w;  w++)
00350                     {
00351                         if (myupper (*w))
00352                             goto doublecontinue;
00353                     }
00354                 }
00355                 /*
00356                 ** All failure paths go to "doublecontinue,"
00357                 ** so if we get here it must match.
00358                 */
00359                 if (entryhasaffixes (dent, hit))
00360                     return 1;
00361                 doublecontinue: ;
00362             }
00363         }
00364         if ((dent->flagfield & MOREVARIANTS) == 0)
00365             break;
00366         dent = dent->next;
00367     }
00368 
00369     /* No matches found */
00370     return 0;
00371 }
00372 #endif
00373 
00374 #ifndef NO_CAPITALIZATION_SUPPORT
00375 
00384 int ISpellChecker::good (ichar_t *w, int ignoreflagbits, int allhits, int pfxopts, int sfxopts)
00385 #else
00386 /* ARGSUSED */
00387 int ISpellChecker::good (ichar_t *w, int ignoreflagbits, int dummy, int pfxopts, int sfxopts)
00388 #endif
00389 {
00390     ichar_t     nword[INPUTWORDLEN + MAXAFFIXLEN];
00391     register ichar_t *  p;
00392     register ichar_t *  q;
00393     register int    n;
00394     register struct dent * dp;
00395 
00396     /*
00397     ** Make an uppercase copy of the word we are checking.
00398     */
00399     for (p = w, q = nword;  *p;  )
00400         *q++ = mytoupper (*p++);
00401     *q = 0;
00402     n = q - nword;
00403 
00404     m_numhits = 0;
00405 
00406     if ((dp = ispell_lookup (nword, 1)) != NULL)
00407     {
00408         m_hits[0].dictent = dp;
00409         m_hits[0].prefix = NULL;
00410         m_hits[0].suffix = NULL;
00411 #ifndef NO_CAPITALIZATION_SUPPORT
00412         if (allhits  ||  cap_ok (w, &m_hits[0], n))
00413             m_numhits = 1;
00414 #else
00415         m_numhits = 1;
00416 #endif
00417     }
00418 
00419     if (m_numhits  &&  !allhits)
00420         return 1;
00421 
00422     /* try stripping off affixes */
00423 
00424     chk_aff (w, nword, n, ignoreflagbits, allhits, pfxopts, sfxopts);
00425 
00426     return m_numhits;
00427 }
00428 
00429 
00430 
00431 
KDE Home | KDE Accessibility Home | Description of Access Keys