ispell.h
00001 /* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */ 00002 /* enchant 00003 * Copyright (C) 2003 Dom Lachowicz 00004 * 00005 * This library is free software; you can redistribute it and/or 00006 * modify it under the terms of the GNU Lesser General Public 00007 * License as published by the Free Software Foundation; either 00008 * version 2.1 of the License, or (at your option) any later version. 00009 * 00010 * This library is distributed in the hope that it will be useful, 00011 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00012 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00013 * Lesser General Public License for more details. 00014 * 00015 * You should have received a copy of the GNU Lesser General Public 00016 * License along with this library; if not, write to the 00017 * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 00018 * Boston, MA 02110-1301, USA. 00019 * 00020 * In addition, as a special exception, Dom Lachowicz 00021 * gives permission to link the code of this program with 00022 * non-LGPL Spelling Provider libraries (eg: a MSFT Office 00023 * spell checker backend) and distribute linked combinations including 00024 * the two. You must obey the GNU Lesser General Public License in all 00025 * respects for all of the code used other than said providers. If you modify 00026 * this file, you may extend this exception to your version of the 00027 * file, but you are not obligated to do so. If you do not wish to 00028 * do so, delete this exception statement from your version. 00029 */ 00030 00031 #ifndef ISPELL_H 00032 #define ISPELL_H 00033 00034 #include <sys/types.h> 00035 00036 /* 00037 * $Id: ispell.h 465272 2005-09-29 09:47:40Z mueller $ 00038 */ 00039 00040 /* 00041 * Copyright 1992, 1993, Geoff Kuenning, Granada Hills, CA 00042 * All rights reserved. 00043 * 00044 * Redistribution and use in source and binary forms, with or without 00045 * modification, are permitted provided that the following conditions 00046 * are met: 00047 * 00048 * 1. Redistributions of source code must retain the above copyright 00049 * notice, this list of conditions and the following disclaimer. 00050 * 2. Redistributions in binary form must reproduce the above copyright 00051 * notice, this list of conditions and the following disclaimer in the 00052 * documentation and/or other materials provided with the distribution. 00053 * 3. All modifications to the source code must be clearly marked as 00054 * such. Binary redistributions based on modified source code 00055 * must be clearly marked as modified versions in the documentation 00056 * and/or other materials provided with the distribution. 00057 * 4. All advertising materials mentioning features or use of this software 00058 * must display the following acknowledgment: 00059 * This product includes software developed by Geoff Kuenning and 00060 * other unpaid contributors. 00061 * 5. The name of Geoff Kuenning may not be used to endorse or promote 00062 * products derived from this software without specific prior 00063 * written permission. 00064 * 00065 * THIS SOFTWARE IS PROVIDED BY GEOFF KUENNING AND CONTRIBUTORS ``AS IS'' AND 00066 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 00067 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 00068 * ARE DISCLAIMED. IN NO EVENT SHALL GEOFF KUENNING OR CONTRIBUTORS BE LIABLE 00069 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 00070 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 00071 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 00072 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 00073 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 00074 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 00075 * SUCH DAMAGE. 00076 */ 00077 00078 /* 00079 * $Log$ 00080 * Revision 1.1 2004/01/31 16:44:12 zrusin 00081 * ISpell plugin. 00082 * 00083 * Revision 1.4 2003/08/14 17:51:27 dom 00084 * update license - exception clause should be Lesser GPL 00085 * 00086 * Revision 1.3 2003/07/28 20:40:26 dom 00087 * fix up the license clause, further win32-registry proof some directory getting functions 00088 * 00089 * Revision 1.2 2003/07/16 22:52:40 dom 00090 * LGPL + exception license 00091 * 00092 * Revision 1.1 2003/07/15 01:15:06 dom 00093 * ispell enchant backend 00094 * 00095 * Revision 1.10 2003/01/24 05:52:33 hippietrail 00096 * 00097 * Refactored ispell code. Old ispell global variables had been put into 00098 * an allocated structure, a pointer to which was passed to many functions. 00099 * I have now made all such functions and variables private members of the 00100 * ISpellChecker class. It was C OO, now it's C++ OO. 00101 * 00102 * I've fixed the makefiles and tested compilation but am unable to test 00103 * operation. Please back out my changes if they cause problems which 00104 * are not obvious or easy to fix. 00105 * 00106 * Revision 1.9 2002/09/19 05:31:15 hippietrail 00107 * 00108 * More Ispell cleanup. Conditional globals and DEREF macros are removed. 00109 * K&R function declarations removed, converted to Doxygen style comments 00110 * where possible. No code has been changed (I hope). Compiles for me but 00111 * unable to test. 00112 * 00113 * Revision 1.8 2002/09/17 03:03:29 hippietrail 00114 * 00115 * After seeking permission on the developer list I've reformatted all the 00116 * spelling source which seemed to have parts which used 2, 3, 4, and 8 00117 * spaces for tabs. It should all look good with our standard 4-space 00118 * tabs now. 00119 * I've concentrated just on indentation in the actual code. More prettying 00120 * could be done. 00121 * * NO code changes were made * 00122 * 00123 * Revision 1.7 2002/03/22 14:31:57 dom 00124 * fix mg's compile problem 00125 * 00126 * Revision 1.6 2002/03/05 16:55:52 dom 00127 * compound word support, tested against swedish 00128 * 00129 * Revision 1.5 2001/08/10 18:32:40 dom 00130 * Spelling and iconv updates. god, i hate iconv 00131 * 00132 * Revision 1.4 2001/06/26 16:33:27 dom 00133 * 128 StringChars and some other stuff 00134 * 00135 * Revision 1.3 2001/05/12 16:05:42 thomasf 00136 * Big pseudo changes to ispell to make it pass around a structure rather 00137 * than rely on all sorts of gloabals willy nilly here and there. Also 00138 * fixed our spelling class to work with accepting suggestions once more. 00139 * This code is dirty, gross and ugly (not to mention still not supporting 00140 * multiple hash sized just yet) but it works on my machine and will no 00141 * doubt break other machines. 00142 * 00143 * Revision 1.2 2001/04/18 00:59:36 thomasf 00144 * Removed the duplicate declarations of variables that was causing build 00145 * to bail. This new ispell stuff is a total mess. 00146 * 00147 * Revision 1.1 2001/04/15 16:01:24 tomas_f 00148 * moving to spell/xp 00149 * 00150 * Revision 1.13 2001/04/13 12:33:12 tamlin 00151 * ispell can now be used from C++ 00152 * 00153 * Revision 1.12 2001/03/25 01:30:02 tomb 00154 * 1. Fixed ispell #define problems on Win32 00155 * 2. Changed the way that togglable toolbars are tracked so that Full 00156 * Screen mode works right on Windows 00157 * 3. Fixed SET_GATHER macro in ap_Win32Dialog_Options.h 00158 * 4. Fixed Toggle Case dialog to default to Sentence Case when loaded 00159 * 5. Added #define for Auto Save checkbox (though I haven't updated the 00160 * Prefs dialog yet) 00161 * 00162 * Revision 1.11 2001/03/24 23:28:41 dom 00163 * Make C++ aware and watch out for VOID on Win32 00164 * 00165 * Revision 1.10 1999/12/21 18:46:29 sterwill 00166 * ispell patch for non-English dictionaries by Henrik Berg <henrik@lansen.se> 00167 * 00168 * Revision 1.9 1999/10/20 03:19:35 paul 00169 * Hacked ispell code to ignore any characters that don't fit in the lookup tables loaded from the dictionary. It ain't pretty, but at least we don't crash there any more. 00170 * 00171 * Revision 1.8 1999/09/29 23:33:32 justin 00172 * Updates to the underlying ispell-based code to support suggested corrections. 00173 * 00174 * Revision 1.7 1999/04/13 17:12:51 jeff 00175 * Applied "Darren O. Benham" <gecko@benham.net> spell check changes. 00176 * Fixed crash on Win32 with the new code. 00177 * 00178 * Revision 1.6 1999/01/07 05:14:22 sterwill 00179 * So it builds on Unix... it might break win32 in ispell, since ut_types 00180 * is no longer included. This is a temporary solution to a larger problem 00181 * of including C++ headers in C source files. 00182 * 00183 * Revision 1.6 1999/01/07 05:14:22 sterwill 00184 * So it builds on Unix... it might break win32 in ispell, since ut_types 00185 * is no longer included. This is a temporary solution to a larger problem 00186 * of including C++ headers in C source files. 00187 * 00188 * Revision 1.5 1999/01/07 05:02:25 sterwill 00189 * Checking in half-broken to avoid tree lossage 00190 * 00191 * Revision 1.4 1999/01/07 01:07:48 paul 00192 * Fixed spell leaks. 00193 * 00194 * Revision 1.3 1998/12/29 15:03:54 eric 00195 * 00196 * minor fix to ispell.h to get things to compile on Linux again. 00197 * 00198 * Revision 1.2 1998/12/29 14:55:33 eric 00199 * 00200 * I've doctored the ispell code pretty extensively here. It is now 00201 * warning-free on Win32. It also *works* on Win32 now, since I 00202 * replaced all the I/O calls with ANSI standard ones. 00203 * 00204 * Revision 1.1 1998/12/28 18:04:43 davet 00205 * Spell checker code stripped from ispell. At this point, there are 00206 * two external routines... the Init routine, and a check-a-word routine 00207 * which returns a boolean value, and takes a 16 bit char string. 00208 * The code resembles the ispell code as much as possible still. 00209 * 00210 * Revision 1.68 1995/03/06 02:42:41 geoff 00211 * Be vastly more paranoid about parenthesizing macro arguments. This 00212 * fixes a bug in defmt.c where a complex argument was passed to 00213 * isstringch. 00214 * 00215 * Revision 1.67 1995/01/03 19:24:12 geoff 00216 * Get rid of a non-global declaration. 00217 * 00218 * Revision 1.66 1994/12/27 23:08:49 geoff 00219 * Fix a lot of subtly bad assumptions about the widths of ints and longs 00220 * which only show up on 64-bit machines like the Cray and the DEC Alpha. 00221 * 00222 * Revision 1.65 1994/11/02 06:56:10 geoff 00223 * Remove the anyword feature, which I've decided is a bad idea. 00224 * 00225 * Revision 1.64 1994/10/25 05:46:18 geoff 00226 * Add the FF_ANYWORD flag for defining an affix that will apply to any 00227 * word, even if not explicitly specified. (Good for French.) 00228 * 00229 * Revision 1.63 1994/09/16 04:48:28 geoff 00230 * Make stringdups and laststringch unsigned ints, and dupnos a plain 00231 * int, so that we can handle more than 128 stringchars and stringchar 00232 * types. 00233 * 00234 * Revision 1.62 1994/09/01 06:06:39 geoff 00235 * Change erasechar/killchar to uerasechar/ukillchar to avoid 00236 * shared-library problems on HP systems. 00237 * 00238 * Revision 1.61 1994/08/31 05:58:35 geoff 00239 * Add contextoffset, used in -a mode to handle extremely long lines. 00240 * 00241 * Revision 1.60 1994/05/17 06:44:15 geoff 00242 * Add support for controlled compound formation and the COMPOUNDONLY 00243 * option to affix flags. 00244 * 00245 * Revision 1.59 1994/03/15 06:25:16 geoff 00246 * Change deftflag's initialization so we can tell if -t/-n appeared. 00247 * 00248 * Revision 1.58 1994/02/07 05:53:28 geoff 00249 * Add typecasts to the the 7-bit versions of ichar* routines 00250 * 00251 * Revision 1.57 1994/01/25 07:11:48 geoff 00252 * Get rid of all old RCS log lines in preparation for the 3.1 release. 00253 * 00254 */ 00255 00256 #include <stdio.h> 00257 /* #include "ut_types.h" */ 00258 00259 #include "ispell_def.h" 00260 00261 #ifdef __cplusplus 00262 extern "C" { 00263 #endif /* c++ */ 00264 00265 /* largest amount that a word might be extended by adding affixes */ 00266 #ifndef MAXAFFIXLEN 00267 #define MAXAFFIXLEN 20 00268 #endif 00269 00270 /* 00271 ** Number of mask bits (affix flags) supported. Must be 32, 64, 128, or 00272 ** 256. If MASKBITS is 32 or 64, there are really only 26 or 58 flags 00273 ** available, respectively. If it is 32, the flags are named with the 00274 ** 26 English uppercase letters; lowercase will be converted to uppercase. 00275 ** If MASKBITS is 64, the 58 flags are named 'A' through 'z' in ASCII 00276 ** order, including the 6 special characters from 'Z' to 'a': "[\]^_`". 00277 ** If MASKBITS is 128 or 256, all the 7-bit or 8-bit characters, 00278 ** respectively, are theoretically available, though a few (newline, slash, 00279 ** null byte) are pretty hard to actually use successfully. 00280 ** 00281 ** Note that a number of non-English affix files depend on having a 00282 ** larger value for MASKBITS. See the affix files for more 00283 ** information. 00284 */ 00285 00286 #ifndef MASKBITS 00287 #define MASKBITS 64 00288 #endif 00289 00290 extern int gnMaskBits; 00291 00292 /* 00293 ** C type to use for masks. This should be a type that the processor 00294 ** accesses efficiently. 00295 ** 00296 ** MASKTYPE_WIDTH must correctly reflect the number of bits in a 00297 ** MASKTYPE. Unfortunately, it is also required to be a constant at 00298 ** preprocessor time, which means you can't use the sizeof operator to 00299 ** define it. 00300 ** 00301 ** Note that MASKTYPE *must* match MASKTYPE_WIDTH or you may get 00302 ** division-by-zero errors! 00303 */ 00304 #ifndef MASKTYPE 00305 #define MASKTYPE long 00306 #endif 00307 #ifndef MASKTYPE_WIDTH 00308 #define MASKTYPE_WIDTH 32 00309 #endif 00310 00311 /* program: this should be coded now in init */ 00312 00313 #if MASKBITS < MASKTYPE_WIDTH 00314 #undef MASKBITS 00315 #define MASKBITS MASKTYPE_WIDTH 00316 #endif /* MASKBITS < MASKTYPE_WIDTH */ 00317 00318 /* 00319 ** Maximum hash table fullness percentage. Larger numbers trade space 00320 ** for time. 00321 **/ 00322 #ifndef MAXPCT 00323 #define MAXPCT 70 /* Expand table when 70% full */ 00324 #endif 00325 00326 /* 00327 ** Maximum number of "string" characters that can be defined in a 00328 ** language (affix) file. Don't forget that an upper/lower string 00329 ** character counts as two! 00330 */ 00331 #ifndef MAXSTRINGCHARS 00332 #define MAXSTRINGCHARS 128 00333 #endif /* MAXSTRINGCHARS */ 00334 00335 /* 00336 ** Maximum length of a "string" character. The default is appropriate for 00337 ** nroff-style characters starting with a backslash. 00338 */ 00339 #ifndef MAXSTRINGCHARLEN 00340 #define MAXSTRINGCHARLEN 10 00341 #endif /* MAXSTRINGCHARLEN */ 00342 00343 /* 00344 ** Maximum number of "hits" expected on a word. This is basically the 00345 ** number of different ways different affixes can produce the same word. 00346 ** For example, with "english.aff", "brothers" can be produced 3 ways: 00347 ** "brothers," "brother+s", or "broth+ers". If this is too low, no major 00348 ** harm will be done, but ispell may occasionally forget a capitalization. 00349 */ 00350 #ifndef MAX_HITS 00351 #define MAX_HITS 10 00352 #endif 00353 00354 /* 00355 ** Maximum number of capitalization variations expected in any word. 00356 ** Besides the obvious all-lower, all-upper, and capitalized versions, 00357 ** this includes followcase variants. If this is too low, no real 00358 ** harm will be done, but ispell may occasionally fail to suggest a 00359 ** correct capitalization. 00360 */ 00361 #ifndef MAX_CAPS 00362 #define MAX_CAPS 10 00363 #endif /* MAX_CAPS */ 00364 00365 /* buffer size to use for file names if not in sys/param.h */ 00366 #ifndef MAXPATHLEN 00367 #define MAXPATHLEN 512 00368 #endif 00369 00370 /* 00371 ** Maximum language-table search size. Smaller numbers make ispell 00372 ** run faster, at the expense of more memory (the lowest reasonable value 00373 ** is 2). If a given character appears in a significant position in 00374 ** more than MAXSEARCH suffixes, it will be given its own index table. 00375 ** If you change this, define INDEXDUMP in lookup.c to be sure your 00376 ** index table looks reasonable. 00377 */ 00378 #ifndef MAXSEARCH 00379 #define MAXSEARCH 4 00380 #endif 00381 00382 #if defined(__STDC__) || defined(__cplusplus) 00383 #define P(x) x 00384 #ifndef VOID 00385 #define VOID void 00386 #endif 00387 #else /* __STDC__ */ 00388 #define P(x) () 00389 #ifndef VOID 00390 #define VOID char 00391 #endif 00392 #define const 00393 #endif /* __STDC__ */ 00394 00395 #ifdef NO8BIT 00396 #define SET_SIZE 128 00397 #else 00398 #define SET_SIZE 256 00399 #endif 00400 00401 #define MASKSIZE (gnMaskBits / MASKTYPE_WIDTH) 00402 00403 #ifdef lint 00404 extern int TSTMASKBIT P ((MASKTYPE * mask, int bit)); 00405 #else /* lint */ 00406 /* The following is really testing for MASKSIZE <= 1, but cpp can't do that */ 00407 #define TSTMASKBIT(mask, bit) \ 00408 ((mask)[(bit) / MASKTYPE_WIDTH] & \ 00409 ((MASKTYPE) 1 << ((bit) & (MASKTYPE_WIDTH - 1)))) 00410 #endif /* lint */ 00411 00412 #if MASKBITS > 64 00413 #define FULLMASKSET 00414 #endif 00415 00416 #if MASKBITS <= 32 00417 #define FLAGBASE ((MASKTYPE_WIDTH) - 6) 00418 #else 00419 # if MASKBITS <= 64 00420 #define FLAGBASE ((MASKTYPE_WIDTH) - 6) 00421 # else 00422 #define FLAGBASE 0 00423 # endif 00424 #endif 00425 00426 /* 00427 ** Data type for internal word storage. If necessary, we use shorts rather 00428 ** than chars so that string characters can be encoded as a single unit. 00429 */ 00430 #if (SET_SIZE + MAXSTRINGCHARS) <= 256 00431 #ifndef lint 00432 #define ICHAR_IS_CHAR 00433 #endif /* lint */ 00434 #endif 00435 00436 #ifdef ICHAR_IS_CHAR 00437 typedef unsigned char ichar_t; /* Internal character */ 00438 #define icharlen(s) strlen ((char *) (s)) 00439 #define icharcpy(a, b) strcpy ((char *) (a), (char *) (b)) 00440 #define icharcmp(a, b) strcmp ((char *) (a), (char *) (b)) 00441 #define icharncmp(a, b, n) strncmp ((char *) (a), (char *) (b), (n)) 00442 #define chartoichar(x) ((ichar_t) (x)) 00443 #else 00444 typedef unsigned short ichar_t; /* Internal character */ 00445 #define chartoichar(x) ((ichar_t) (unsigned char) (x)) 00446 00447 /* 00448 * Structure used to record data about successful lookups; these values 00449 * are used in the ins_root_cap routine to produce correct capitalizations. 00450 */ 00451 struct success 00452 { 00453 struct dent * dictent; /* Header of dict entry chain for wd */ 00454 struct flagent * prefix; /* Prefix flag used, or NULL */ 00455 struct flagent * suffix; /* Suffix flag used, or NULL */ 00456 }; 00457 00458 ichar_t* icharcpy (ichar_t* out, ichar_t* in); 00459 int icharlen (ichar_t* in); 00460 int icharcmp (ichar_t* s1, ichar_t* s2); 00461 int icharncmp (ichar_t* s1, ichar_t* s2, int n); 00462 00463 #endif 00464 00465 struct dent 00466 { 00467 struct dent * next; 00468 char * word; 00469 MASKTYPE mask[2]; 00470 #ifdef FULLMASKSET 00471 char flags; 00472 #endif 00473 }; 00474 00475 /* 00476 ** Flags in the directory entry. If FULLMASKSET is undefined, these are 00477 ** stored in the highest bits of the last longword of the mask field. If 00478 ** FULLMASKSET is defined, they are stored in the extra "flags" field. 00479 #ifndef NO_CAPITALIZATION_SUPPORT 00480 ** 00481 ** If a word has only one capitalization form, and that form is not 00482 ** FOLLOWCASE, it will have exactly one entry in the dictionary. The 00483 ** legal capitalizations will be indicated by the 2-bit capitalization 00484 ** field, as follows: 00485 ** 00486 ** ALLCAPS The word must appear in all capitals. 00487 ** CAPITALIZED The word must be capitalized (e.g., London). 00488 ** It will also be accepted in all capitals. 00489 ** ANYCASE The word may appear in lowercase, capitalized, 00490 ** or all-capitals. 00491 ** 00492 ** Regardless of the capitalization flags, the "word" field of the entry 00493 ** will point to an all-uppercase copy of the word. This is to simplify 00494 ** the large portion of the code that doesn't care about capitalization. 00495 ** Ispell will generate the correct version when needed. 00496 ** 00497 ** If a word has more than one capitalization, there will be multiple 00498 ** entries for it, linked together by the "next" field. The initial 00499 ** entry for such words will be a dummy entry, primarily for use by code 00500 ** that ignores capitalization. The "word" field of this entry will 00501 ** again point to an all-uppercase copy of the word. The "mask" field 00502 ** will contain the logical OR of the mask fields of all variants. 00503 ** A header entry is indicated by a capitalization type of ALLCAPS, 00504 ** with the MOREVARIANTS bit set. 00505 ** 00506 ** The following entries will define the individual variants. Each 00507 ** entry except the last has the MOREVARIANTS flag set, and each 00508 ** contains one of the following capitalization options: 00509 ** 00510 ** ALLCAPS The word must appear in all capitals. 00511 ** CAPITALIZED The word must be capitalized (e.g., London). 00512 ** It will also be accepted in all capitals. 00513 ** FOLLOWCASE The word must be capitalized exactly like the 00514 ** sample in the entry. Prefix (suffix) characters 00515 ** must be rendered in the case of the first (last) 00516 ** "alphabetic" character. It will also be accepted 00517 ** in all capitals. ("Alphabetic" means "mentioned 00518 ** in a 'casechars' statement".) 00519 ** ANYCASE The word may appear in lowercase, capitalized, 00520 ** or all-capitals. 00521 ** 00522 ** The "mask" field for the entry contains only the affix flag bits that 00523 ** are legal for that capitalization. The "word" field will be null 00524 ** except for FOLLOWCASE entries, where it will point to the 00525 ** correctly-capitalized spelling of the root word. 00526 ** 00527 ** It is worth discussing why the ALLCAPS option is used in 00528 ** the header entry. The header entry accepts an all-capitals 00529 ** version of the root plus every affix (this is always legal, since 00530 ** words get capitalized in headers and so forth). Further, all of 00531 ** the following variant entries will reject any all-capitals form 00532 ** that is illegal due to an affix. 00533 ** 00534 ** Finally, note that variations in the KEEP flag can cause a multiple-variant 00535 ** entry as well. For example, if the personal dictionary contains "ALPHA", 00536 ** (KEEP flag set) and the user adds "alpha" with the KEEP flag clear, a 00537 ** multiple-variant entry will be created so that "alpha" will be accepted 00538 ** but only "ALPHA" will actually be kept. 00539 #endif 00540 */ 00541 #ifdef FULLMASKSET 00542 #define flagfield flags 00543 #else 00544 #define flagfield mask[1] 00545 #endif 00546 #define USED ((MASKTYPE) 1 << (FLAGBASE + 0)) 00547 #define KEEP ((MASKTYPE) 1 << (FLAGBASE + 1)) 00548 #ifdef NO_CAPITALIZATION_SUPPORT 00549 #define ALLFLAGS (USED | KEEP) 00550 #else /* NO_CAPITALIZATION_SUPPORT */ 00551 #define ANYCASE ((MASKTYPE) 0 << (FLAGBASE + 2)) 00552 #define ALLCAPS ((MASKTYPE) 1 << (FLAGBASE + 2)) 00553 #define CAPITALIZED ((MASKTYPE) 2 << (FLAGBASE + 2)) 00554 #define FOLLOWCASE ((MASKTYPE) 3 << (FLAGBASE + 2)) 00555 #define CAPTYPEMASK ((MASKTYPE) 3 << (FLAGBASE + 2)) 00556 #define MOREVARIANTS ((MASKTYPE) 1 << (FLAGBASE + 4)) 00557 #define ALLFLAGS (USED | KEEP | CAPTYPEMASK | MOREVARIANTS) 00558 #define captype(x) ((x) & CAPTYPEMASK) 00559 #endif /* NO_CAPITALIZATION_SUPPORT */ 00560 00561 /* 00562 * Language tables used to encode prefix and suffix information. 00563 */ 00564 struct flagent 00565 { 00566 ichar_t * strip; /* String to strip off */ 00567 ichar_t * affix; /* Affix to append */ 00568 short flagbit; /* Flag bit this ent matches */ 00569 short stripl; /* Length of strip */ 00570 short affl; /* Length of affix */ 00571 short numconds; /* Number of char conditions */ 00572 short flagflags; /* Modifiers on this flag */ 00573 char conds[SET_SIZE + MAXSTRINGCHARS]; /* Adj. char conds */ 00574 }; 00575 00576 /* 00577 * Bits in flagflags 00578 */ 00579 #define FF_CROSSPRODUCT (1 << 0) /* Affix does cross-products */ 00580 #define FF_COMPOUNDONLY (1 << 1) /* Afx works in compounds */ 00581 00582 union ptr_union /* Aid for building flg ptrs */ 00583 { 00584 struct flagptr * fp; /* Pointer to more indexing */ 00585 struct flagent * ent; /* First of a list of ents */ 00586 }; 00587 00588 struct flagptr 00589 { 00590 union ptr_union pu; /* Ent list or more indexes */ 00591 int numents; /* If zero, pu.fp is valid */ 00592 }; 00593 00594 /* 00595 * Description of a single string character type. 00596 */ 00597 struct strchartype 00598 { 00599 char * name; /* Name of the type */ 00600 char * deformatter; /* Deformatter to use */ 00601 char * suffixes; /* File suffixes, null seps */ 00602 }; 00603 00604 /* 00605 * Header placed at the beginning of the hash file. 00606 */ 00607 struct hashheader 00608 { 00609 unsigned short magic; /* Magic number for ID */ 00610 unsigned short compileoptions; /* How we were compiled */ 00611 short maxstringchars; /* Max # strchrs we support */ 00612 short maxstringcharlen; /* Max strchr len supported */ 00613 short compoundmin; /* Min lth of compound parts */ 00614 short compoundbit; /* Flag 4 compounding roots */ 00615 int stringsize; /* Size of string table */ 00616 int lstringsize; /* Size of lang. str tbl */ 00617 int tblsize; /* No. entries in hash tbl */ 00618 int stblsize; /* No. entries in sfx tbl */ 00619 int ptblsize; /* No. entries in pfx tbl */ 00620 int sortval; /* Largest sort ID assigned */ 00621 int nstrchars; /* No. strchars defined */ 00622 int nstrchartype; /* No. strchar types */ 00623 int strtypestart; /* Start of strtype table */ 00624 char nrchars[5]; /* Nroff special characters */ 00625 char texchars[13]; /* TeX special characters */ 00626 char compoundflag; /* Compund-word handling */ 00627 char defhardflag; /* Default tryveryhard flag */ 00628 char flagmarker; /* "Start-of-flags" char */ 00629 unsigned short sortorder[SET_SIZE + MAXSTRINGCHARS]; /* Sort ordering */ 00630 ichar_t lowerconv[SET_SIZE + MAXSTRINGCHARS]; /* Lower-conversion table */ 00631 ichar_t upperconv[SET_SIZE + MAXSTRINGCHARS]; /* Upper-conversion table */ 00632 char wordchars[SET_SIZE + MAXSTRINGCHARS]; /* NZ for chars found in wrds */ 00633 char upperchars[SET_SIZE + MAXSTRINGCHARS]; /* NZ for uppercase chars */ 00634 char lowerchars[SET_SIZE + MAXSTRINGCHARS]; /* NZ for lowercase chars */ 00635 char boundarychars[SET_SIZE + MAXSTRINGCHARS]; /* NZ for boundary chars */ 00636 char stringstarts[SET_SIZE]; /* NZ if char can start str */ 00637 char stringchars[MAXSTRINGCHARS][MAXSTRINGCHARLEN + 1]; /* String chars */ 00638 unsigned int stringdups[MAXSTRINGCHARS]; /* No. of "base" char */ 00639 int dupnos[MAXSTRINGCHARS]; /* Dup char ID # */ 00640 unsigned short magic2; /* Second magic for dbl chk */ 00641 }; 00642 00643 /* hash table magic number */ 00644 #define MAGIC 0x9602 00645 00646 /* compile options, put in the hash header for consistency checking */ 00647 #ifdef NO8BIT 00648 # define MAGIC8BIT 0x01 00649 #else 00650 # define MAGIC8BIT 0x00 00651 #endif 00652 #ifdef NO_CAPITALIZATION_SUPPORT 00653 # define MAGICCAPITALIZATION 0x00 00654 #else 00655 # define MAGICCAPITALIZATION 0x02 00656 #endif 00657 # define MAGICMASKSET 0x04 00658 00659 #if MASKBITS <= 32 00660 # define MAGICMASKSET 0x00 00661 #else 00662 # if MASKBITS <= 64 00663 # else 00664 # if MASKBITS <= 128 00665 # define MAGICMASKSET 0x08 00666 # else 00667 # define MAGICMASKSET 0x0C 00668 # endif 00669 # endif 00670 #endif 00671 00672 #define COMPILEOPTIONS (MAGIC8BIT | MAGICCAPITALIZATION | MAGICMASKSET) 00673 00674 /* 00675 ** Offsets into the nroff special-character array 00676 */ 00677 #define NRLEFTPAREN hashheader.nrchars[0] 00678 #define NRRIGHTPAREN hashheader.nrchars[1] 00679 #define NRDOT hashheader.nrchars[2] 00680 #define NRBACKSLASH hashheader.nrchars[3] 00681 #define NRSTAR hashheader.nrchars[4] 00682 00683 /* 00684 ** Offsets into the TeX special-character array 00685 */ 00686 #define TEXLEFTPAREN hashheader.texchars[0] 00687 #define TEXRIGHTPAREN hashheader.texchars[1] 00688 #define TEXLEFTSQUARE hashheader.texchars[2] 00689 #define TEXRIGHTSQUARE hashheader.texchars[3] 00690 #define TEXLEFTCURLY hashheader.texchars[4] 00691 #define TEXRIGHTCURLY hashheader.texchars[5] 00692 #define TEXLEFTANGLE hashheader.texchars[6] 00693 #define TEXRIGHTANGLE hashheader.texchars[7] 00694 #define TEXBACKSLASH hashheader.texchars[8] 00695 #define TEXDOLLAR hashheader.texchars[9] 00696 #define TEXSTAR hashheader.texchars[10] 00697 #define TEXDOT hashheader.texchars[11] 00698 #define TEXPERCENT hashheader.texchars[12] 00699 00700 /* 00701 ** Values for compoundflag 00702 */ 00703 #define COMPOUND_NEVER 0 /* Compound words are never good */ 00704 #define COMPOUND_ANYTIME 1 /* Accept run-together words */ 00705 #define COMPOUND_CONTROLLED 2 /* Compounds controlled by afx flags */ 00706 /* 00707 ** These macros are similar to the ones above, but they take into account 00708 ** the possibility of string characters. Note well that they take a POINTER, 00709 ** not a character. 00710 ** 00711 ** The "l_" versions set "len" to the length of the string character as a 00712 ** handy side effect. (Note that the global "laststringch" is also set, 00713 ** and sometimes used, by these macros.) 00714 ** 00715 ** The "l1_" versions go one step further and guarantee that the "len" 00716 ** field is valid for *all* characters, being set to 1 even if the macro 00717 ** returns false. This macro is a great example of how NOT to write 00718 ** readable C. 00719 */ 00720 /*TF NOTE: This is actually defined in code (makedent) now */ 00721 #if 0 00722 #define isstringch(ptr, canon) (isstringstart (*(ptr)) \ 00723 && stringcharlen ((ptr), (canon)) > 0) 00724 #define l_isstringch(ptr, len, canon) \ 00725 (isstringstart (*(ptr)) \ 00726 && (len = stringcharlen ((ptr), (canon))) \ 00727 > 0) 00728 #define l1_isstringch(ptr, len, canon) \ 00729 (len = 1, \ 00730 isstringstart ((unsigned char)(*(ptr))) \ 00731 && ((len = \ 00732 stringcharlen ((ptr), (canon))) \ 00733 > 0 \ 00734 ? 1 : (len = 1, 0))) 00735 #endif 00736 00737 /* 00738 * Sizes of buffers returned by ichartosstr/strtosichar. 00739 */ 00740 #define ICHARTOSSTR_SIZE (INPUTWORDLEN + 4 * MAXAFFIXLEN + 4) 00741 #define STRTOSICHAR_SIZE ((INPUTWORDLEN + 4 * MAXAFFIXLEN + 4) \ 00742 * sizeof (ichar_t)) 00743 /* TF CHANGE: We should fill this as a structure 00744 and then use it throughout. 00745 */ 00746 00747 /* 00748 * Initialized variables. These are generated using macros so that they 00749 * may be consistently declared in all programs. Numerous examples of 00750 * usage are given below. 00751 */ 00752 #ifdef MAIN 00753 #define INIT(decl, init) decl = init 00754 #else 00755 #define INIT(decl, init) extern decl 00756 #endif 00757 00758 #ifdef MINIMENU 00759 INIT (int minimenusize, 2); /* MUST be either 2 or zero */ 00760 #else /* MINIMENU */ 00761 INIT (int minimenusize, 0); /* MUST be either 2 or zero */ 00762 #endif /* MINIMENU */ 00763 00764 INIT (int eflag, 0); /* NZ for expand mode */ 00765 INIT (int dumpflag, 0); /* NZ to do dump mode */ 00766 INIT (int fflag, 0); /* NZ if -f specified */ 00767 #ifndef USG 00768 INIT (int sflag, 0); /* NZ to stop self after EOF */ 00769 #endif 00770 INIT (int vflag, 0); /* NZ to display characters as M-xxx */ 00771 INIT (int xflag, DEFNOBACKUPFLAG); /* NZ to suppress backups */ 00772 INIT (int deftflag, -1); /* NZ for TeX mode by default */ 00773 INIT (int tflag, DEFTEXFLAG); /* NZ for TeX mode in current file */ 00774 INIT (int prefstringchar, -1); /* Preferred string character type */ 00775 00776 INIT (int terse, 0); /* NZ for "terse" mode */ 00777 00778 INIT (char tempfile[MAXPATHLEN], ""); /* Name of file we're spelling into */ 00779 00780 INIT (int minword, MINWORD); /* Longest always-legal word */ 00781 INIT (int sortit, 1); /* Sort suggestions alphabetically */ 00782 INIT (int compoundflag, -1); /* How to treat compounds: see above */ 00783 INIT (int tryhardflag, -1); /* Always call tryveryhard */ 00784 00785 INIT (char * currentfile, NULL); /* Name of current input file */ 00786 00787 /* Odd numbers for math mode in LaTeX; even for LR or paragraph mode */ 00788 INIT (int math_mode, 0); 00789 /* P -- paragraph or LR mode 00790 * b -- parsing a \begin statement 00791 * e -- parsing an \end statement 00792 * r -- parsing a \ref type of argument. 00793 * m -- looking for a \begin{minipage} argument. 00794 */ 00795 INIT (char LaTeX_Mode, 'P'); 00796 00797 #ifdef __cplusplus 00798 } 00799 #endif /* c++ */ 00800 00801 #endif /* ISPELL_H */