00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019 #include "kmimemagic.h"
00020 #include <kdebug.h>
00021 #include <kapplication.h>
00022 #include <qfile.h>
00023 #include <ksimpleconfig.h>
00024 #include <kstandarddirs.h>
00025 #include <kstaticdeleter.h>
00026 #include <klargefile.h>
00027 #include <assert.h>
00028
00029 static int fsmagic(struct config_rec* conf, const char *fn, KDE_struct_stat *sb);
00030 static void process(struct config_rec* conf, const QString &);
00031 static int ascmagic(struct config_rec* conf, unsigned char *buf, int nbytes);
00032 static int tagmagic(unsigned char *buf, int nbytes);
00033 static int textmagic(struct config_rec* conf, unsigned char *, int);
00034
00035 static void tryit(struct config_rec* conf, unsigned char *buf, int nb);
00036 static int match(struct config_rec* conf, unsigned char *, int);
00037
00038 KMimeMagic* KMimeMagic::s_pSelf;
00039 static KStaticDeleter<KMimeMagic> kmimemagicsd;
00040
00041 KMimeMagic* KMimeMagic::self()
00042 {
00043 if( !s_pSelf )
00044 initStatic();
00045 return s_pSelf;
00046 }
00047
00048 void KMimeMagic::initStatic()
00049 {
00050 s_pSelf = kmimemagicsd.setObject( s_pSelf, new KMimeMagic() );
00051 s_pSelf->setFollowLinks( true );
00052 }
00053
00054 #include <stdio.h>
00055 #include <unistd.h>
00056 #include <stdlib.h>
00057 #include <sys/wait.h>
00058 #include <sys/types.h>
00059 #include <sys/stat.h>
00060 #include <fcntl.h>
00061 #include <errno.h>
00062 #include <ctype.h>
00063 #include <time.h>
00064 #include <utime.h>
00065 #include <stdarg.h>
00066 #include <qregexp.h>
00067 #include <qstring.h>
00068
00069
00070
00071
00072
00073
00074
00075
00076 #if (defined DEBUG_MIMEMAGIC || defined DEBUG_APPRENTICE)
00077 #define DEBUG_LINENUMBERS
00078 #endif
00079
00080
00081
00082
00083 #define MIME_BINARY_UNKNOWN "application/octet-stream"
00084 #define MIME_BINARY_UNREADABLE "application/x-unreadable"
00085 #define MIME_BINARY_ZEROSIZE "application/x-zerosize"
00086 #define MIME_TEXT_UNKNOWN "text/plain"
00087 #define MIME_TEXT_PLAIN "text/plain"
00088 #define MIME_INODE_DIR "inode/directory"
00089 #define MIME_INODE_CDEV "inode/chardevice"
00090 #define MIME_INODE_BDEV "inode/blockdevice"
00091 #define MIME_INODE_FIFO "inode/fifo"
00092 #define MIME_INODE_LINK "inode/link"
00093 #define MIME_INODE_SOCK "inode/socket"
00094
00095 #define MIME_APPL_TROFF "application/x-troff"
00096 #define MIME_APPL_TAR "application/x-tar"
00097 #define MIME_TEXT_FORTRAN "text/x-fortran"
00098
00099 #define MAXMIMESTRING 256
00100
00101 #define HOWMANY 4000
00102 #define MAXDESC 50
00103 #define MAXstring 64
00104 /* A value read from the data buffer or parsed from a rule; the member in use follows magic::type. */
00105 typedef union VALUETYPE {
00106 unsigned char b; /* read by mcheck() for BYTE */
00107 unsigned short h; /* read by mcheck() for SHORT, BESHORT, LESHORT */
00108 unsigned long l; /* read by mcheck() for LONG, DATE and their BE/LE variants */
00109 char s[MAXstring]; /* STRING value; mconvert() forces the last byte to NUL */
00110 unsigned char hs[2]; /* raw bytes that mconvert() recombines into h */
00111 unsigned char hl[4]; /* raw bytes that mconvert() recombines into l */
00112 } VALUETYPE;
00113 /* One parsed rule of a magic file; parse() allocates these and chains them through next. */
00114 struct magic {
00115 struct magic *next; /* following rule in list order, NULL at the tail */
00116 #ifdef DEBUG_LINENUMBERS
00117 int lineno; /* line number handed to parse(), kept for debug output */
00118 #endif
00119 /* bit set made of INDIR and UNSIGNED */
00120 short flag;
00121 #define INDIR 1
00122 #define UNSIGNED 2
00123 short cont_level; /* number of leading '>' characters on the rule line */
00124 struct {
00125 char type; /* BYTE, SHORT or LONG: which VALUETYPE member supplies the indirect base */
00126 long offset; /* addend applied to that base in mget() */
00127 } in; /* only meaningful when flag has INDIR */
00128 long offset; /* offset into the data buffer */
00129 unsigned char reln; /* relation character: one of = ! < > & ^ x */
00130 char type; /* one of the BYTE .. LEDATE constants below */
00131 char vallen; /* length of value.s as reported by getstr(); STRING rules only */
00132 #define BYTE 1
00133 #define SHORT 2
00134 #define LONG 4
00135 #define STRING 5
00136 #define DATE 6
00137 #define BESHORT 7
00138 #define BELONG 8
00139 #define BEDATE 9
00140 #define LESHORT 10
00141 #define LELONG 11
00142 #define LEDATE 12
00143 VALUETYPE value; /* value the data is compared against */
00144 unsigned long mask; /* ANDed onto the data value in mcheck(); all ones when the rule has no '&' */
00145 char nospflag; /* set to 1 by parse() when the description starts with a backspace or "\b" */
00146 /* rule description; match() copies it into config_rec::resultBuf on a hit */
00147
00148 char desc[MAXDESC];
00149 };
00150
00151
00152
00153
00154
00155
00156
00157
00158
00159
00160
00161
00162
00163
00164
00165 #define RECORDSIZE 512
00166 #define NAMSIZ 100
00167 #define TUNMLEN 32
00168 #define TGNMLEN 32
00169 /* A RECORDSIZE-byte block seen as raw bytes or as header fields. NOTE(review): looks like a tar header for is_tar(), which is not visible here - confirm. */
00170 union record {
00171 char charptr[RECORDSIZE]; /* raw view of the whole block */
00172 struct header {
00173 char name[NAMSIZ];
00174 char mode[8];
00175 char uid[8];
00176 char gid[8];
00177 char size[12];
00178 char mtime[12];
00179 char chksum[8];
00180 char linkflag;
00181 char linkname[NAMSIZ];
00182 char magic[8];
00183 char uname[TUNMLEN];
00184 char gname[TGNMLEN];
00185 char devmajor[8];
00186 char devminor[8];
00187 } header; /* field view of the same bytes */
00188 };
00189
00190
00191 #define TMAGIC "ustar "
00192
00193
00194
00195
00196 static int is_tar(unsigned char *, int);
00197 static unsigned long signextend(struct magic *, unsigned long);
00198 static int getvalue(struct magic *, char **);
00199 static int hextoint(int);
00200 static char *getstr(char *, char *, int, int *);
00201 static int mget(union VALUETYPE *, unsigned char *, struct magic *, int);
00202 static int mcheck(union VALUETYPE *, struct magic *);
00203 static int mconvert(union VALUETYPE *, struct magic *);
00204 static long from_oct(int, char *);
00205
00206
00207
00208
00209
00210
00211
00212
00213
00214
00215
00216
00217
00218
00219
00220 #define L_HTML 0x001
00221 #define L_C 0x002
00222 #define L_MAKE 0x004
00223 #define L_PLI 0x008
00224 #define L_MACH 0x010
00225 #define L_PAS 0x020
00226 #define L_JAVA 0x040
00227 #define L_CPP 0x080
00228 #define L_MAIL 0x100
00229 #define L_NEWS 0x200
00230 #define L_DIFF 0x400
00231 #define L_OBJC 0x800
00232
00233 #define P_HTML 0
00234 #define P_C 1
00235 #define P_MAKE 2
00236 #define P_PLI 3
00237 #define P_MACH 4
00238 #define P_PAS 5
00239 #define P_JAVA 6
00240 #define P_CPP 7
00241 #define P_MAIL 8
00242 #define P_NEWS 9
00243 #define P_DIFF 10
00244 #define P_OBJC 11
00245 /* Row type of the types[] table: one text MIME type with two tuning numbers. */
00246 typedef struct asc_type {
00247 const char *type; /* MIME type string */
00248 int kwords; /* NOTE(review): presumably a keyword count used by ascmagic(); usage not visible here */
00249 double weight; /* NOTE(review): presumably a scoring weight; usage not visible here */
00250 } asc_type;
00251 /* Text MIME types; row order follows the P_* index constants (P_HTML == 0 ... P_OBJC == 11). */
00252 static const asc_type types[] = {
00253 { "text/html", 19, 2 }, /* P_HTML */
00254 { "text/x-c", 13, 1 }, /* P_C */
00255 { "text/x-makefile", 4, 1.9 }, /* P_MAKE */
00256 { "text/x-pli", 1, 3 }, /* P_PLI */
00257 { "text/x-assembler", 6, 2.1 }, /* P_MACH */
00258 { "text/x-pascal", 1, 1 }, /* P_PAS */
00259 { "text/x-java", 12, 1 }, /* P_JAVA */
00260 { "text/x-c++", 19, 1 }, /* P_CPP */
00261 { "message/rfc822", 4, 1.9 }, /* P_MAIL */
00262 { "message/news", 3, 2 }, /* P_NEWS */
00263 { "text/x-diff", 4, 2 }, /* P_DIFF */
00264 { "text/x-objc", 10, 1 } /* P_OBJC */
00265 };
00266
00267 #define NTYPES (sizeof(types)/sizeof(asc_type))
00268
00269 static struct names {
00270 const char *name;
00271 short type;
00272 } const names[] = {
00273 {
00274 "<html", L_HTML
00275 },
00276 {
00277 "<HTML", L_HTML
00278 },
00279 {
00280 "<head", L_HTML
00281 },
00282 {
00283 "<HEAD", L_HTML
00284 },
00285 {
00286 "<body", L_HTML
00287 },
00288 {
00289 "<BODY", L_HTML
00290 },
00291 {
00292 "<title", L_HTML
00293 },
00294 {
00295 "<TITLE", L_HTML
00296 },
00297 {
00298 "<h1", L_HTML
00299 },
00300 {
00301 "<H1", L_HTML
00302 },
00303 {
00304 "<a", L_HTML
00305 },
00306 {
00307 "<A", L_HTML
00308 },
00309 {
00310 "<img", L_HTML
00311 },
00312 {
00313 "<IMG", L_HTML
00314 },
00315 {
00316 "<!--", L_HTML
00317 },
00318 {
00319 "<!doctype", L_HTML
00320 },
00321 {
00322 "<!DOCTYPE", L_HTML
00323 },
00324 {
00325 "<div", L_HTML
00326 },
00327 {
00328 "<DIV", L_HTML
00329 },
00330 {
00331 "<frame", L_HTML
00332 },
00333 {
00334 "<FRAME", L_HTML
00335 },
00336 {
00337 "<frameset", L_HTML
00338 },
00339 {
00340 "<FRAMESET", L_HTML
00341 },
00342 {
00343 "<script", L_HTML
00344 },
00345 {
00346 "<SCRIPT", L_HTML
00347 },
00348 {
00349 "/*", L_C|L_CPP|L_JAVA|L_OBJC
00350 },
00351 {
00352 "//", L_C|L_CPP|L_JAVA|L_OBJC
00353 },
00354 {
00355 "#include", L_C|L_CPP
00356 },
00357 {
00358 "#ifdef", L_C|L_CPP
00359 },
00360 {
00361 "#ifndef", L_C|L_CPP
00362 },
00363 {
00364 "bool", L_C|L_CPP
00365 },
00366 {
00367 "char", L_C|L_CPP|L_JAVA|L_OBJC
00368 },
00369 {
00370 "int", L_C|L_CPP|L_JAVA|L_OBJC
00371 },
00372 {
00373 "float", L_C|L_CPP|L_JAVA|L_OBJC
00374 },
00375 {
00376 "void", L_C|L_CPP|L_JAVA|L_OBJC
00377 },
00378 {
00379 "extern", L_C|L_CPP
00380 },
00381 {
00382 "struct", L_C|L_CPP
00383 },
00384 {
00385 "union", L_C|L_CPP
00386 },
00387 {
00388 "implements", L_JAVA
00389 },
00390 {
00391 "super", L_JAVA
00392 },
00393 {
00394 "import", L_JAVA
00395 },
00396 {
00397 "class", L_CPP|L_JAVA
00398 },
00399 {
00400 "public", L_CPP|L_JAVA
00401 },
00402 {
00403 "private", L_CPP|L_JAVA
00404 },
00405 {
00406 "explicit", L_CPP
00407 },
00408 {
00409 "virtual", L_CPP
00410 },
00411 {
00412 "namespace", L_CPP
00413 },
00414 {
00415 "#import", L_OBJC
00416 },
00417 {
00418 "@interface", L_OBJC
00419 },
00420 {
00421 "@implementation", L_OBJC
00422 },
00423 {
00424 "@protocol", L_OBJC
00425 },
00426 {
00427 "CFLAGS", L_MAKE
00428 },
00429 {
00430 "LDFLAGS", L_MAKE
00431 },
00432 {
00433 "all:", L_MAKE
00434 },
00435 {
00436 ".PHONY:", L_MAKE
00437 },
00438 {
00439 "srcdir", L_MAKE
00440 },
00441 {
00442 "exec_prefix", L_MAKE
00443 },
00444
00445
00446
00447
00448 {
00449 ".ascii", L_MACH
00450 },
00451 {
00452 ".asciiz", L_MACH
00453 },
00454 {
00455 ".byte", L_MACH
00456 },
00457 {
00458 ".even", L_MACH
00459 },
00460 {
00461 ".globl", L_MACH
00462 },
00463 {
00464 "clr", L_MACH
00465 },
00466 {
00467 "(input", L_PAS
00468 },
00469 {
00470 "dcl", L_PLI
00471 },
00472 {
00473 "Received:", L_MAIL
00474 },
00475
00476
00477
00478 {
00479 "Return-Path:", L_MAIL
00480 },
00481 {
00482 "Cc:", L_MAIL
00483 },
00484 {
00485 "Newsgroups:", L_NEWS
00486 },
00487 {
00488 "Path:", L_NEWS
00489 },
00490 {
00491 "Organization:", L_NEWS
00492 },
00493 {
00494 "---", L_DIFF
00495 },
00496 {
00497 "+++", L_DIFF
00498 },
00499 {
00500 "***", L_DIFF
00501 },
00502 {
00503 "@@", L_DIFF
00504 },
00505 {
00506 NULL, 0
00507 }
00508 };
00509
00520 class KMimeMagicUtimeConf
00521 {
00522 public:
00523 KMimeMagicUtimeConf()
00524 {
00525 tmpDirs << QString::fromLatin1("/tmp");
00526
00527
00528
00529 QStringList confDirs = KGlobal::dirs()->resourceDirs( "config" );
00530 if ( !confDirs.isEmpty() )
00531 {
00532 QString globalConf = confDirs.last() + "kmimemagicrc";
00533 if ( QFile::exists( globalConf ) )
00534 {
00535 KSimpleConfig cfg( globalConf );
00536 cfg.setGroup( "Settings" );
00537 tmpDirs = cfg.readListEntry( "atimeDirs" );
00538 }
00539 if ( confDirs.count() > 1 )
00540 {
00541 QString localConf = confDirs.first() + "kmimemagicrc";
00542 if ( QFile::exists( localConf ) )
00543 {
00544 KSimpleConfig cfg( localConf );
00545 cfg.setGroup( "Settings" );
00546 tmpDirs += cfg.readListEntry( "atimeDirs" );
00547 }
00548 }
00549 for ( QStringList::Iterator it = tmpDirs.begin() ; it != tmpDirs.end() ; ++it )
00550 {
00551 QString dir = *it;
00552 if ( !dir.isEmpty() && dir[ dir.length()-1 ] != '/' )
00553 (*it) += '/';
00554 }
00555 }
00556 #if 0
00557
00558 for ( QStringList::Iterator it = tmpDirs.begin() ; it != tmpDirs.end() ; ++it )
00559 kdDebug(7018) << " atimeDir: " << *it << endl;
00560 #endif
00561 }
00562
00563 bool restoreAccessTime( const QString & file ) const
00564 {
00565 QString dir = file.left( file.findRev( '/' ) );
00566 bool res = tmpDirs.contains( dir );
00567
00568 return res;
00569 }
00570 QStringList tmpDirs;
00571 };
00572
00573 /* State handed to the static helpers in this file (process, fsmagic, tryit, match, ...). */
00574 struct config_rec {
00575 bool followLinks; /* fsmagic(): examine a symlink's target instead of reporting MIME_INODE_LINK */
00576 QString resultBuf; /* the helpers store the detected type / matched rule description here */
00577 int accuracy; /* set to 0 by tryit() when nothing matched */
00578 /* head and tail of the rule list; parse() appends new rules at last */
00579 struct magic *magic,
00580 *last;
00581 KMimeMagicUtimeConf * utimeConf; /* may be NULL; process() asks it whether to restore the access time */
00582 };
00583 /* Debug aid, compiled only with MIME_MAGIC_DEBUG_TABLE: walks the rule list looking for a node address made of four printable bytes. */
00584 #ifdef MIME_MAGIC_DEBUG_TABLE
00585 static void
00586 test_table()
00587 {
00588 struct magic *m;
00589 struct magic *prevm = NULL;
00590
00591 kdDebug(7018) << "test_table : started" << endl;
00592 for (m = conf->magic; m; m = m->next) {
00593 if (isprint((((unsigned long) m) >> 24) & 255) &&
00594 isprint((((unsigned long) m) >> 16) & 255) &&
00595 isprint((((unsigned long) m) >> 8) & 255) &&
00596 isprint(((unsigned long) m) & 255)) {
00597 /* NOTE(review): the call that took the five arguments below is missing, so this block cannot compile when the macro is defined. */
00598 /* NOTE(review): conf is not declared in this free function either; it needs a config_rec parameter before it can be enabled. */
00599 (((unsigned long) m) >> 24) & 255,
00600 (((unsigned long) m) >> 16) & 255,
00601 (((unsigned long) m) >> 8) & 255,
00602 ((unsigned long) m) & 255,
00603 prevm ? prevm->lineno : -1);
00604 break;
00605 }
00606 prevm = m;
00607 }
00608 }
00609 #endif
00610
00611 #define EATAB {while (isascii((unsigned char) *l) && \
00612 isspace((unsigned char) *l)) ++l;}
00613
00614 int KMimeMagic::parse_line(char *line, int *rule, int lineno)
00615 {
00616 int ws_offset;
00617
00618
00619 if (line[0]) {
00620 line[strlen(line) - 1] = '\0';
00621 }
00622
00623 ws_offset = 0;
00624 while (line[ws_offset] && isspace(line[ws_offset])) {
00625 ws_offset++;
00626 }
00627
00628
00629 if (line[ws_offset] == 0) {
00630 return 0;
00631 }
00632
00633 if (line[ws_offset] == '#')
00634 return 0;
00635
00636
00637 (*rule)++;
00638
00639
00640 return (parse(line + ws_offset, lineno) != 0);
00641 }
00642
00643
00644
00645
00646 int KMimeMagic::apprentice( const QString& magicfile )
00647 {
00648 FILE *f;
00649 char line[BUFSIZ + 1];
00650 int errs = 0;
00651 int lineno;
00652 int rule = 0;
00653 QCString fname;
00654
00655 if (magicfile.isEmpty())
00656 return -1;
00657 fname = QFile::encodeName(magicfile);
00658 f = fopen(fname, "r");
00659 if (f == NULL) {
00660 kdError(7018) << "can't read magic file " << fname.data() << ": " << strerror(errno) << endl;
00661 return -1;
00662 }
00663
00664
00665 for (lineno = 1; fgets(line, BUFSIZ, f) != NULL; lineno++)
00666 if (parse_line(line, &rule, lineno))
00667 errs++;
00668
00669 fclose(f);
00670
00671 #ifdef DEBUG_APPRENTICE
00672 kdDebug(7018) << "apprentice: conf=" << conf << " file=" << magicfile << " m=" << (conf->magic ? "set" : "NULL") << " m->next=" << ((conf->magic && conf->magic->next) ? "set" : "NULL") << " last=" << (conf->last ? "set" : "NULL") << endl;
00673 kdDebug(7018) << "apprentice: read " << lineno << " lines, " << rule << " rules, " << errs << " errors" << endl;
00674 #endif
00675
00676 #ifdef MIME_MAGIC_DEBUG_TABLE
00677 test_table();
00678 #endif
00679
00680 return (errs ? -1 : 0);
00681 }
00682 /* Like apprentice(), but takes the rules from the NUL-terminated text in buff; returns 0, or -1 if any line failed in parse_line(). */
00683 int KMimeMagic::buff_apprentice(char *buff)
00684 {
00685 char line[BUFSIZ + 2];
00686 int errs = 0;
00687 int lineno = 1;
00688 char *start = buff;
00689 char *end;
00690 int count = 0;
00691 int rule = 0;
00692 int len = strlen(buff) + 1; /* the terminating NUL is counted as data still to consume */
00693 /* Each pass copies up to BUFSIZ-1 bytes and cuts the copy after its first newline; */
00694 /* a chunk without a newline gets one appended, because parse_line() drops the last character. */
00695 do {
00696 count = (len > BUFSIZ-1)?BUFSIZ-1:len;
00697 strncpy(line, start, count);
00698 line[count] = '\0';
00699 if ((end = strchr(line, '\n'))) {
00700 *(++end) = '\0';
00701 count = strlen(line); /* consume only up to and including the newline */
00702 } else
00703 strcat(line, "\n"); /* fits: at most BUFSIZ-1 characters + '\n' + NUL in BUFSIZ+2 bytes */
00704 start += count;
00705 len -= count;
00706 if (parse_line(line, &rule, lineno))
00707 errs++;
00708 lineno++;
00709 } while (len > 0);
00710
00711 #ifdef DEBUG_APPRENTICE
00712 kdDebug(7018) << "buff_apprentice: conf=" << conf << " m=" << (conf->magic ? "set" : "NULL") << " m->next=" << ((conf->magic && conf->magic->next) ? "set" : "NULL") << " last=" << (conf->last ? "set" : "NULL") << endl;
00713 kdDebug(7018) << "buff_apprentice: read " << lineno << " lines, " << rule << " rules, " << errs << " errors" << endl;
00714 #endif
00715
00716 #ifdef MIME_MAGIC_DEBUG_TABLE
00717 test_table();
00718 #endif
00719
00720 return (errs ? -1 : 0);
00721 }
00722
00723
00724
00725
00726 static unsigned long
00727 signextend(struct magic *m, unsigned long v)
00728 {
00729 if (!(m->flag & UNSIGNED))
00730 switch (m->type) {
00731
00732
00733
00734
00735
00736 case BYTE:
00737 v = (char) v;
00738 break;
00739 case SHORT:
00740 case BESHORT:
00741 case LESHORT:
00742 v = (short) v;
00743 break;
00744 case DATE:
00745 case BEDATE:
00746 case LEDATE:
00747 case LONG:
00748 case BELONG:
00749 case LELONG:
00750 v = (long) v;
00751 break;
00752 case STRING:
00753 break;
00754 default:
00755 kdError(7018) << "" << "signextend" << ": can't happen: m->type=" << m->type << endl;
00756 return 998;
00757 }
00758 return v;
00759 }
00760
00761
00762
00763
00764 int KMimeMagic::parse(char *l, int
00765 #ifdef DEBUG_LINENUMBERS
00766 lineno
00767 #endif
00768 )
00769 {
00770 int i = 0;
00771 struct magic *m;
00772 char *t,
00773 *s;
00774
00775 if ((m = (struct magic *) calloc(1, sizeof(struct magic))) == NULL) {
00776 kdError(7018) << "parse: Out of memory." << endl;
00777 return -1;
00778 }
00779
00780 m->next = NULL;
00781 if (!conf->magic || !conf->last) {
00782 conf->magic = conf->last = m;
00783 } else {
00784 conf->last->next = m;
00785 conf->last = m;
00786 }
00787
00788
00789 m->flag = 0;
00790 m->cont_level = 0;
00791 #ifdef DEBUG_LINENUMBERS
00792 m->lineno = lineno;
00793 #endif
00794
00795 while (*l == '>') {
00796 ++l;
00797 m->cont_level++;
00798 }
00799
00800 if (m->cont_level != 0 && *l == '(') {
00801 ++l;
00802 m->flag |= INDIR;
00803 }
00804
00805 m->offset = (int) strtol(l, &t, 0);
00806 if (l == t) {
00807 kdError(7018) << "parse: offset " << l << " invalid" << endl;
00808 }
00809 l = t;
00810
00811 if (m->flag & INDIR) {
00812 m->in.type = LONG;
00813 m->in.offset = 0;
00814
00815
00816
00817 if (*l == '.') {
00818 switch (*++l) {
00819 case 'l':
00820 m->in.type = LONG;
00821 break;
00822 case 's':
00823 m->in.type = SHORT;
00824 break;
00825 case 'b':
00826 m->in.type = BYTE;
00827 break;
00828 default:
00829 kdError(7018) << "parse: indirect offset type " << *l << " invalid" << endl;
00830 break;
00831 }
00832 l++;
00833 }
00834 s = l;
00835 if (*l == '+' || *l == '-')
00836 l++;
00837 if (isdigit((unsigned char) *l)) {
00838 m->in.offset = strtol(l, &t, 0);
00839 if (*s == '-')
00840 m->in.offset = -m->in.offset;
00841 } else
00842 t = l;
00843 if (*t++ != ')') {
00844 kdError(7018) << "parse: missing ')' in indirect offset" << endl;
00845 }
00846 l = t;
00847 }
00848 while (isascii((unsigned char) *l) && isdigit((unsigned char) *l))
00849 ++l;
00850 EATAB;
00851
00852 #define NBYTE 4
00853 #define NSHORT 5
00854 #define NLONG 4
00855 #define NSTRING 6
00856 #define NDATE 4
00857 #define NBESHORT 7
00858 #define NBELONG 6
00859 #define NBEDATE 6
00860 #define NLESHORT 7
00861 #define NLELONG 6
00862 #define NLEDATE 6
00863
00864 if (*l == 'u') {
00865 ++l;
00866 m->flag |= UNSIGNED;
00867 }
00868
00869 if (strncmp(l, "byte", NBYTE) == 0) {
00870 m->type = BYTE;
00871 l += NBYTE;
00872 } else if (strncmp(l, "short", NSHORT) == 0) {
00873 m->type = SHORT;
00874 l += NSHORT;
00875 } else if (strncmp(l, "long", NLONG) == 0) {
00876 m->type = LONG;
00877 l += NLONG;
00878 } else if (strncmp(l, "string", NSTRING) == 0) {
00879 m->type = STRING;
00880 l += NSTRING;
00881 } else if (strncmp(l, "date", NDATE) == 0) {
00882 m->type = DATE;
00883 l += NDATE;
00884 } else if (strncmp(l, "beshort", NBESHORT) == 0) {
00885 m->type = BESHORT;
00886 l += NBESHORT;
00887 } else if (strncmp(l, "belong", NBELONG) == 0) {
00888 m->type = BELONG;
00889 l += NBELONG;
00890 } else if (strncmp(l, "bedate", NBEDATE) == 0) {
00891 m->type = BEDATE;
00892 l += NBEDATE;
00893 } else if (strncmp(l, "leshort", NLESHORT) == 0) {
00894 m->type = LESHORT;
00895 l += NLESHORT;
00896 } else if (strncmp(l, "lelong", NLELONG) == 0) {
00897 m->type = LELONG;
00898 l += NLELONG;
00899 } else if (strncmp(l, "ledate", NLEDATE) == 0) {
00900 m->type = LEDATE;
00901 l += NLEDATE;
00902 } else {
00903 kdError(7018) << "parse: type " << l << " invalid" << endl;
00904 return -1;
00905 }
00906
00907 if (*l == '&') {
00908 ++l;
00909 m->mask = signextend(m, strtol(l, &l, 0));
00910 } else
00911 m->mask = (unsigned long) ~0L;
00912 EATAB;
00913
00914 switch (*l) {
00915 case '>':
00916 case '<':
00917
00918 case '&':
00919 case '^':
00920 case '=':
00921 m->reln = *l;
00922 ++l;
00923 break;
00924 case '!':
00925 if (m->type != STRING) {
00926 m->reln = *l;
00927 ++l;
00928 break;
00929 }
00930
00931 default:
00932 if (*l == 'x' && isascii((unsigned char) l[1]) &&
00933 isspace((unsigned char) l[1])) {
00934 m->reln = *l;
00935 ++l;
00936 goto GetDesc;
00937 }
00938 m->reln = '=';
00939 break;
00940 }
00941 EATAB;
00942
00943 if (getvalue(m, &l))
00944 return -1;
00945
00946
00947
00948 GetDesc:
00949 EATAB;
00950 if (l[0] == '\b') {
00951 ++l;
00952 m->nospflag = 1;
00953 } else if ((l[0] == '\\') && (l[1] == 'b')) {
00954 ++l;
00955 ++l;
00956 m->nospflag = 1;
00957 } else
00958 m->nospflag = 0;
00959
00960 while (*l != '\0' && *l != '#' && i < MAXDESC-1)
00961 m->desc[i++] = *l++;
00962 m->desc[i] = '\0';
00963
00964 while (--i>0 && isspace( m->desc[i] ))
00965 m->desc[i] = '\0';
00966
00967
00968
00969
00970 #ifdef DEBUG_APPRENTICE
00971 kdDebug(7018) << "parse: line=" << lineno << " m=" << m << " next=" << m->next << " cont=" << m->cont_level << " desc=" << (m->desc ? m->desc : "NULL") << endl;
00972 #endif
00973 return 0;
00974 }
00975
00976
00977
00978
00979
00980
00981 static int
00982 getvalue(struct magic *m, char **p)
00983 {
00984 int slen;
00985
00986 if (m->type == STRING) {
00987 *p = getstr(*p, m->value.s, sizeof(m->value.s), &slen);
00988 m->vallen = slen;
00989 } else if (m->reln != 'x')
00990 m->value.l = signextend(m, strtol(*p, p, 0));
00991 return 0;
00992 }
00993
00994
00995
00996
00997
00998
00999 static char *
01000 getstr(register char *s, register char *p, int plen, int *slen)
01001 {
01002 char *origs = s,
01003 *origp = p;
01004 char *pmax = p + plen - 1;
01005 register int c;
01006 register int val;
01007
01008 while ((c = *s++) != '\0') {
01009 if (isspace((unsigned char) c))
01010 break;
01011 if (p >= pmax) {
01012 kdError(7018) << "String too long: " << origs << endl;
01013 break;
01014 }
01015 if (c == '\\') {
01016 switch (c = *s++) {
01017
01018 case '\0':
01019 goto out;
01020
01021 default:
01022 *p++ = (char) c;
01023 break;
01024
01025 case 'n':
01026 *p++ = '\n';
01027 break;
01028
01029 case 'r':
01030 *p++ = '\r';
01031 break;
01032
01033 case 'b':
01034 *p++ = '\b';
01035 break;
01036
01037 case 't':
01038 *p++ = '\t';
01039 break;
01040
01041 case 'f':
01042 *p++ = '\f';
01043 break;
01044
01045 case 'v':
01046 *p++ = '\v';
01047 break;
01048
01049
01050 case '0':
01051 case '1':
01052 case '2':
01053 case '3':
01054 case '4':
01055 case '5':
01056 case '6':
01057 case '7':
01058 val = c - '0';
01059 c = *s++;
01060 if (c >= '0' && c <= '7') {
01061 val = (val << 3) | (c - '0');
01062 c = *s++;
01063 if (c >= '0' && c <= '7')
01064 val = (val << 3) | (c - '0');
01065 else
01066 --s;
01067 } else
01068 --s;
01069 *p++ = (char) val;
01070 break;
01071
01072
01073 case 'x':
01074 val = 'x';
01075 c = hextoint(*s++);
01076 if (c >= 0) {
01077 val = c;
01078 c = hextoint(*s++);
01079 if (c >= 0) {
01080 val = (val << 4) + c;
01081 c = hextoint(*s++);
01082 if (c >= 0) {
01083 val = (val << 4) + c;
01084 } else
01085 --s;
01086 } else
01087 --s;
01088 } else
01089 --s;
01090 *p++ = (char) val;
01091 break;
01092 }
01093 } else
01094 *p++ = (char) c;
01095 }
01096 out:
01097 *p = '\0';
01098 *slen = p - origp;
01099
01100
01101 return s;
01102 }
01103
01104
01105
/*
 * Value of a single hexadecimal digit character.
 *
 * Returns 0..15 for '0'-'9', 'a'-'f', 'A'-'F', and -1 for anything else.
 */
static int
hextoint(int c)
{
    const unsigned char byte = (unsigned char) c;

    if (byte > 0x7f)            /* outside the ASCII range */
        return -1;
    if (isdigit(byte))
        return c - '0';
    if (c >= 'a' && c <= 'f')
        return 10 + (c - 'a');
    if (c >= 'A' && c <= 'F')
        return 10 + (c - 'A');
    return -1;
}
01119
01120
01121
01122 /* Rewrites the bytes already copied into *p as a value of rule type m->type; returns 1, or 0 (after logging) for an unknown type. */
01123 static int
01124 mconvert(union VALUETYPE *p, struct magic *m)
01125 {
01126 switch (m->type) {
01127 case BYTE:
01128 return 1;
01129 case STRING:
01130 /* make sure the copied bytes are NUL-terminated */
01131 p->s[sizeof(p->s) - 1] = '\0';
01132 return 1;
01133 #ifndef WORDS_BIGENDIAN
01134 case SHORT: /* NOTE(review): without WORDS_BIGENDIAN, plain SHORT shares the big-endian assembly below - confirm this is intended */
01135 #endif
01136 case BESHORT:
01137 p->h = (short) ((p->hs[0] << 8) | (p->hs[1])); /* first byte is the most significant */
01138 return 1;
01139 #ifndef WORDS_BIGENDIAN
01140 case LONG:
01141 case DATE:
01142 #endif
01143 case BELONG:
01144 case BEDATE:
01145 p->l = (long) /* NOTE(review): the shifts happen in int, so a first byte >= 0x80 gives a negative int that is sign-extended where long is wider - confirm */
01146 ((p->hl[0] << 24) | (p->hl[1] << 16) | (p->hl[2] << 8) | (p->hl[3]));
01147 return 1;
01148 #ifdef WORDS_BIGENDIAN
01149 case SHORT:
01150 #endif
01151 case LESHORT:
01152 p->h = (short) ((p->hs[1] << 8) | (p->hs[0])); /* first byte is the least significant */
01153 return 1;
01154 #ifdef WORDS_BIGENDIAN
01155 case LONG:
01156 case DATE:
01157 #endif
01158 case LELONG:
01159 case LEDATE:
01160 p->l = (long)
01161 ((p->hl[3] << 24) | (p->hl[2] << 16) | (p->hl[1] << 8) | (p->hl[0]));
01162 return 1;
01163 default:
01164 kdError(7018) << "mconvert: invalid type " << m->type << endl;
01165 return 0;
01166 }
01167 }
01168
01169
01170 static int
01171 mget(union VALUETYPE *p, unsigned char *s, struct magic *m,
01172 int nbytes)
01173 {
01174 long offset = m->offset;
01175 switch ( m->type )
01176 {
01177 case BYTE:
01178 if ( offset + 1 > nbytes-1 )
01179 return 0;
01180 break;
01181 case SHORT:
01182 case BESHORT:
01183 case LESHORT:
01184 if ( offset + 2 > nbytes-1 )
01185 return 0;
01186 break;
01187 case LONG:
01188 case BELONG:
01189 case LELONG:
01190 case DATE:
01191 case BEDATE:
01192 case LEDATE:
01193 if ( offset + 4 > nbytes-1 )
01194 return 0;
01195 break;
01196 case STRING:
01197 break;
01198 }
01199
01200
01201
01202
01203 if (offset + (int)sizeof(union VALUETYPE) > nbytes)
01204 {
01205 int have = nbytes - offset;
01206 memset(p, 0, sizeof(union VALUETYPE));
01207 if (have > 0)
01208 memcpy(p, s + offset, have);
01209 } else
01210 memcpy(p, s + offset, sizeof(union VALUETYPE));
01211
01212 if (!mconvert(p, m))
01213 return 0;
01214
01215 if (m->flag & INDIR) {
01216
01217 switch (m->in.type) {
01218 case BYTE:
01219 offset = p->b + m->in.offset;
01220 break;
01221 case SHORT:
01222 offset = p->h + m->in.offset;
01223 break;
01224 case LONG:
01225 offset = p->l + m->in.offset;
01226 break;
01227 }
01228
01229 if (offset + (int)sizeof(union VALUETYPE) > nbytes)
01230 return 0;
01231
01232 memcpy(p, s + offset, sizeof(union VALUETYPE));
01233
01234 if (!mconvert(p, m))
01235 return 0;
01236 }
01237 return 1;
01238 }
01239
01240 static int
01241 mcheck(union VALUETYPE *p, struct magic *m)
01242 {
01243 register unsigned long l = m->value.l;
01244 register unsigned long v;
01245 int matched;
01246
01247 if ((m->value.s[0] == 'x') && (m->value.s[1] == '\0')) {
01248 kdError(7018) << "BOINK" << endl;
01249 return 1;
01250 }
01251 switch (m->type) {
01252 case BYTE:
01253 v = p->b;
01254 break;
01255
01256 case SHORT:
01257 case BESHORT:
01258 case LESHORT:
01259 v = p->h;
01260 break;
01261
01262 case LONG:
01263 case BELONG:
01264 case LELONG:
01265 case DATE:
01266 case BEDATE:
01267 case LEDATE:
01268 v = p->l;
01269 break;
01270
01271 case STRING:
01272 l = 0;
01273
01274
01275
01276
01277
01278 v = 0;
01279 {
01280 register unsigned char *a = (unsigned char *) m->value.s;
01281 register unsigned char *b = (unsigned char *) p->s;
01282 register int len = m->vallen;
01283 Q_ASSERT(len);
01284
01285 while (--len >= 0)
01286 if ((v = *b++ - *a++) != 0)
01287 break;
01288 }
01289 break;
01290 default:
01291 kdError(7018) << "mcheck: invalid type " << m->type << endl;
01292 return 0;
01293 }
01294 #if 0
01295 qDebug("Before signextend %08x", v);
01296 #endif
01297 v = signextend(m, v) & m->mask;
01298 #if 0
01299 qDebug("After signextend %08x", v);
01300 #endif
01301
01302 switch (m->reln) {
01303 case 'x':
01304 matched = 1;
01305 break;
01306
01307 case '!':
01308 matched = v != l;
01309 break;
01310
01311 case '=':
01312 matched = v == l;
01313 break;
01314
01315 case '>':
01316 if (m->flag & UNSIGNED)
01317 matched = v > l;
01318 else
01319 matched = (long) v > (long) l;
01320 break;
01321
01322 case '<':
01323 if (m->flag & UNSIGNED)
01324 matched = v < l;
01325 else
01326 matched = (long) v < (long) l;
01327 break;
01328
01329 case '&':
01330 matched = (v & l) == l;
01331 break;
01332
01333 case '^':
01334 matched = (v & l) != l;
01335 break;
01336
01337 default:
01338 matched = 0;
01339 kdError(7018) << "mcheck: can't happen: invalid relation " << m->reln << "." << endl;
01340 break;
01341 }
01342
01343 return matched;
01344 }
01345
01346
01347
01348
01349
01350
01351 void process(struct config_rec* conf, const QString & fn)
01352 {
01353 int fd = 0;
01354 unsigned char buf[HOWMANY + 1];
01355 KDE_struct_stat sb;
01356 int nbytes = 0;
01357 int tagbytes = 0;
01358 QCString fileName = QFile::encodeName( fn );
01359
01360
01361
01362
01363 if (fsmagic(conf, fileName, &sb) != 0) {
01364
01365 return;
01366 }
01367 if ((fd = KDE_open(fileName, O_RDONLY)) < 0) {
01368
01369
01370
01371
01372
01373
01374 conf->resultBuf = MIME_BINARY_UNREADABLE;
01375 return;
01376 }
01377
01378
01379
01380 if ((nbytes = read(fd, (char *) buf, HOWMANY)) == -1) {
01381 kdError(7018) << "" << fn << " read failed (" << strerror(errno) << ")." << endl;
01382 conf->resultBuf = MIME_BINARY_UNREADABLE;
01383 return;
01384 }
01385 if ((tagbytes = tagmagic(buf, nbytes))) {
01386
01387 lseek(fd, tagbytes, SEEK_SET);
01388 nbytes = read(fd, (char*)buf, HOWMANY);
01389 if (nbytes < 0) {
01390 conf->resultBuf = MIME_BINARY_UNREADABLE;
01391 return;
01392 }
01393 }
01394 if (nbytes == 0) {
01395 conf->resultBuf = MIME_BINARY_ZEROSIZE;
01396 } else {
01397 buf[nbytes++] = '\0';
01398 tryit(conf, buf, nbytes);
01399 }
01400
01401 if ( conf->utimeConf && conf->utimeConf->restoreAccessTime( fn ) )
01402 {
01403
01404
01405
01406
01407
01408 struct utimbuf utbuf;
01409 utbuf.actime = sb.st_atime;
01410 utbuf.modtime = sb.st_mtime;
01411 (void) utime(fileName, &utbuf);
01412 }
01413 (void) close(fd);
01414 }
01415
01416
01417 static void tryit(struct config_rec* conf, unsigned char *buf, int nb)
01418 {
01419
01420 if (match(conf, buf, nb))
01421 return;
01422
01423
01424 if (ascmagic(conf, buf, nb) == 1)
01425 return;
01426
01427
01428 if (textmagic(conf, buf, nb))
01429 return;
01430
01431
01432 conf->resultBuf = MIME_BINARY_UNKNOWN;
01433 conf->accuracy = 0;
01434 }
01435
01436 static int
01437 fsmagic(struct config_rec* conf, const char *fn, KDE_struct_stat *sb)
01438 {
01439 int ret = 0;
01440
01441
01442
01443
01444
01445 ret = KDE_lstat(fn, sb);
01446
01447 if (ret) {
01448 return 1;
01449
01450 }
01451
01452
01453
01454
01455
01456
01457 switch (sb->st_mode & S_IFMT) {
01458 case S_IFDIR:
01459 conf->resultBuf = MIME_INODE_DIR;
01460 return 1;
01461 case S_IFCHR:
01462 conf->resultBuf = MIME_INODE_CDEV;
01463 return 1;
01464 case S_IFBLK:
01465 conf->resultBuf = MIME_INODE_BDEV;
01466 return 1;
01467
01468 #ifdef S_IFIFO
01469 case S_IFIFO:
01470 conf->resultBuf = MIME_INODE_FIFO;
01471 return 1;
01472 #endif
01473 #ifdef S_IFLNK
01474 case S_IFLNK:
01475 {
01476 char buf[BUFSIZ + BUFSIZ + 4];
01477 register int nch;
01478 KDE_struct_stat tstatbuf;
01479
01480 if ((nch = readlink(fn, buf, BUFSIZ - 1)) <= 0) {
01481 conf->resultBuf = MIME_INODE_LINK;
01482
01483 return 1;
01484 }
01485 buf[nch] = '\0';
01486
01487 if (*buf == '/') {
01488 if (KDE_stat(buf, &tstatbuf) < 0) {
01489 conf->resultBuf = MIME_INODE_LINK;
01490
01491 return 1;
01492 }
01493 } else {
01494 char *tmp;
01495 char buf2[BUFSIZ + BUFSIZ + 4];
01496
01497 strncpy(buf2, fn, BUFSIZ);
01498 buf2[BUFSIZ] = 0;
01499
01500 if ((tmp = strrchr(buf2, '/')) == NULL) {
01501 tmp = buf;
01502 } else {
01503
01504 *++tmp = '\0';
01505 strcat(buf2, buf);
01506 tmp = buf2;
01507 }
01508 if (KDE_stat(tmp, &tstatbuf) < 0) {
01509 conf->resultBuf = MIME_INODE_LINK;
01510
01511 return 1;
01512 } else
01513 strcpy(buf, tmp);
01514 }
01515 if (conf->followLinks)
01516 process( conf, QFile::decodeName( buf ) );
01517 else
01518 conf->resultBuf = MIME_INODE_LINK;
01519 return 1;
01520 }
01521 return 1;
01522 #endif
01523 #ifdef S_IFSOCK
01524 #ifndef __COHERENT__
01525 case S_IFSOCK:
01526 conf->resultBuf = MIME_INODE_SOCK;
01527 return 1;
01528 #endif
01529 #endif
01530 case S_IFREG:
01531 break;
01532 default:
01533 kdError(7018) << "KMimeMagic::fsmagic: invalid mode 0" << sb->st_mode << "." << endl;
01534
01535 }
01536
01537
01538
01539
01540 if (sb->st_size == 0) {
01541 conf->resultBuf = MIME_BINARY_ZEROSIZE;
01542 return 1;
01543 }
01544 return 0;
01545 }
01546
01547
01548
01549
01550
01551
01552
01553
01554
01555
01556
01557
01558
01559
01560
01561
01562
01563
01564
01565
01566
01567
01568
01569
01570
01571
01572
01573 static int
01574 match(struct config_rec* conf, unsigned char *s, int nbytes)
01575 {
01576 int cont_level = 0;
01577 union VALUETYPE p;
01578 struct magic *m;
01579
01580 #ifdef DEBUG_MIMEMAGIC
01581 kdDebug(7018) << "match: conf=" << conf << " m=" << (conf->magic ? "set" : "NULL") << " m->next=" << ((conf->magic && conf->magic->next) ? "set" : "NULL") << " last=" << (conf->last ? "set" : "NULL") << endl;
01582 for (m = conf->magic; m; m = m->next) {
01583 if (isprint((((unsigned long) m) >> 24) & 255) &&
01584 isprint((((unsigned long) m) >> 16) & 255) &&
01585 isprint((((unsigned long) m) >> 8) & 255) &&
01586 isprint(((unsigned long) m) & 255)) {
01587 kdDebug(7018) << "match: POINTER CLOBBERED! " << endl;
01588 break;
01589 }
01590 }
01591 #endif
01592
01593 for (m = conf->magic; m; m = m->next) {
01594 #ifdef DEBUG_MIMEMAGIC
01595 kdDebug(7018) << "match: line=" << m->lineno << " desc=" << m->desc << endl;
01596 #endif
01597 memset(&p, 0, sizeof(union VALUETYPE));
01598
01599
01600 if (!mget(&p, s, m, nbytes) ||
01601 !mcheck(&p, m)) {
01602 struct magic *m_cont;
01603
01604
01605
01606
01607 if (!m->next || (m->next->cont_level == 0)) {
01608 continue;
01609 }
01610 m_cont = m->next;
01611 while (m_cont && (m_cont->cont_level != 0)) {
01612 #ifdef DEBUG_MIMEMAGIC
01613 kdDebug(7018) << "match: line=" << m->lineno << " cont=" << m_cont->cont_level << " mc=" << m_cont->lineno << " mc->next=" << m_cont << " " << endl;
01614 #endif
01615
01616
01617
01618
01619 m = m_cont;
01620 m_cont = m_cont->next;
01621 }
01622 continue;
01623 }
01624
01625
01626 #ifdef DEBUG_MIMEMAGIC
01627 kdDebug(7018) << "match: rule matched, line=" << m->lineno << " type=" << m->type << " " << ((m->type == STRING) ? m->value.s : "") << endl;
01628 #endif
01629
01630
01631 conf->resultBuf = m->desc;
01632
01633 cont_level++;
01634
01635
01636
01637
01638 m = m->next;
01639 while (m && (m->cont_level != 0)) {
01640 #ifdef DEBUG_MIMEMAGIC
01641 kdDebug(7018) << "match: line=" << m->lineno << " cont=" << m->cont_level << " type=" << m->type << " " << ((m->type == STRING) ? m->value.s : "") << endl;
01642 #endif
01643 if (cont_level >= m->cont_level) {
01644 if (cont_level > m->cont_level) {
01645
01646
01647
01648
01649 cont_level = m->cont_level;
01650 }
01651 if (mget(&p, s, m, nbytes) &&
01652 mcheck(&p, m)) {
01653
01654
01655
01656
01657
01658
01659 #ifdef DEBUG_MIMEMAGIC
01660 kdDebug(7018) << "continuation matched" << endl;
01661 #endif
01662 conf->resultBuf = m->desc;
01663 cont_level++;
01664 }
01665 }
01666
01667 m = m->next;
01668 }
01669
01670
01671 if ( !conf->resultBuf.isEmpty() )
01672 {
01673 #ifdef DEBUG_MIMEMAGIC
01674 kdDebug(7018) << "match: matched" << endl;
01675 #endif
01676 return 1;
01677 }
01678 }
01679 #ifdef DEBUG_MIMEMAGIC
01680 kdDebug(7018) << "match: failed" << endl;
01681 #endif
01682 return 0;
01683 }
01684
01685
01686
/*
 * tagmagic - measure a leading tag block that starts with the bytes "ID3".
 *
 * buf    - data to inspect.
 * nbytes - number of valid bytes in buf; fewer than 40 are never inspected.
 *
 * Returns 0 when the buffer is too short, does not start with "ID3", has a
 * version byte (buf[3]) above 4, or has any of the low four flag bits set in
 * buf[5].  Otherwise returns the tag size: a 10-byte header, 10 more bytes
 * when flag bit 0x10 is set, plus the value of buf[6..9] combined at 7 bits
 * per byte (buf[6] being the most significant).
 */
static int tagmagic(unsigned char *buf, int nbytes)
{
    if (nbytes < 40)
        return 0;
    if (buf[0] != 'I' || buf[1] != 'D' || buf[2] != '3')
        return 0;

    // Version byte must not exceed 4.
    if (buf[3] > 4)
        return 0;

    const unsigned char flags = buf[5];
    // Any of the four low flag bits being set rejects the tag.
    if (flags & 0x0F)
        return 0;

    // Header, optionally followed by another 10 bytes when bit 0x10 is set.
    int total = (flags & 0x10) ? 20 : 10;

    // Four size bytes, each contributing 7 bits.
    total += (buf[6] << 21) + (buf[7] << 14) + (buf[8] << 7) + buf[9];
    return total;
}
01706
/*
 * A token: `length` characters starting at `data`.  The characters live in
 * the buffer handed to the Tokenizer and are not NUL-terminated.
 */
struct Token {
    char *data;
    int length;
};

/*
 * Splits a character buffer into tokens separated by any of
 * '\n' '\0' '\t' ' ' '\r' '\f' ',' ';' '>'.
 *
 * nextToken() returns a pointer to an internal Token that is overwritten by
 * the next call; a token of length 0 means the buffer is exhausted.
 * isNewLine() tells whether the token returned by the last nextToken() call
 * is the first one of the buffer or was preceded by a '\n'.
 */
struct Tokenizer
{
    Tokenizer(char* buf, int nbytes) {
        data = buf;
        length = nbytes;
        pos = 0;
        // Give isNewLine() and the cached token defined values even before
        // the first nextToken() call.
        newline = false;
        token.data = buf;
        token.length = 0;
    }
    bool isNewLine() {
        return newline;
    }
    Token* nextToken() {
        // The very first token of the buffer starts a line by definition.
        newline = (pos == 0);
        token.data = data+pos;
        token.length = 0;
        while(pos<length) {
            switch (data[pos]) {
                case '\n':
                case '\0':
                case '\t':
                case ' ':
                case '\r':
                case '\f':
                case ',':
                case ';':
                case '>':
                    // A separator after token characters ends the token.
                    // `pos` stays on the separator so the next call sees it;
                    // a terminating '\n' therefore marks the NEXT token as
                    // starting a line, not the one being returned.
                    if (token.length > 0)
                        return &token;
                    // Leading separator: skip it, noting line breaks.
                    if (data[pos] == '\n')
                        newline = true;
                    token.data++;
                    break;
                default:
                    token.length++;
            }
            pos++;
        }
        return &token;
    }

private:
    Token token;
    char* data;
    int length;
    int pos;
    bool newline;
};
01760
01761
01762
01763
01764 static inline bool STREQ(const Token *token, const char *b) {
01765 const char *a = token->data;
01766 int len = token->length;
01767 if (a == b) return true;
01768 while(*a && *b && len > 0) {
01769 if (*a != *b) return false;
01770 a++; b++; len--;
01771 }
01772 return (len == 0 && *b == 0);
01773 }
01774
01775 static int ascmagic(struct config_rec* conf, unsigned char *buf, int nbytes)
01776 {
01777 int i;
01778 double pct, maxpct, pctsum;
01779 double pcts[NTYPES];
01780 int mostaccurate, tokencount;
01781 int typeset, jonly, conly, jconly, objconly, cpponly;
01782 int has_escapes = 0;
01783
01784
01785
01786
01787 conf->accuracy = 70;
01788
01789
01790
01791
01792
01793
01794 if (*buf == '.') {
01795 unsigned char *tp = buf + 1;
01796
01797 while (isascii(*tp) && isspace(*tp))
01798 ++tp;
01799 if ((isascii(*tp) && (isalnum(*tp) || *tp == '\\') &&
01800 isascii(*(tp + 1)) && (isalnum(*(tp + 1)) || *tp == '"'))) {
01801 conf->resultBuf = MIME_APPL_TROFF;
01802 return 1;
01803 }
01804 }
01805 if ((*buf == 'c' || *buf == 'C') &&
01806 isascii(*(buf + 1)) && isspace(*(buf + 1))) {
01807
01808 conf->resultBuf = MIME_TEXT_FORTRAN;
01809 return 1;
01810 }
01811 assert(nbytes-1 < HOWMANY + 1);
01812
01813 has_escapes = (memchr(buf, '\033', nbytes) != NULL);
01814 Tokenizer tokenizer((char*)buf, nbytes);
01815 const Token* token;
01816 bool linecomment = false, blockcomment = false;
01817 const struct names *p;
01818 int typecount[NTYPES];
01819
01820
01821
01822
01823 memset(&typecount, 0, sizeof(typecount));
01824 typeset = 0;
01825 jonly = 0;
01826 conly = 0;
01827 jconly = 0;
01828 objconly = 0;
01829 cpponly = 0;
01830 tokencount = 0;
01831 bool foundClass = false;
01832
01833
01834 while ((token = tokenizer.nextToken())->length > 0) {
01835 #ifdef DEBUG_MIMEMAGIC
01836 kdDebug(7018) << "KMimeMagic::ascmagic token=" << token << endl;
01837 #endif
01838 if (linecomment && tokenizer.isNewLine())
01839 linecomment = false;
01840 if (blockcomment && STREQ(token, "*/")) {
01841 blockcomment = false;
01842 continue;
01843 }
01844 for (p = names; p->name ; p++) {
01845 if (STREQ(token, p->name)) {
01846 #ifdef DEBUG_MIMEMAGIC
01847 kdDebug(7018) << "KMimeMagic::ascmagic token matches ! name=" << p->name << " type=" << p->type << endl;
01848 #endif
01849 tokencount++;
01850 typeset |= p->type;
01851 if(p->type & (L_C|L_CPP|L_JAVA|L_OBJC)) {
01852 if (linecomment || blockcomment) {
01853 continue;
01854 }
01855 else {
01856 switch(p->type & (L_C|L_CPP|L_JAVA|L_OBJC))
01857 {
01858 case L_JAVA:
01859 jonly++;
01860 break;
01861 case L_OBJC:
01862 objconly++;
01863 break;
01864 case L_CPP:
01865 cpponly++;
01866 break;
01867 case (L_CPP|L_JAVA):
01868 jconly++;
01869 if ( !foundClass && STREQ(token, "class") )
01870 foundClass = true;
01871 break;
01872 case (L_C|L_CPP):
01873 conly++;
01874 break;
01875 default:
01876 if (STREQ(token, "//")) linecomment = true;
01877 if (STREQ(token, "/*")) blockcomment = true;
01878 }
01879 }
01880 }
01881 for (i = 0; i < (int)NTYPES; i++) {
01882 if ((1 << i) & p->type) typecount[i]++;
01883 }
01884 }
01885 }
01886 }
01887
01888 if (typeset & (L_C|L_CPP|L_JAVA|L_OBJC)) {
01889 conf->accuracy = 60;
01890 if (!(typeset & ~(L_C|L_CPP|L_JAVA|L_OBJC))) {
01891 #ifdef DEBUG_MIMEMAGIC
01892 kdDebug(7018) << "C/C++/Java/ObjC: jonly=" << jonly << " conly=" << conly << " jconly=" << jconly << " objconly=" << objconly << endl;
01893 #endif
01894 if (jonly > 1 && foundClass) {
01895
01896 conf->resultBuf = QString(types[P_JAVA].type);
01897 return 1;
01898 }
01899 if (jconly > 1) {
01900
01901 if (typecount[P_JAVA] < typecount[P_CPP])
01902 conf->resultBuf = QString(types[P_CPP].type);
01903 else
01904 conf->resultBuf = QString(types[P_JAVA].type);
01905 return 1;
01906 }
01907 if (conly + cpponly > 1) {
01908
01909 if (cpponly > 0)
01910 conf->resultBuf = QString(types[P_CPP].type);
01911 else
01912 conf->resultBuf = QString(types[P_C].type);
01913 return 1;
01914 }
01915 if (objconly > 0) {
01916 conf->resultBuf = QString(types[P_OBJC].type);
01917 return 1;
01918 }
01919 }
01920 }
01921
01922
01923
01924
01925
01926 mostaccurate = -1;
01927 maxpct = pctsum = 0.0;
01928 for (i = 0; i < (int)NTYPES; i++) {
01929 if (typecount[i] > 1) {
01930 pct = (double)typecount[i] / (double)types[i].kwords *
01931 (double)types[i].weight;
01932 pcts[i] = pct;
01933 pctsum += pct;
01934 if (pct > maxpct) {
01935 maxpct = pct;
01936 mostaccurate = i;
01937 }
01938 #ifdef DEBUG_MIMEMAGIC
01939 kdDebug(7018) << "" << types[i].type << " has " << typecount[i] << " hits, " << types[i].kwords << " kw, weight " << types[i].weight << ", " << pct << " -> max = " << maxpct << "\n" << endl;
01940 #endif
01941 }
01942 }
01943 if (mostaccurate >= 0) {
01944 if ( mostaccurate != P_JAVA || foundClass )
01945 {
01946 conf->accuracy = (int)(pcts[mostaccurate] / pctsum * 60);
01947 #ifdef DEBUG_MIMEMAGIC
01948 kdDebug(7018) << "mostaccurate=" << mostaccurate << " pcts=" << pcts[mostaccurate] << " pctsum=" << pctsum << " accuracy=" << conf->accuracy << endl;
01949 #endif
01950 conf->resultBuf = QString(types[mostaccurate].type);
01951 return 1;
01952 }
01953 }
01954
01955 switch (is_tar(buf, nbytes)) {
01956 case 1:
01957
01958 conf->resultBuf = MIME_APPL_TAR;
01959 conf->accuracy = 90;
01960 return 1;
01961 case 2:
01962
01963 conf->resultBuf = MIME_APPL_TAR;
01964 conf->accuracy = 90;
01965 return 1;
01966 }
01967
01968 for (i = 0; i < nbytes; i++) {
01969 if (!isascii(*(buf + i)))
01970 return 0;
01971 }
01972
01973
01974 conf->accuracy = 90;
01975 if (has_escapes) {
01976
01977
01978 conf->resultBuf = MIME_TEXT_UNKNOWN;
01979 } else {
01980
01981 conf->resultBuf = MIME_TEXT_PLAIN;
01982 }
01983 return 1;
01984 }
01985
01986
01987 #define TEXT_MAXLINELEN 300
01988
01989
01990
01991
01992
01993 static int textmagic(struct config_rec* conf, unsigned char * buf, int nbytes)
01994 {
01995 int i;
01996 unsigned char *cp;
01997
01998 nbytes--;
01999
02000
02001 for (i = 0, cp = buf; i < nbytes; i++, cp++)
02002 if ((*cp < 8) || (*cp>13 && *cp<32 && *cp!=27 ) || (*cp==0x7F))
02003 return 0;
02004
02005
02006
02007
02008 for (i = 0; i < nbytes;) {
02009 cp = (unsigned char *) memchr(buf, '\n', nbytes - i);
02010 if (cp == NULL) {
02011
02012 if (i + TEXT_MAXLINELEN >= nbytes)
02013 break;
02014 else
02015 return 0;
02016 }
02017 if (cp - buf > TEXT_MAXLINELEN)
02018 return 0;
02019 i += (cp - buf + 1);
02020 buf = cp + 1;
02021 }
02022 conf->resultBuf = MIME_TEXT_PLAIN;
02023 return 1;
02024 }
02025
02026
02027
02028
02029
02030
02031
02032
02033
02034
02035
02036
02037
02038
02039
02040 #define isodigit(c) ( ((c) >= '0') && ((c) <= '7') )
02041
02042
02043
02044
02045
02046
02047 static int
02048 is_tar(unsigned char *buf, int nbytes)
02049 {
02050 register union record *header = (union record *) buf;
02051 register int i;
02052 register long sum,
02053 recsum;
02054 register char *p;
02055
02056 if (nbytes < (int)sizeof(union record))
02057 return 0;
02058
02059 recsum = from_oct(8, header->header.chksum);
02060
02061 sum = 0;
02062 p = header->charptr;
02063 for (i = sizeof(union record); --i >= 0;) {
02064
02065
02066
02067
02068 sum += 0xFF & *p++;
02069 }
02070
02071
02072 for (i = sizeof(header->header.chksum); --i >= 0;)
02073 sum -= 0xFF & header->header.chksum[i];
02074 sum += ' ' * sizeof header->header.chksum;
02075
02076 if (sum != recsum)
02077 return 0;
02078
02079 if (0 == strcmp(header->header.magic, TMAGIC))
02080 return 2;
02081
02082 return 1;
02083 }
02084
02085
02086
02087
02088
02089
02090
/*
 * from_oct - parse an octal number out of a fixed-width text field.
 *
 * digs  - width of the field in characters.
 * where - start of the field; leading white space is skipped.
 *
 * Returns the parsed value, or -1 when the field consists of white space
 * only or when the digits are followed, inside the field, by a character
 * that is neither NUL nor white space.
 *
 * Characters are converted to unsigned char before being handed to
 * isspace(), whose behaviour is undefined for negative arguments.
 */
static long
from_oct(int digs, char *where)
{
    long value = 0;

    /* Skip leading white space; a field made only of white space is invalid. */
    while (isspace((unsigned char) *where)) {
        where++;
        if (--digs <= 0)
            return -1;
    }

    /* Accumulate octal digits, three bits per digit. */
    while (digs > 0 && *where >= '0' && *where <= '7') {
        value = (value << 3) | (*where++ - '0');
        --digs;
    }

    /* Inside the field, only NUL or white space may follow the digits. */
    if (digs > 0 && *where && !isspace((unsigned char) *where))
        return -1;

    return value;
}
02112
02113 KMimeMagic::KMimeMagic()
02114 {
02115
02116 QString mimefile = locate( "mime", "magic" );
02117 init( mimefile );
02118
02119 QStringList snippets = KGlobal::dirs()->findAllResources( "config", "magic/*.magic", true );
02120 for ( QStringList::Iterator it = snippets.begin() ; it != snippets.end() ; ++it )
02121 if ( !mergeConfig( *it ) )
02122 kdWarning() << k_funcinfo << "Failed to parse " << *it << endl;
02123 }
02124
02125 KMimeMagic::KMimeMagic(const QString & _configfile)
02126 {
02127 init( _configfile );
02128 }
02129
02130 void KMimeMagic::init( const QString& _configfile )
02131 {
02132 int result;
02133 conf = new config_rec;
02134
02135
02136 conf->magic = conf->last = NULL;
02137 magicResult = NULL;
02138 conf->followLinks = false;
02139
02140 conf->utimeConf = 0L;
02141
02142 result = apprentice(_configfile);
02143 if (result == -1)
02144 return;
02145 #ifdef MIME_MAGIC_DEBUG_TABLE
02146 test_table();
02147 #endif
02148 }
02149
02150
02151
02152
02153
02154 KMimeMagic::~KMimeMagic()
02155 {
02156 if (conf) {
02157 struct magic *p = conf->magic;
02158 struct magic *q;
02159 while (p) {
02160 q = p;
02161 p = p->next;
02162 free(q);
02163 }
02164 delete conf->utimeConf;
02165 delete conf;
02166 }
02167 delete magicResult;
02168 }
02169
02170 bool
02171 KMimeMagic::mergeConfig(const QString & _configfile)
02172 {
02173 kdDebug(7018) << k_funcinfo << _configfile << endl;
02174 int result;
02175
02176 if (_configfile.isEmpty())
02177 return false;
02178 result = apprentice(_configfile);
02179 if (result == -1) {
02180 return false;
02181 }
02182 #ifdef MIME_MAGIC_DEBUG_TABLE
02183 test_table();
02184 #endif
02185 return true;
02186 }
02187
02188 bool
02189 KMimeMagic::mergeBufConfig(char * _configbuf)
02190 {
02191 int result;
02192
02193 if (conf) {
02194 result = buff_apprentice(_configbuf);
02195 if (result == -1)
02196 return false;
02197 #ifdef MIME_MAGIC_DEBUG_TABLE
02198 test_table();
02199 #endif
02200 return true;
02201 }
02202 return false;
02203 }
02204
02205 void
02206 KMimeMagic::setFollowLinks( bool _enable )
02207 {
02208 conf->followLinks = _enable;
02209 }
02210
02211 KMimeMagicResult *
02212 KMimeMagic::findBufferType(const QByteArray &array)
02213 {
02214 unsigned char buf[HOWMANY + 1];
02215
02216 conf->resultBuf = QString::null;
02217 if ( !magicResult )
02218 magicResult = new KMimeMagicResult();
02219 magicResult->setInvalid();
02220 conf->accuracy = 100;
02221
02222 int nbytes = array.size();
02223
02224 if (nbytes > HOWMANY)
02225 nbytes = HOWMANY;
02226 memcpy(buf, array.data(), nbytes);
02227 if (nbytes == 0) {
02228 conf->resultBuf = MIME_BINARY_ZEROSIZE;
02229 } else {
02230 buf[nbytes++] = '\0';
02231 tryit(conf, buf, nbytes);
02232 }
02233
02234 magicResult->setMimeType(conf->resultBuf.stripWhiteSpace());
02235 magicResult->setAccuracy(conf->accuracy);
02236 return magicResult;
02237 }
02238
02239 static void
02240 refineResult(KMimeMagicResult *r, const QString & _filename)
02241 {
02242 QString tmp = r->mimeType();
02243 if (tmp.isEmpty())
02244 return;
02245 if ( tmp == "text/x-c" || tmp == "text/x-objc" )
02246 {
02247 if ( _filename.right(2) == ".h" )
02248 tmp += "hdr";
02249 else
02250 tmp += "src";
02251 r->setMimeType(tmp);
02252 }
02253 else
02254 if ( tmp == "text/x-c++" )
02255 {
02256 if ( _filename.endsWith(".h")
02257 || _filename.endsWith(".hh")
02258 || _filename.endsWith(".H")
02259 || !_filename.right(4).contains('.'))
02260 tmp += "hdr";
02261 else
02262 tmp += "src";
02263 r->setMimeType(tmp);
02264 }
02265 }
02266
02267 KMimeMagicResult *
02268 KMimeMagic::findBufferFileType( const QByteArray &data,
02269 const QString &fn)
02270 {
02271 KMimeMagicResult * r = findBufferType( data );
02272 refineResult(r, fn);
02273 return r;
02274 }
02275
02276
02277
02278
02279 KMimeMagicResult* KMimeMagic::findFileType(const QString & fn)
02280 {
02281 #ifdef DEBUG_MIMEMAGIC
02282 kdDebug(7018) << "KMimeMagic::findFileType " << fn << endl;
02283 #endif
02284 conf->resultBuf = QString::null;
02285
02286 if ( !magicResult )
02287 magicResult = new KMimeMagicResult();
02288 magicResult->setInvalid();
02289 conf->accuracy = 100;
02290
02291 if ( !conf->utimeConf )
02292 conf->utimeConf = new KMimeMagicUtimeConf();
02293
02294
02295 process(conf, fn );
02296
02297
02298
02299 magicResult->setMimeType(conf->resultBuf.stripWhiteSpace());
02300 magicResult->setAccuracy(conf->accuracy);
02301 refineResult(magicResult, fn);
02302 return magicResult;
02303 }