00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019 #include "kmimemagic.h"
00020 #include <kdebug.h>
00021 #include <kapplication.h>
00022 #include <qfile.h>
00023 #include <ksimpleconfig.h>
00024 #include <kstandarddirs.h>
00025 #include <kstaticdeleter.h>
00026 #include <klargefile.h>
00027 #include <assert.h>
00028
/*
 * Forward declarations of the file-local detection helpers.  Each one that
 * takes a config_rec reports its result through that record.
 */
00029 static int fsmagic(struct config_rec* conf, const char *fn, KDE_struct_stat *sb);
00030 static void process(struct config_rec* conf, const QString &);
00031 static int ascmagic(struct config_rec* conf, unsigned char *buf, int nbytes);
00032 static int tagmagic(unsigned char *buf, int nbytes);
00033 static int textmagic(struct config_rec* conf, unsigned char *, int);
00034
00035 static void tryit(struct config_rec* conf, unsigned char *buf, int nb);
00036 static int match(struct config_rec* conf, unsigned char *, int);
00037
/* Singleton instance; assigned in KMimeMagic::initStatic(). */
00038 KMimeMagic* KMimeMagic::s_pSelf;
/* Static deleter the singleton is registered with in initStatic(). */
00039 static KStaticDeleter<KMimeMagic> kmimemagicsd;
00040
00041 KMimeMagic* KMimeMagic::self()
00042 {
00043 if( !s_pSelf )
00044 initStatic();
00045 return s_pSelf;
00046 }
00047
00048 void KMimeMagic::initStatic()
00049 {
00050 s_pSelf = kmimemagicsd.setObject( s_pSelf, new KMimeMagic() );
00051 s_pSelf->setFollowLinks( true );
00052 }
00053
00054 #include <stdio.h>
00055 #include <unistd.h>
00056 #include <stdlib.h>
00057 #include <sys/wait.h>
00058 #include <sys/types.h>
00059 #include <sys/stat.h>
00060 #include <fcntl.h>
00061 #include <errno.h>
00062 #include <ctype.h>
00063 #include <time.h>
00064 #include <utime.h>
00065 #include <stdarg.h>
00066 #include <qregexp.h>
00067 #include <qstring.h>
00068
00069
00070
00071
00072
00073
00074
00075
/*
 * Either debug switch turns on DEBUG_LINENUMBERS, which adds a `lineno`
 * member to struct magic and a named `lineno` parameter to parse().
 */
00076 #if (defined DEBUG_MIMEMAGIC || defined DEBUG_APPRENTICE)
00077 #define DEBUG_LINENUMBERS
00078 #endif
00079
00080
00081
00082
/* MIME type strings stored into config_rec::resultBuf by the detectors. */
00083 #define MIME_BINARY_UNKNOWN "application/octet-stream"
00084 #define MIME_BINARY_UNREADABLE "application/x-unreadable"
00085 #define MIME_BINARY_ZEROSIZE "application/x-zerosize"
00086 #define MIME_TEXT_UNKNOWN "text/plain"
00087 #define MIME_TEXT_PLAIN "text/plain"
00088 #define MIME_INODE_DIR "inode/directory"
00089 #define MIME_INODE_CDEV "inode/chardevice"
00090 #define MIME_INODE_BDEV "inode/blockdevice"
00091 #define MIME_INODE_FIFO "inode/fifo"
00092 #define MIME_INODE_LINK "inode/link"
00093 #define MIME_INODE_SOCK "inode/socket"
00094
00095 #define MIME_APPL_TROFF "application/x-troff"
00096 #define MIME_APPL_TAR "application/x-tar"
00097 #define MIME_TEXT_FORTRAN "text/x-fortran"
00098
00099 #define MAXMIMESTRING 256
00100
/* Number of bytes process() reads from the start of a file. */
00101 #define HOWMANY 4000
/* Size of magic::desc, including the terminating NUL. */
00102 #define MAXDESC 50
/* Size of VALUETYPE::s, i.e. the longest string a rule can compare against. */
00103 #define MAXstring 64
00104
/*
 * One value as seen by a magic rule.  mget() copies raw file bytes into it
 * and mconvert() then rewrites `h`/`l` from the byte views `hs`/`hl`;
 * parse() stores the value a rule compares against in `l` or `s`.
 */
00105 typedef union VALUETYPE {
00106 unsigned char b;
00107 unsigned short h;
00108 unsigned long l;
00109 char s[MAXstring];
/* Byte-wise views used by mconvert() to assemble 16- and 32-bit values. */
00110 unsigned char hs[2];
00111 unsigned char hl[4];
00112 } VALUETYPE;
00113
/*
 * One parsed rule of the magic file.  Rules are kept in a singly linked
 * list in file order (config_rec::magic .. config_rec::last); parse() fills
 * in every field.
 */
00114 struct magic {
/* Next rule in file order, NULL for the last one. */
00115 struct magic *next;
00116 #ifdef DEBUG_LINENUMBERS
/* Line number of the rule in the magic file (debug builds only). */
00117 int lineno;
00118 #endif
00119
/* Bit set of INDIR / UNSIGNED. */
00120 short flag;
00121 #define INDIR 1
00122 #define UNSIGNED 2
/* Number of leading '>' characters on the rule line; 0 for a top-level rule. */
00123 short cont_level;
/* Indirect-offset description, only meaningful when INDIR is set. */
00124 struct {
00125 char type;
00126 long offset;
00127 } in;
/* Offset into the file buffer at which the value is read. */
00128 long offset;
/* Relation character: one of '=', '!', '<', '>', '&', '^' or 'x'. */
00129 unsigned char reln;
/* Value type, one of the BYTE .. LEDATE constants below. */
00130 char type;
/* Length of value.s for STRING rules, as returned by getstr(). */
00131 char vallen;
00132 #define BYTE 1
00133 #define SHORT 2
00134 #define LONG 4
00135 #define STRING 5
00136 #define DATE 6
00137 #define BESHORT 7
00138 #define BELONG 8
00139 #define BEDATE 9
00140 #define LESHORT 10
00141 #define LELONG 11
00142 #define LEDATE 12
/* Value the file data is compared against. */
00143 VALUETYPE value;
/* Mask ANDed onto the file value in mcheck(); all ones when the rule has none. */
00144 unsigned long mask;
/* Set when the description starts with a backspace ('\b' or the text "\b"). */
00145 char nospflag;
00146
00147
/* Rule description; match() copies it into config_rec::resultBuf. */
00148 char desc[MAXDESC];
00149 };
00150
00151
00152
00153
00154
00155
00156
00157
00158
00159
00160
00161
00162
00163
00164
/*
 * Layout of one tar header record: a 512-byte block overlaid with the
 * named header fields.  Presumably consumed by is_tar(), which is not part
 * of this excerpt -- confirm against its definition.
 */
00165 #define RECORDSIZE 512
00166 #define NAMSIZ 100
00167 #define TUNMLEN 32
00168 #define TGNMLEN 32
00169
00170 union record {
00171 char charptr[RECORDSIZE];
00172 struct header {
00173 char name[NAMSIZ];
00174 char mode[8];
00175 char uid[8];
00176 char gid[8];
00177 char size[12];
00178 char mtime[12];
00179 char chksum[8];
00180 char linkflag;
00181 char linkname[NAMSIZ];
00182 char magic[8];
00183 char uname[TUNMLEN];
00184 char gname[TGNMLEN];
00185 char devmajor[8];
00186 char devminor[8];
00187 } header;
00188 };
00189
00190
/* Contents expected in header.magic (note the trailing blank). */
00191 #define TMAGIC "ustar "
00192
00193
00194
00195
/* Prototypes of the file-local helpers for rule parsing and rule matching. */
00196 static int is_tar(unsigned char *, int);
00197 static unsigned long signextend(struct magic *, unsigned long);
00198 static int getvalue(struct magic *, char **);
00199 static int hextoint(int);
00200 static char *getstr(char *, char *, int, int *);
00201 static int mget(union VALUETYPE *, unsigned char *, struct magic *, int);
00202 static int mcheck(union VALUETYPE *, struct magic *);
00203 static int mconvert(union VALUETYPE *, struct magic *);
00204 static long from_oct(int, char *);
00205
00206
00207
00208
00209
00210
00211
00212
00213
00214
00215
00216
00217
00218
00219
/*
 * L_*: one bit per text language; the keyword table names[] ORs them
 * together for tokens shared by several languages.
 */
00220 #define L_HTML 0x001
00221 #define L_C 0x002
00222 #define L_MAKE 0x004
00223 #define L_PLI 0x008
00224 #define L_MACH 0x010
00225 #define L_PAS 0x020
00226 #define L_JAVA 0x040
00227 #define L_CPP 0x080
00228 #define L_MAIL 0x100
00229 #define L_NEWS 0x200
00230 #define L_DIFF 0x400
00231 #define L_OBJC 0x800
00232
/*
 * P_*: bit position of the matching L_* flag (L_x == 1 << P_x), which is
 * also the index of that language in types[].
 */
00233 #define P_HTML 0
00234 #define P_C 1
00235 #define P_MAKE 2
00236 #define P_PLI 3
00237 #define P_MACH 4
00238 #define P_PAS 5
00239 #define P_JAVA 6
00240 #define P_CPP 7
00241 #define P_MAIL 8
00242 #define P_NEWS 9
00243 #define P_DIFF 10
00244 #define P_OBJC 11
00245
/*
 * Per-language entry of the text classifier.
 * NOTE(review): `kwords` and `weight` are consumed by code outside this
 * excerpt; `kwords` looks like a keyword count and `weight` like a scoring
 * factor -- confirm in ascmagic().
 */
00246 typedef struct asc_type {
00247 const char *type;
00248 int kwords;
00249 double weight;
00250 } asc_type;
00251
/* Indexed by the P_* constants: types[P_HTML] is "text/html", and so on. */
00252 static const asc_type types[] = {
00253 { "text/html", 19, 2 },
00254 { "text/x-c", 13, 1 },
00255 { "text/x-makefile", 4, 1.9 },
00256 { "text/x-pli", 1, 3 },
00257 { "text/x-assembler", 6, 2.1 },
00258 { "text/x-pascal", 1, 1 },
00259 { "text/x-java", 12, 1 },
00260 { "text/x-c++", 19, 1 },
00261 { "message/rfc822", 4, 1.9 },
00262 { "message/news", 3, 2 },
00263 { "text/x-diff", 4, 2 },
00264 { "text/x-objc", 10, 1 }
00265 };
00266
/* Number of entries in types[]. */
00267 #define NTYPES (sizeof(types)/sizeof(asc_type))
00268
00269 static struct names {
00270 const char *name;
00271 short type;
00272 } const names[] = {
00273 {
00274 "<html", L_HTML
00275 },
00276 {
00277 "<HTML", L_HTML
00278 },
00279 {
00280 "<head", L_HTML
00281 },
00282 {
00283 "<HEAD", L_HTML
00284 },
00285 {
00286 "<body", L_HTML
00287 },
00288 {
00289 "<BODY", L_HTML
00290 },
00291 {
00292 "<title", L_HTML
00293 },
00294 {
00295 "<TITLE", L_HTML
00296 },
00297 {
00298 "<h1", L_HTML
00299 },
00300 {
00301 "<H1", L_HTML
00302 },
00303 {
00304 "<a", L_HTML
00305 },
00306 {
00307 "<A", L_HTML
00308 },
00309 {
00310 "<img", L_HTML
00311 },
00312 {
00313 "<IMG", L_HTML
00314 },
00315 {
00316 "<!--", L_HTML
00317 },
00318 {
00319 "<!doctype", L_HTML
00320 },
00321 {
00322 "<!DOCTYPE", L_HTML
00323 },
00324 {
00325 "<div", L_HTML
00326 },
00327 {
00328 "<DIV", L_HTML
00329 },
00330 {
00331 "<frame", L_HTML
00332 },
00333 {
00334 "<FRAME", L_HTML
00335 },
00336 {
00337 "<frameset", L_HTML
00338 },
00339 {
00340 "<FRAMESET", L_HTML
00341 },
00342 {
00343 "<script", L_HTML
00344 },
00345 {
00346 "<SCRIPT", L_HTML
00347 },
00348 {
00349 "/*", L_C|L_CPP|L_JAVA|L_OBJC
00350 },
00351 {
00352 "//", L_C|L_CPP|L_JAVA|L_OBJC
00353 },
00354 {
00355 "#include", L_C|L_CPP
00356 },
00357 {
00358 "#ifdef", L_C|L_CPP
00359 },
00360 {
00361 "#ifndef", L_C|L_CPP
00362 },
00363 {
00364 "bool", L_C|L_CPP
00365 },
00366 {
00367 "char", L_C|L_CPP|L_JAVA|L_OBJC
00368 },
00369 {
00370 "int", L_C|L_CPP|L_JAVA|L_OBJC
00371 },
00372 {
00373 "float", L_C|L_CPP|L_JAVA|L_OBJC
00374 },
00375 {
00376 "void", L_C|L_CPP|L_JAVA|L_OBJC
00377 },
00378 {
00379 "extern", L_C|L_CPP
00380 },
00381 {
00382 "struct", L_C|L_CPP
00383 },
00384 {
00385 "union", L_C|L_CPP
00386 },
00387 {
00388 "implements", L_JAVA
00389 },
00390 {
00391 "super", L_JAVA
00392 },
00393 {
00394 "import", L_JAVA
00395 },
00396 {
00397 "class", L_CPP|L_JAVA
00398 },
00399 {
00400 "public", L_CPP|L_JAVA
00401 },
00402 {
00403 "private", L_CPP|L_JAVA
00404 },
00405 {
00406 "explicit", L_CPP
00407 },
00408 {
00409 "virtual", L_CPP
00410 },
00411 {
00412 "namespace", L_CPP
00413 },
00414 {
00415 "#import", L_OBJC
00416 },
00417 {
00418 "@interface", L_OBJC
00419 },
00420 {
00421 "@implementation", L_OBJC
00422 },
00423 {
00424 "@protocol", L_OBJC
00425 },
00426 {
00427 "CFLAGS", L_MAKE
00428 },
00429 {
00430 "LDFLAGS", L_MAKE
00431 },
00432 {
00433 "all:", L_MAKE
00434 },
00435 {
00436 ".PHONY:", L_MAKE
00437 },
00438 {
00439 "srcdir", L_MAKE
00440 },
00441 {
00442 "exec_prefix", L_MAKE
00443 },
00444
00445
00446
00447
00448 {
00449 ".ascii", L_MACH
00450 },
00451 {
00452 ".asciiz", L_MACH
00453 },
00454 {
00455 ".byte", L_MACH
00456 },
00457 {
00458 ".even", L_MACH
00459 },
00460 {
00461 ".globl", L_MACH
00462 },
00463 {
00464 "clr", L_MACH
00465 },
00466 {
00467 "(input", L_PAS
00468 },
00469 {
00470 "dcl", L_PLI
00471 },
00472 {
00473 "Received:", L_MAIL
00474 },
00475
00476
00477
00478 {
00479 "Return-Path:", L_MAIL
00480 },
00481 {
00482 "Cc:", L_MAIL
00483 },
00484 {
00485 "Newsgroups:", L_NEWS
00486 },
00487 {
00488 "Path:", L_NEWS
00489 },
00490 {
00491 "Organization:", L_NEWS
00492 },
00493 {
00494 "---", L_DIFF
00495 },
00496 {
00497 "+++", L_DIFF
00498 },
00499 {
00500 "***", L_DIFF
00501 },
00502 {
00503 "@@", L_DIFF
00504 },
00505 {
00506 NULL, 0
00507 }
00508 };
00509
00520 class KMimeMagicUtimeConf
00521 {
00522 public:
00523 KMimeMagicUtimeConf()
00524 {
00525 tmpDirs << QString::fromLatin1("/tmp");
00526
00527
00528
00529 QStringList confDirs = KGlobal::dirs()->resourceDirs( "config" );
00530 if ( !confDirs.isEmpty() )
00531 {
00532 QString globalConf = confDirs.last() + "kmimemagicrc";
00533 if ( QFile::exists( globalConf ) )
00534 {
00535 KSimpleConfig cfg( globalConf );
00536 cfg.setGroup( "Settings" );
00537 tmpDirs = cfg.readListEntry( "atimeDirs" );
00538 }
00539 if ( confDirs.count() > 1 )
00540 {
00541 QString localConf = confDirs.first() + "kmimemagicrc";
00542 if ( QFile::exists( localConf ) )
00543 {
00544 KSimpleConfig cfg( localConf );
00545 cfg.setGroup( "Settings" );
00546 tmpDirs += cfg.readListEntry( "atimeDirs" );
00547 }
00548 }
00549 for ( QStringList::Iterator it = tmpDirs.begin() ; it != tmpDirs.end() ; ++it )
00550 {
00551 QString dir = *it;
00552 if ( !dir.isEmpty() && dir[ dir.length()-1 ] != '/' )
00553 (*it) += '/';
00554 }
00555 }
00556 #if 0
00557
00558 for ( QStringList::Iterator it = tmpDirs.begin() ; it != tmpDirs.end() ; ++it )
00559 kdDebug(7018) << " atimeDir: " << *it << endl;
00560 #endif
00561 }
00562
00563 bool restoreAccessTime( const QString & file ) const
00564 {
00565 QString dir = file.left( file.findRev( '/' ) );
00566 bool res = tmpDirs.contains( dir );
00567
00568 return res;
00569 }
00570 QStringList tmpDirs;
00571 };
00572
00573
/* State shared by all detection helpers of one KMimeMagic object. */
00574 struct config_rec {
/* When true, fsmagic() examines the target of a symlink instead of reporting the link. */
00575 bool followLinks;
/* Detection result (a MIME type string or a rule description). */
00576 QString resultBuf;
/* Set to 0 by tryit() when nothing matched; other writers are outside this excerpt. */
00577 int accuracy;
00578
/* Head and tail of the rule list built by KMimeMagic::parse(). */
00579 struct magic *magic,
00580 *last;
/* Optional; when set, process() asks it whether to restore the access time. */
00581 KMimeMagicUtimeConf * utimeConf;
00582 };
00583
/*
 * Debug-only sanity check of the rule list: walks all rules and stops at
 * the first rule pointer whose four low bytes are all printable characters.
 *
 * NOTE(review): this block does not compile when MIME_MAGIC_DEBUG_TABLE is
 * defined -- `conf` is not declared in this function and the statement that
 * should consume the argument list below is missing.  Repairing it needs a
 * `conf` parameter, i.e. a change at the call sites in apprentice() and
 * buff_apprentice().
 */
00584 #ifdef MIME_MAGIC_DEBUG_TABLE
00585 static void
00586 test_table()
00587 {
00588 struct magic *m;
00589 struct magic *prevm = NULL;
00590
00591 kdDebug(7018) << "test_table : started" << endl;
00592 for (m = conf->magic; m; m = m->next) {
00593 if (isprint((((unsigned long) m) >> 24) & 255) &&
00594 isprint((((unsigned long) m) >> 16) & 255) &&
00595 isprint((((unsigned long) m) >> 8) & 255) &&
00596 isprint(((unsigned long) m) & 255)) {
00597
00598
/* Dangling argument list: the call it belonged to is gone. */
00599 (((unsigned long) m) >> 24) & 255,
00600 (((unsigned long) m) >> 16) & 255,
00601 (((unsigned long) m) >> 8) & 255,
00602 ((unsigned long) m) & 255,
00603 prevm ? prevm->lineno : -1);
00604 break;
00605 }
00606 prevm = m;
00607 }
00608 }
00609 #endif
00610
/*
 * Advances the local pointer `l` past any run of ASCII whitespace.
 * Expands to a braced block and relies on a variable named `l` in scope.
 */
00611 #define EATAB {while (isascii((unsigned char) *l) && \
00612 isspace((unsigned char) *l)) ++l;}
00613
00614 int KMimeMagic::parse_line(char *line, int *rule, int lineno)
00615 {
00616 int ws_offset;
00617
00618
00619 if (line[0]) {
00620 line[strlen(line) - 1] = '\0';
00621 }
00622
00623 ws_offset = 0;
00624 while (line[ws_offset] && isspace(line[ws_offset])) {
00625 ws_offset++;
00626 }
00627
00628
00629 if (line[ws_offset] == 0) {
00630 return 0;
00631 }
00632
00633 if (line[ws_offset] == '#')
00634 return 0;
00635
00636
00637 (*rule)++;
00638
00639
00640 return (parse(line + ws_offset, lineno) != 0);
00641 }
00642
00643
00644
00645
00646 int KMimeMagic::apprentice( const QString& magicfile )
00647 {
00648 FILE *f;
00649 char line[BUFSIZ + 1];
00650 int errs = 0;
00651 int lineno;
00652 int rule = 0;
00653 QCString fname;
00654
00655 if (magicfile.isEmpty())
00656 return -1;
00657 fname = QFile::encodeName(magicfile);
00658 f = fopen(fname, "r");
00659 if (f == NULL) {
00660 kdError(7018) << "can't read magic file " << fname.data() << ": " << strerror(errno) << endl;
00661 return -1;
00662 }
00663
00664
00665 for (lineno = 1; fgets(line, BUFSIZ, f) != NULL; lineno++)
00666 if (parse_line(line, &rule, lineno))
00667 errs++;
00668
00669 fclose(f);
00670
00671 #ifdef DEBUG_APPRENTICE
00672 kdDebug(7018) << "apprentice: conf=" << conf << " file=" << magicfile << " m=" << (conf->magic ? "set" : "NULL") << " m->next=" << ((conf->magic && conf->magic->next) ? "set" : "NULL") << " last=" << (conf->last ? "set" : "NULL") << endl;
00673 kdDebug(7018) << "apprentice: read " << lineno << " lines, " << rule << " rules, " << errs << " errors" << endl;
00674 #endif
00675
00676 #ifdef MIME_MAGIC_DEBUG_TABLE
00677 test_table();
00678 #endif
00679
00680 return (errs ? -1 : 0);
00681 }
00682
00683 int KMimeMagic::buff_apprentice(char *buff)
00684 {
00685 char line[BUFSIZ + 2];
00686 int errs = 0;
00687 int lineno = 1;
00688 char *start = buff;
00689 char *end;
00690 int count = 0;
00691 int rule = 0;
00692 int len = strlen(buff) + 1;
00693
00694
00695 do {
00696 count = (len > BUFSIZ-1)?BUFSIZ-1:len;
00697 strncpy(line, start, count);
00698 line[count] = '\0';
00699 if ((end = strchr(line, '\n'))) {
00700 *(++end) = '\0';
00701 count = strlen(line);
00702 } else
00703 strcat(line, "\n");
00704 start += count;
00705 len -= count;
00706 if (parse_line(line, &rule, lineno))
00707 errs++;
00708 lineno++;
00709 } while (len > 0);
00710
00711 #ifdef DEBUG_APPRENTICE
00712 kdDebug(7018) << "buff_apprentice: conf=" << conf << " m=" << (conf->magic ? "set" : "NULL") << " m->next=" << ((conf->magic && conf->magic->next) ? "set" : "NULL") << " last=" << (conf->last ? "set" : "NULL") << endl;
00713 kdDebug(7018) << "buff_apprentice: read " << lineno << " lines, " << rule << " rules, " << errs << " errors" << endl;
00714 #endif
00715
00716 #ifdef MIME_MAGIC_DEBUG_TABLE
00717 test_table();
00718 #endif
00719
00720 return (errs ? -1 : 0);
00721 }
00722
00723
00724
00725
00726 static unsigned long
00727 signextend(struct magic *m, unsigned long v)
00728 {
00729 if (!(m->flag & UNSIGNED))
00730 switch (m->type) {
00731
00732
00733
00734
00735
00736 case BYTE:
00737 v = (char) v;
00738 break;
00739 case SHORT:
00740 case BESHORT:
00741 case LESHORT:
00742 v = (short) v;
00743 break;
00744 case DATE:
00745 case BEDATE:
00746 case LEDATE:
00747 case LONG:
00748 case BELONG:
00749 case LELONG:
00750 v = (long) v;
00751 break;
00752 case STRING:
00753 break;
00754 default:
00755 kdError(7018) << "" << "signextend" << ": can't happen: m->type=" << m->type << endl;
00756 return 998;
00757 }
00758 return v;
00759 }
00760
00761
00762
00763
00764 int KMimeMagic::parse(char *l, int
00765 #ifdef DEBUG_LINENUMBERS
00766 lineno
00767 #endif
00768 )
00769 {
00770 int i = 0;
00771 struct magic *m;
00772 char *t,
00773 *s;
00774
00775 if ((m = (struct magic *) calloc(1, sizeof(struct magic))) == NULL) {
00776 kdError(7018) << "parse: Out of memory." << endl;
00777 return -1;
00778 }
00779
00780 m->next = NULL;
00781 if (!conf->magic || !conf->last) {
00782 conf->magic = conf->last = m;
00783 } else {
00784 conf->last->next = m;
00785 conf->last = m;
00786 }
00787
00788
00789 m->flag = 0;
00790 m->cont_level = 0;
00791 #ifdef DEBUG_LINENUMBERS
00792 m->lineno = lineno;
00793 #endif
00794
00795 while (*l == '>') {
00796 ++l;
00797 m->cont_level++;
00798 }
00799
00800 if (m->cont_level != 0 && *l == '(') {
00801 ++l;
00802 m->flag |= INDIR;
00803 }
00804
00805 m->offset = (int) strtol(l, &t, 0);
00806 if (l == t) {
00807 kdError(7018) << "parse: offset " << l << " invalid" << endl;
00808 }
00809 l = t;
00810
00811 if (m->flag & INDIR) {
00812 m->in.type = LONG;
00813 m->in.offset = 0;
00814
00815
00816
00817 if (*l == '.') {
00818 switch (*++l) {
00819 case 'l':
00820 m->in.type = LONG;
00821 break;
00822 case 's':
00823 m->in.type = SHORT;
00824 break;
00825 case 'b':
00826 m->in.type = BYTE;
00827 break;
00828 default:
00829 kdError(7018) << "parse: indirect offset type " << *l << " invalid" << endl;
00830 break;
00831 }
00832 l++;
00833 }
00834 s = l;
00835 if (*l == '+' || *l == '-')
00836 l++;
00837 if (isdigit((unsigned char) *l)) {
00838 m->in.offset = strtol(l, &t, 0);
00839 if (*s == '-')
00840 m->in.offset = -m->in.offset;
00841 } else
00842 t = l;
00843 if (*t++ != ')') {
00844 kdError(7018) << "parse: missing ')' in indirect offset" << endl;
00845 }
00846 l = t;
00847 }
00848 while (isascii((unsigned char) *l) && isdigit((unsigned char) *l))
00849 ++l;
00850 EATAB;
00851
00852 #define NBYTE 4
00853 #define NSHORT 5
00854 #define NLONG 4
00855 #define NSTRING 6
00856 #define NDATE 4
00857 #define NBESHORT 7
00858 #define NBELONG 6
00859 #define NBEDATE 6
00860 #define NLESHORT 7
00861 #define NLELONG 6
00862 #define NLEDATE 6
00863
00864 if (*l == 'u') {
00865 ++l;
00866 m->flag |= UNSIGNED;
00867 }
00868
00869 if (strncmp(l, "byte", NBYTE) == 0) {
00870 m->type = BYTE;
00871 l += NBYTE;
00872 } else if (strncmp(l, "short", NSHORT) == 0) {
00873 m->type = SHORT;
00874 l += NSHORT;
00875 } else if (strncmp(l, "long", NLONG) == 0) {
00876 m->type = LONG;
00877 l += NLONG;
00878 } else if (strncmp(l, "string", NSTRING) == 0) {
00879 m->type = STRING;
00880 l += NSTRING;
00881 } else if (strncmp(l, "date", NDATE) == 0) {
00882 m->type = DATE;
00883 l += NDATE;
00884 } else if (strncmp(l, "beshort", NBESHORT) == 0) {
00885 m->type = BESHORT;
00886 l += NBESHORT;
00887 } else if (strncmp(l, "belong", NBELONG) == 0) {
00888 m->type = BELONG;
00889 l += NBELONG;
00890 } else if (strncmp(l, "bedate", NBEDATE) == 0) {
00891 m->type = BEDATE;
00892 l += NBEDATE;
00893 } else if (strncmp(l, "leshort", NLESHORT) == 0) {
00894 m->type = LESHORT;
00895 l += NLESHORT;
00896 } else if (strncmp(l, "lelong", NLELONG) == 0) {
00897 m->type = LELONG;
00898 l += NLELONG;
00899 } else if (strncmp(l, "ledate", NLEDATE) == 0) {
00900 m->type = LEDATE;
00901 l += NLEDATE;
00902 } else {
00903 kdError(7018) << "parse: type " << l << " invalid" << endl;
00904 return -1;
00905 }
00906
00907 if (*l == '&') {
00908 ++l;
00909 m->mask = signextend(m, strtol(l, &l, 0));
00910 } else
00911 m->mask = (unsigned long) ~0L;
00912 EATAB;
00913
00914 switch (*l) {
00915 case '>':
00916 case '<':
00917
00918 case '&':
00919 case '^':
00920 case '=':
00921 m->reln = *l;
00922 ++l;
00923 break;
00924 case '!':
00925 if (m->type != STRING) {
00926 m->reln = *l;
00927 ++l;
00928 break;
00929 }
00930
00931 default:
00932 if (*l == 'x' && isascii((unsigned char) l[1]) &&
00933 isspace((unsigned char) l[1])) {
00934 m->reln = *l;
00935 ++l;
00936 goto GetDesc;
00937 }
00938 m->reln = '=';
00939 break;
00940 }
00941 EATAB;
00942
00943 if (getvalue(m, &l))
00944 return -1;
00945
00946
00947
00948 GetDesc:
00949 EATAB;
00950 if (l[0] == '\b') {
00951 ++l;
00952 m->nospflag = 1;
00953 } else if ((l[0] == '\\') && (l[1] == 'b')) {
00954 ++l;
00955 ++l;
00956 m->nospflag = 1;
00957 } else
00958 m->nospflag = 0;
00959
00960 while (*l != '\0' && *l != '#' && i < MAXDESC-1)
00961 m->desc[i++] = *l++;
00962 m->desc[i] = '\0';
00963
00964 while (--i>0 && isspace( m->desc[i] ))
00965 m->desc[i] = '\0';
00966
00967
00968
00969
00970 #ifdef DEBUG_APPRENTICE
00971 kdDebug(7018) << "parse: line=" << lineno << " m=" << m << " next=" << m->next << " cont=" << m->cont_level << " desc=" << (m->desc ? m->desc : "NULL") << endl;
00972 #endif
00973 return 0;
00974 }
00975
00976
00977
00978
00979
00980
00981 static int
00982 getvalue(struct magic *m, char **p)
00983 {
00984 int slen;
00985
00986 if (m->type == STRING) {
00987 *p = getstr(*p, m->value.s, sizeof(m->value.s), &slen);
00988 m->vallen = slen;
00989 } else if (m->reln != 'x')
00990 m->value.l = signextend(m, strtol(*p, p, 0));
00991 return 0;
00992 }
00993
00994
00995
00996
00997
00998
00999 static char *
01000 getstr(register char *s, register char *p, int plen, int *slen)
01001 {
01002 char *origs = s,
01003 *origp = p;
01004 char *pmax = p + plen - 1;
01005 register int c;
01006 register int val;
01007
01008 while ((c = *s++) != '\0') {
01009 if (isspace((unsigned char) c))
01010 break;
01011 if (p >= pmax) {
01012 kdError(7018) << "String too long: " << origs << endl;
01013 break;
01014 }
01015 if (c == '\\') {
01016 switch (c = *s++) {
01017
01018 case '\0':
01019 goto out;
01020
01021 default:
01022 *p++ = (char) c;
01023 break;
01024
01025 case 'n':
01026 *p++ = '\n';
01027 break;
01028
01029 case 'r':
01030 *p++ = '\r';
01031 break;
01032
01033 case 'b':
01034 *p++ = '\b';
01035 break;
01036
01037 case 't':
01038 *p++ = '\t';
01039 break;
01040
01041 case 'f':
01042 *p++ = '\f';
01043 break;
01044
01045 case 'v':
01046 *p++ = '\v';
01047 break;
01048
01049
01050 case '0':
01051 case '1':
01052 case '2':
01053 case '3':
01054 case '4':
01055 case '5':
01056 case '6':
01057 case '7':
01058 val = c - '0';
01059 c = *s++;
01060 if (c >= '0' && c <= '7') {
01061 val = (val << 3) | (c - '0');
01062 c = *s++;
01063 if (c >= '0' && c <= '7')
01064 val = (val << 3) | (c - '0');
01065 else
01066 --s;
01067 } else
01068 --s;
01069 *p++ = (char) val;
01070 break;
01071
01072
01073 case 'x':
01074 val = 'x';
01075 c = hextoint(*s++);
01076 if (c >= 0) {
01077 val = c;
01078 c = hextoint(*s++);
01079 if (c >= 0) {
01080 val = (val << 4) + c;
01081 c = hextoint(*s++);
01082 if (c >= 0) {
01083 val = (val << 4) + c;
01084 } else
01085 --s;
01086 } else
01087 --s;
01088 } else
01089 --s;
01090 *p++ = (char) val;
01091 break;
01092 }
01093 } else
01094 *p++ = (char) c;
01095 }
01096 out:
01097 *p = '\0';
01098 *slen = p - origp;
01099
01100
01101 return s;
01102 }
01103
01104
01105
/*
 * Returns the numeric value (0..15) of the hexadecimal digit `c`, or -1 if
 * `c` is not a hexadecimal digit.
 */
static int
hextoint(int c)
{
    const unsigned char uc = (unsigned char) c;

    /* Anything with the high bit set is not 7-bit ASCII. */
    if ((uc & 0x80) != 0)
        return -1;

    if (isdigit(uc))
        return c - '0';
    if (c >= 'a' && c <= 'f')
        return c - 'a' + 10;
    if (c >= 'A' && c <= 'F')
        return c - 'A' + 10;

    return -1;
}
01119
01120
01121
01122
/*
 * Turns the raw bytes that mget() copied into *p into the value form that
 * mcheck() compares, according to m->type.
 *
 * BYTE is left alone, STRING is forcibly NUL-terminated, and the 16/32-bit
 * types are assembled byte by byte from p->hs / p->hl.
 *
 * Returns 1 on success, 0 (after logging) for an unknown type.
 *
 * As written, the plain SHORT/LONG/DATE types share the big-endian
 * conversion when WORDS_BIGENDIAN is NOT defined and the little-endian
 * conversion when it is.
 *
 * NOTE(review): the shifts operate on bytes promoted to int, so a top byte
 * >= 0x80 ends up in the int sign bit before the cast to long -- confirm
 * that the resulting sign extension on platforms with 64-bit long is
 * intended.
 */
01123 static int
01124 mconvert(union VALUETYPE *p, struct magic *m)
01125 {
01126 switch (m->type) {
01127 case BYTE:
01128 return 1;
01129 case STRING:
01130
/* Guarantee termination; mget() may have filled the whole array. */
01131 p->s[sizeof(p->s) - 1] = '\0';
01132 return 1;
01133 #ifndef WORDS_BIGENDIAN
01134 case SHORT:
01135 #endif
01136 case BESHORT:
/* First byte is the most significant one. */
01137 p->h = (short) ((p->hs[0] << 8) | (p->hs[1]));
01138 return 1;
01139 #ifndef WORDS_BIGENDIAN
01140 case LONG:
01141 case DATE:
01142 #endif
01143 case BELONG:
01144 case BEDATE:
01145 p->l = (long)
01146 ((p->hl[0] << 24) | (p->hl[1] << 16) | (p->hl[2] << 8) | (p->hl[3]));
01147 return 1;
01148 #ifdef WORDS_BIGENDIAN
01149 case SHORT:
01150 #endif
01151 case LESHORT:
/* First byte is the least significant one. */
01152 p->h = (short) ((p->hs[1] << 8) | (p->hs[0]));
01153 return 1;
01154 #ifdef WORDS_BIGENDIAN
01155 case LONG:
01156 case DATE:
01157 #endif
01158 case LELONG:
01159 case LEDATE:
01160 p->l = (long)
01161 ((p->hl[3] << 24) | (p->hl[2] << 16) | (p->hl[1] << 8) | (p->hl[0]));
01162 return 1;
01163 default:
01164 kdError(7018) << "mconvert: invalid type " << m->type << endl;
01165 return 0;
01166 }
01167 }
01168
01169
01170 static int
01171 mget(union VALUETYPE *p, unsigned char *s, struct magic *m,
01172 int nbytes)
01173 {
01174 long offset = m->offset;
01175 switch ( m->type )
01176 {
01177 case BYTE:
01178 if ( offset + 1 > nbytes-1 )
01179 return 0;
01180 break;
01181 case SHORT:
01182 case BESHORT:
01183 case LESHORT:
01184 if ( offset + 2 > nbytes-1 )
01185 return 0;
01186 break;
01187 case LONG:
01188 case BELONG:
01189 case LELONG:
01190 case DATE:
01191 case BEDATE:
01192 case LEDATE:
01193 if ( offset + 4 > nbytes-1 )
01194 return 0;
01195 break;
01196 case STRING:
01197 break;
01198 }
01199
01200
01201
01202
01203 if (offset + (int)sizeof(union VALUETYPE) > nbytes)
01204 {
01205 int have = nbytes - offset;
01206 memset(p, 0, sizeof(union VALUETYPE));
01207 if (have > 0)
01208 memcpy(p, s + offset, have);
01209 } else
01210 memcpy(p, s + offset, sizeof(union VALUETYPE));
01211
01212 if (!mconvert(p, m))
01213 return 0;
01214
01215 if (m->flag & INDIR) {
01216
01217 switch (m->in.type) {
01218 case BYTE:
01219 offset = p->b + m->in.offset;
01220 break;
01221 case SHORT:
01222 offset = p->h + m->in.offset;
01223 break;
01224 case LONG:
01225 offset = p->l + m->in.offset;
01226 break;
01227 }
01228
01229 if (offset + (int)sizeof(union VALUETYPE) > nbytes)
01230 return 0;
01231
01232 memcpy(p, s + offset, sizeof(union VALUETYPE));
01233
01234 if (!mconvert(p, m))
01235 return 0;
01236 }
01237 return 1;
01238 }
01239
01240 static int
01241 mcheck(union VALUETYPE *p, struct magic *m)
01242 {
01243 register unsigned long l = m->value.l;
01244 register unsigned long v;
01245 int matched;
01246
01247 if ((m->value.s[0] == 'x') && (m->value.s[1] == '\0')) {
01248 kdError(7018) << "BOINK" << endl;
01249 return 1;
01250 }
01251 switch (m->type) {
01252 case BYTE:
01253 v = p->b;
01254 break;
01255
01256 case SHORT:
01257 case BESHORT:
01258 case LESHORT:
01259 v = p->h;
01260 break;
01261
01262 case LONG:
01263 case BELONG:
01264 case LELONG:
01265 case DATE:
01266 case BEDATE:
01267 case LEDATE:
01268 v = p->l;
01269 break;
01270
01271 case STRING:
01272 l = 0;
01273
01274
01275
01276
01277
01278 v = 0;
01279 {
01280 register unsigned char *a = (unsigned char *) m->value.s;
01281 register unsigned char *b = (unsigned char *) p->s;
01282 register int len = m->vallen;
01283 Q_ASSERT(len);
01284
01285 while (--len >= 0)
01286 if ((v = *b++ - *a++) != 0)
01287 break;
01288 }
01289 break;
01290 default:
01291 kdError(7018) << "mcheck: invalid type " << m->type << endl;
01292 return 0;
01293 }
01294 #if 0
01295 qDebug("Before signextend %08x", v);
01296 #endif
01297 v = signextend(m, v) & m->mask;
01298 #if 0
01299 qDebug("After signextend %08x", v);
01300 #endif
01301
01302 switch (m->reln) {
01303 case 'x':
01304 matched = 1;
01305 break;
01306
01307 case '!':
01308 matched = v != l;
01309 break;
01310
01311 case '=':
01312 matched = v == l;
01313 break;
01314
01315 case '>':
01316 if (m->flag & UNSIGNED)
01317 matched = v > l;
01318 else
01319 matched = (long) v > (long) l;
01320 break;
01321
01322 case '<':
01323 if (m->flag & UNSIGNED)
01324 matched = v < l;
01325 else
01326 matched = (long) v < (long) l;
01327 break;
01328
01329 case '&':
01330 matched = (v & l) == l;
01331 break;
01332
01333 case '^':
01334 matched = (v & l) != l;
01335 break;
01336
01337 default:
01338 matched = 0;
01339 kdError(7018) << "mcheck: can't happen: invalid relation " << m->reln << "." << endl;
01340 break;
01341 }
01342
01343 return matched;
01344 }
01345
01346
01347
01348
01349
01350
01351 void process(struct config_rec* conf, const QString & fn)
01352 {
01353 int fd = 0;
01354 unsigned char buf[HOWMANY + 1];
01355 KDE_struct_stat sb;
01356 int nbytes = 0;
01357 int tagbytes = 0;
01358 QCString fileName = QFile::encodeName( fn );
01359
01360
01361
01362
01363 if (fsmagic(conf, fileName, &sb) != 0) {
01364
01365 return;
01366 }
01367 if ((fd = KDE_open(fileName, O_RDONLY)) < 0) {
01368
01369
01370
01371
01372
01373
01374 conf->resultBuf = MIME_BINARY_UNREADABLE;
01375 return;
01376 }
01377
01378
01379
01380 if ((nbytes = read(fd, (char *) buf, HOWMANY)) == -1) {
01381 kdError(7018) << "" << fn << " read failed (" << strerror(errno) << ")." << endl;
01382 conf->resultBuf = MIME_BINARY_UNREADABLE;
01383 (void)close(fd);
01384 return;
01385 }
01386 if ((tagbytes = tagmagic(buf, nbytes))) {
01387
01388 lseek(fd, tagbytes, SEEK_SET);
01389 nbytes = read(fd, (char*)buf, HOWMANY);
01390 if (nbytes < 0) {
01391 conf->resultBuf = MIME_BINARY_UNREADABLE;
01392 (void)close(fd);
01393 return;
01394 }
01395 }
01396 if (nbytes == 0) {
01397 conf->resultBuf = MIME_BINARY_ZEROSIZE;
01398 } else {
01399 buf[nbytes++] = '\0';
01400 tryit(conf, buf, nbytes);
01401 }
01402
01403 if ( conf->utimeConf && conf->utimeConf->restoreAccessTime( fn ) )
01404 {
01405
01406
01407
01408
01409
01410 struct utimbuf utbuf;
01411 utbuf.actime = sb.st_atime;
01412 utbuf.modtime = sb.st_mtime;
01413 (void) utime(fileName, &utbuf);
01414 }
01415 (void) close(fd);
01416 }
01417
01418
01419 static void tryit(struct config_rec* conf, unsigned char *buf, int nb)
01420 {
01421
01422 if (match(conf, buf, nb))
01423 return;
01424
01425
01426 if (ascmagic(conf, buf, nb) == 1)
01427 return;
01428
01429
01430 if (textmagic(conf, buf, nb))
01431 return;
01432
01433
01434 conf->resultBuf = MIME_BINARY_UNKNOWN;
01435 conf->accuracy = 0;
01436 }
01437
01438 static int
01439 fsmagic(struct config_rec* conf, const char *fn, KDE_struct_stat *sb)
01440 {
01441 int ret = 0;
01442
01443
01444
01445
01446
01447 ret = KDE_lstat(fn, sb);
01448
01449 if (ret) {
01450 return 1;
01451
01452 }
01453
01454
01455
01456
01457
01458
01459 switch (sb->st_mode & S_IFMT) {
01460 case S_IFDIR:
01461 conf->resultBuf = MIME_INODE_DIR;
01462 return 1;
01463 case S_IFCHR:
01464 conf->resultBuf = MIME_INODE_CDEV;
01465 return 1;
01466 case S_IFBLK:
01467 conf->resultBuf = MIME_INODE_BDEV;
01468 return 1;
01469
01470 #ifdef S_IFIFO
01471 case S_IFIFO:
01472 conf->resultBuf = MIME_INODE_FIFO;
01473 return 1;
01474 #endif
01475 #ifdef S_IFLNK
01476 case S_IFLNK:
01477 {
01478 char buf[BUFSIZ + BUFSIZ + 4];
01479 register int nch;
01480 KDE_struct_stat tstatbuf;
01481
01482 if ((nch = readlink(fn, buf, BUFSIZ - 1)) <= 0) {
01483 conf->resultBuf = MIME_INODE_LINK;
01484
01485 return 1;
01486 }
01487 buf[nch] = '\0';
01488
01489 if (*buf == '/') {
01490 if (KDE_stat(buf, &tstatbuf) < 0) {
01491 conf->resultBuf = MIME_INODE_LINK;
01492
01493 return 1;
01494 }
01495 } else {
01496 char *tmp;
01497 char buf2[BUFSIZ + BUFSIZ + 4];
01498
01499 strncpy(buf2, fn, BUFSIZ);
01500 buf2[BUFSIZ] = 0;
01501
01502 if ((tmp = strrchr(buf2, '/')) == NULL) {
01503 tmp = buf;
01504 } else {
01505
01506 *++tmp = '\0';
01507 strcat(buf2, buf);
01508 tmp = buf2;
01509 }
01510 if (KDE_stat(tmp, &tstatbuf) < 0) {
01511 conf->resultBuf = MIME_INODE_LINK;
01512
01513 return 1;
01514 } else
01515 strcpy(buf, tmp);
01516 }
01517 if (conf->followLinks)
01518 process( conf, QFile::decodeName( buf ) );
01519 else
01520 conf->resultBuf = MIME_INODE_LINK;
01521 return 1;
01522 }
01523 return 1;
01524 #endif
01525 #ifdef S_IFSOCK
01526 #ifndef __COHERENT__
01527 case S_IFSOCK:
01528 conf->resultBuf = MIME_INODE_SOCK;
01529 return 1;
01530 #endif
01531 #endif
01532 case S_IFREG:
01533 break;
01534 default:
01535 kdError(7018) << "KMimeMagic::fsmagic: invalid mode 0" << sb->st_mode << "." << endl;
01536
01537 }
01538
01539
01540
01541
01542 if (sb->st_size == 0) {
01543 conf->resultBuf = MIME_BINARY_ZEROSIZE;
01544 return 1;
01545 }
01546 return 0;
01547 }
01548
01549
01550
01551
01552
01553
01554
01555
01556
01557
01558
01559
01560
01561
01562
01563
01564
01565
01566
01567
01568
01569
01570
01571
01572
01573
01574
01575 static int
01576 match(struct config_rec* conf, unsigned char *s, int nbytes)
01577 {
01578 int cont_level = 0;
01579 union VALUETYPE p;
01580 struct magic *m;
01581
01582 #ifdef DEBUG_MIMEMAGIC
01583 kdDebug(7018) << "match: conf=" << conf << " m=" << (conf->magic ? "set" : "NULL") << " m->next=" << ((conf->magic && conf->magic->next) ? "set" : "NULL") << " last=" << (conf->last ? "set" : "NULL") << endl;
01584 for (m = conf->magic; m; m = m->next) {
01585 if (isprint((((unsigned long) m) >> 24) & 255) &&
01586 isprint((((unsigned long) m) >> 16) & 255) &&
01587 isprint((((unsigned long) m) >> 8) & 255) &&
01588 isprint(((unsigned long) m) & 255)) {
01589 kdDebug(7018) << "match: POINTER CLOBBERED! " << endl;
01590 break;
01591 }
01592 }
01593 #endif
01594
01595 for (m = conf->magic; m; m = m->next) {
01596 #ifdef DEBUG_MIMEMAGIC
01597 kdDebug(7018) << "match: line=" << m->lineno << " desc=" << m->desc << endl;
01598 #endif
01599 memset(&p, 0, sizeof(union VALUETYPE));
01600
01601
01602 if (!mget(&p, s, m, nbytes) ||
01603 !mcheck(&p, m)) {
01604 struct magic *m_cont;
01605
01606
01607
01608
01609 if (!m->next || (m->next->cont_level == 0)) {
01610 continue;
01611 }
01612 m_cont = m->next;
01613 while (m_cont && (m_cont->cont_level != 0)) {
01614 #ifdef DEBUG_MIMEMAGIC
01615 kdDebug(7018) << "match: line=" << m->lineno << " cont=" << m_cont->cont_level << " mc=" << m_cont->lineno << " mc->next=" << m_cont << " " << endl;
01616 #endif
01617
01618
01619
01620
01621 m = m_cont;
01622 m_cont = m_cont->next;
01623 }
01624 continue;
01625 }
01626
01627
01628 #ifdef DEBUG_MIMEMAGIC
01629 kdDebug(7018) << "match: rule matched, line=" << m->lineno << " type=" << m->type << " " << ((m->type == STRING) ? m->value.s : "") << endl;
01630 #endif
01631
01632
01633 conf->resultBuf = m->desc;
01634
01635 cont_level++;
01636
01637
01638
01639
01640 m = m->next;
01641 while (m && (m->cont_level != 0)) {
01642 #ifdef DEBUG_MIMEMAGIC
01643 kdDebug(7018) << "match: line=" << m->lineno << " cont=" << m->cont_level << " type=" << m->type << " " << ((m->type == STRING) ? m->value.s : "") << endl;
01644 #endif
01645 if (cont_level >= m->cont_level) {
01646 if (cont_level > m->cont_level) {
01647
01648
01649
01650
01651 cont_level = m->cont_level;
01652 }
01653 if (mget(&p, s, m, nbytes) &&
01654 mcheck(&p, m)) {
01655
01656
01657
01658
01659
01660
01661 #ifdef DEBUG_MIMEMAGIC
01662 kdDebug(7018) << "continuation matched" << endl;
01663 #endif
01664 conf->resultBuf = m->desc;
01665 cont_level++;
01666 }
01667 }
01668
01669 m = m->next;
01670 }
01671
01672
01673 if ( !conf->resultBuf.isEmpty() )
01674 {
01675 #ifdef DEBUG_MIMEMAGIC
01676 kdDebug(7018) << "match: matched" << endl;
01677 #endif
01678 return 1;
01679 }
01680 }
01681 #ifdef DEBUG_MIMEMAGIC
01682 kdDebug(7018) << "match: failed" << endl;
01683 #endif
01684 return 0;
01685 }
01686
01687
01688
// tagmagic - detect an ID3v2 tag at the start of the buffer.
//
// buf    - data to examine
// nbytes - number of bytes available in buf
//
// Returns the total size in bytes of the tag (10 header bytes, plus 10 more
// when flag bit 0x10 is set, plus the encoded payload size), or 0 when fewer
// than 40 bytes are available or the buffer does not start with an
// acceptable "ID3" header.
static int tagmagic(unsigned char *buf, int nbytes)
{
    if (nbytes < 40)
        return 0;
    if (buf[0] != 'I' || buf[1] != 'D' || buf[2] != '3')
        return 0;

    // Byte 3 is the version byte: anything above 4 is rejected.
    if (buf[3] > 4)
        return 0;
    // Byte 5 holds the flags: the low nibble must be clear.
    if (buf[5] & 0x0F)
        return 0;
    // The size is stored as four 7-bit groups.  The ID3v2 header definition
    // requires each of these bytes to be below 0x80; a set high bit would
    // overlap the next group in the sum below, so such data is not a tag.
    if ((buf[6] | buf[7] | buf[8] | buf[9]) & 0x80)
        return 0;

    int size = 10;                 // the header itself
    if (buf[5] & 0x10)
        size += 10;                // flag 0x10 adds another 10 bytes

    size += buf[9];
    size += buf[8] << 7;
    size += buf[7] << 14;
    size += buf[6] << 21;
    return size;
}
01708
// A token handed out by Tokenizer: a pointer into the scanned buffer plus
// the number of characters it spans.  The text is NOT NUL-terminated.
struct Token {
    char *data;
    int length;
};

// Splits a character buffer into tokens separated by any of
// '\n' '\0' '\t' ' ' '\r' '\f' ',' ';' '>'.
//
// The buffer is never modified or copied; every returned Token points
// straight into it.
struct Tokenizer
{
    // buf    - buffer to scan
    // nbytes - number of bytes of buf to scan
    Tokenizer(char* buf, int nbytes)
        : data(buf), length(nbytes), pos(0), newline(false)
    {
        token.data = buf;
        token.length = 0;
    }

    // True when the token most recently returned by nextToken() is the first
    // one of the buffer or was preceded by a '\n' since the previous token.
    bool isNewLine() {
        return newline;
    }

    // Returns the next token.  The returned object is owned by the tokenizer
    // and is overwritten by the following call.  A token of length 0 means
    // that the end of the buffer has been reached.
    Token* nextToken() {
        newline = (pos == 0);
        token.data = data+pos;
        token.length = 0;
        while(pos<length) {
            switch (data[pos]) {
                case '\n':
                    // Only a line break seen BEFORE the token starts puts
                    // the token on a new line.  A '\n' that terminates the
                    // current token is not consumed here; the next call
                    // sees it again and flags the following token instead.
                    if (token.length == 0)
                        newline = true;
                    // fall through
                case '\0':
                case '\t':
                case ' ':
                case '\r':
                case '\f':
                case ',':
                case ';':
                case '>':
                    if (token.length == 0)
                        token.data++;       // still skipping leading delimiters
                    else
                        return &token;      // delimiter ends the current token
                    break;
                default:
                    token.length++;
            }
            pos++;
        }
        return &token;
    }

private:
    Token token;    // storage for the token returned by nextToken()
    char* data;     // buffer being scanned
    int length;     // number of bytes in data
    int pos;        // index of the next byte to look at
    bool newline;   // see isNewLine()
};
01762
01763
01764
01765
01766 static inline bool STREQ(const Token *token, const char *b) {
01767 const char *a = token->data;
01768 int len = token->length;
01769 if (a == b) return true;
01770 while(*a && *b && len > 0) {
01771 if (*a != *b) return false;
01772 a++; b++; len--;
01773 }
01774 return (len == 0 && *b == 0);
01775 }
01776
01777 static int ascmagic(struct config_rec* conf, unsigned char *buf, int nbytes)
01778 {
01779 int i;
01780 double pct, maxpct, pctsum;
01781 double pcts[NTYPES];
01782 int mostaccurate, tokencount;
01783 int typeset, jonly, conly, jconly, objconly, cpponly;
01784 int has_escapes = 0;
01785
01786
01787
01788
01789 conf->accuracy = 70;
01790
01791
01792
01793
01794
01795
01796 if (*buf == '.') {
01797 unsigned char *tp = buf + 1;
01798
01799 while (isascii(*tp) && isspace(*tp))
01800 ++tp;
01801 if ((isascii(*tp) && (isalnum(*tp) || *tp == '\\') &&
01802 isascii(*(tp + 1)) && (isalnum(*(tp + 1)) || *tp == '"'))) {
01803 conf->resultBuf = MIME_APPL_TROFF;
01804 return 1;
01805 }
01806 }
01807 if ((*buf == 'c' || *buf == 'C') &&
01808 isascii(*(buf + 1)) && isspace(*(buf + 1))) {
01809
01810 conf->resultBuf = MIME_TEXT_FORTRAN;
01811 return 1;
01812 }
01813 assert(nbytes-1 < HOWMANY + 1);
01814
01815 has_escapes = (memchr(buf, '\033', nbytes) != NULL);
01816 Tokenizer tokenizer((char*)buf, nbytes);
01817 const Token* token;
01818 bool linecomment = false, blockcomment = false;
01819 const struct names *p;
01820 int typecount[NTYPES];
01821
01822
01823
01824
01825 memset(&typecount, 0, sizeof(typecount));
01826 typeset = 0;
01827 jonly = 0;
01828 conly = 0;
01829 jconly = 0;
01830 objconly = 0;
01831 cpponly = 0;
01832 tokencount = 0;
01833 bool foundClass = false;
01834
01835
01836 while ((token = tokenizer.nextToken())->length > 0) {
01837 #ifdef DEBUG_MIMEMAGIC
01838 kdDebug(7018) << "KMimeMagic::ascmagic token=" << token << endl;
01839 #endif
01840 if (linecomment && tokenizer.isNewLine())
01841 linecomment = false;
01842 if (blockcomment && STREQ(token, "*/")) {
01843 blockcomment = false;
01844 continue;
01845 }
01846 for (p = names; p->name ; p++) {
01847 if (STREQ(token, p->name)) {
01848 #ifdef DEBUG_MIMEMAGIC
01849 kdDebug(7018) << "KMimeMagic::ascmagic token matches ! name=" << p->name << " type=" << p->type << endl;
01850 #endif
01851 tokencount++;
01852 typeset |= p->type;
01853 if(p->type & (L_C|L_CPP|L_JAVA|L_OBJC)) {
01854 if (linecomment || blockcomment) {
01855 continue;
01856 }
01857 else {
01858 switch(p->type & (L_C|L_CPP|L_JAVA|L_OBJC))
01859 {
01860 case L_JAVA:
01861 jonly++;
01862 break;
01863 case L_OBJC:
01864 objconly++;
01865 break;
01866 case L_CPP:
01867 cpponly++;
01868 break;
01869 case (L_CPP|L_JAVA):
01870 jconly++;
01871 if ( !foundClass && STREQ(token, "class") )
01872 foundClass = true;
01873 break;
01874 case (L_C|L_CPP):
01875 conly++;
01876 break;
01877 default:
01878 if (STREQ(token, "//")) linecomment = true;
01879 if (STREQ(token, "/*")) blockcomment = true;
01880 }
01881 }
01882 }
01883 for (i = 0; i < (int)NTYPES; i++) {
01884 if ((1 << i) & p->type) typecount[i]++;
01885 }
01886 }
01887 }
01888 }
01889
01890 if (typeset & (L_C|L_CPP|L_JAVA|L_OBJC)) {
01891 conf->accuracy = 60;
01892 if (!(typeset & ~(L_C|L_CPP|L_JAVA|L_OBJC))) {
01893 #ifdef DEBUG_MIMEMAGIC
01894 kdDebug(7018) << "C/C++/Java/ObjC: jonly=" << jonly << " conly=" << conly << " jconly=" << jconly << " objconly=" << objconly << endl;
01895 #endif
01896 if (jonly > 1 && foundClass) {
01897
01898 conf->resultBuf = QString(types[P_JAVA].type);
01899 return 1;
01900 }
01901 if (jconly > 1) {
01902
01903 if (typecount[P_JAVA] < typecount[P_CPP])
01904 conf->resultBuf = QString(types[P_CPP].type);
01905 else
01906 conf->resultBuf = QString(types[P_JAVA].type);
01907 return 1;
01908 }
01909 if (conly + cpponly > 1) {
01910
01911 if (cpponly > 0)
01912 conf->resultBuf = QString(types[P_CPP].type);
01913 else
01914 conf->resultBuf = QString(types[P_C].type);
01915 return 1;
01916 }
01917 if (objconly > 0) {
01918 conf->resultBuf = QString(types[P_OBJC].type);
01919 return 1;
01920 }
01921 }
01922 }
01923
01924
01925
01926
01927
01928 mostaccurate = -1;
01929 maxpct = pctsum = 0.0;
01930 for (i = 0; i < (int)NTYPES; i++) {
01931 if (typecount[i] > 1) {
01932 pct = (double)typecount[i] / (double)types[i].kwords *
01933 (double)types[i].weight;
01934 pcts[i] = pct;
01935 pctsum += pct;
01936 if (pct > maxpct) {
01937 maxpct = pct;
01938 mostaccurate = i;
01939 }
01940 #ifdef DEBUG_MIMEMAGIC
01941 kdDebug(7018) << "" << types[i].type << " has " << typecount[i] << " hits, " << types[i].kwords << " kw, weight " << types[i].weight << ", " << pct << " -> max = " << maxpct << "\n" << endl;
01942 #endif
01943 }
01944 }
01945 if (mostaccurate >= 0) {
01946 if ( mostaccurate != P_JAVA || foundClass )
01947 {
01948 conf->accuracy = (int)(pcts[mostaccurate] / pctsum * 60);
01949 #ifdef DEBUG_MIMEMAGIC
01950 kdDebug(7018) << "mostaccurate=" << mostaccurate << " pcts=" << pcts[mostaccurate] << " pctsum=" << pctsum << " accuracy=" << conf->accuracy << endl;
01951 #endif
01952 conf->resultBuf = QString(types[mostaccurate].type);
01953 return 1;
01954 }
01955 }
01956
01957 switch (is_tar(buf, nbytes)) {
01958 case 1:
01959
01960 conf->resultBuf = MIME_APPL_TAR;
01961 conf->accuracy = 90;
01962 return 1;
01963 case 2:
01964
01965 conf->resultBuf = MIME_APPL_TAR;
01966 conf->accuracy = 90;
01967 return 1;
01968 }
01969
01970 for (i = 0; i < nbytes; i++) {
01971 if (!isascii(*(buf + i)))
01972 return 0;
01973 }
01974
01975
01976 conf->accuracy = 90;
01977 if (has_escapes) {
01978
01979
01980 conf->resultBuf = MIME_TEXT_UNKNOWN;
01981 } else {
01982
01983 conf->resultBuf = MIME_TEXT_PLAIN;
01984 }
01985 return 1;
01986 }
01987
01988
01989 #define TEXT_MAXLINELEN 300
01990
01991
01992
01993
01994
01995 static int textmagic(struct config_rec* conf, unsigned char * buf, int nbytes)
01996 {
01997 int i;
01998 unsigned char *cp;
01999
02000 nbytes--;
02001
02002
02003 for (i = 0, cp = buf; i < nbytes; i++, cp++)
02004 if ((*cp < 8) || (*cp>13 && *cp<32 && *cp!=27 ) || (*cp==0x7F))
02005 return 0;
02006
02007
02008
02009
02010 for (i = 0; i < nbytes;) {
02011 cp = (unsigned char *) memchr(buf, '\n', nbytes - i);
02012 if (cp == NULL) {
02013
02014 if (i + TEXT_MAXLINELEN >= nbytes)
02015 break;
02016 else
02017 return 0;
02018 }
02019 if (cp - buf > TEXT_MAXLINELEN)
02020 return 0;
02021 i += (cp - buf + 1);
02022 buf = cp + 1;
02023 }
02024 conf->resultBuf = MIME_TEXT_PLAIN;
02025 return 1;
02026 }
02027
02028
02029
02030
02031
02032
02033
02034
02035
02036
02037
02038
02039
02040
02041
/* Non-zero when c is an octal digit character ('0' through '7').
 * The argument is evaluated twice, so it must not have side effects. */
#define isodigit(c) ((c) >= '0' && (c) <= '7')
02043
02044
02045
02046
02047
02048
02049 static int
02050 is_tar(unsigned char *buf, int nbytes)
02051 {
02052 register union record *header = (union record *) buf;
02053 register int i;
02054 register long sum,
02055 recsum;
02056 register char *p;
02057
02058 if (nbytes < (int)sizeof(union record))
02059 return 0;
02060
02061 recsum = from_oct(8, header->header.chksum);
02062
02063 sum = 0;
02064 p = header->charptr;
02065 for (i = sizeof(union record); --i >= 0;) {
02066
02067
02068
02069
02070 sum += 0xFF & *p++;
02071 }
02072
02073
02074 for (i = sizeof(header->header.chksum); --i >= 0;)
02075 sum -= 0xFF & header->header.chksum[i];
02076 sum += ' ' * sizeof header->header.chksum;
02077
02078 if (sum != recsum)
02079 return 0;
02080
02081 if (0 == strcmp(header->header.magic, TMAGIC))
02082 return 2;
02083
02084 return 1;
02085 }
02086
02087
02088
02089
02090
02091
02092
// from_oct - parse an octal number out of a fixed-width character field.
//
// digs  - number of characters in the field
// where - start of the field (need not be NUL-terminated if it holds
//         digs characters)
//
// Leading whitespace is skipped, then octal digits are accumulated until the
// field is used up or a non-octal character is met.
//
// Returns the parsed value, or -1 when the field contains nothing but
// whitespace or when the digits are followed, inside the field, by a
// character that is neither NUL nor whitespace.
static long
from_oct(int digs, char *where)
{
    long value = 0;

    // The field may hold arbitrary bytes; isspace() is only defined for
    // values representable as unsigned char, hence the casts.
    while (isspace((unsigned char) *where)) {
        where++;
        if (--digs <= 0)
            return -1;          // all-blank field
    }

    while (digs > 0 && *where >= '0' && *where <= '7') {
        value = (value << 3) | (*where++ - '0');
        --digs;
    }

    if (digs > 0 && *where && !isspace((unsigned char) *where))
        return -1;              // ended on a bogus character

    return value;
}
02114
02115 KMimeMagic::KMimeMagic()
02116 {
02117
02118 QString mimefile = locate( "mime", "magic" );
02119 init( mimefile );
02120
02121 QStringList snippets = KGlobal::dirs()->findAllResources( "config", "magic/*.magic", true );
02122 for ( QStringList::Iterator it = snippets.begin() ; it != snippets.end() ; ++it )
02123 if ( !mergeConfig( *it ) )
02124 kdWarning() << k_funcinfo << "Failed to parse " << *it << endl;
02125 }
02126
02127 KMimeMagic::KMimeMagic(const QString & _configfile)
02128 {
02129 init( _configfile );
02130 }
02131
02132 void KMimeMagic::init( const QString& _configfile )
02133 {
02134 int result;
02135 conf = new config_rec;
02136
02137
02138 conf->magic = conf->last = NULL;
02139 magicResult = NULL;
02140 conf->followLinks = false;
02141
02142 conf->utimeConf = 0L;
02143
02144 result = apprentice(_configfile);
02145 if (result == -1)
02146 return;
02147 #ifdef MIME_MAGIC_DEBUG_TABLE
02148 test_table();
02149 #endif
02150 }
02151
02152
02153
02154
02155
02156 KMimeMagic::~KMimeMagic()
02157 {
02158 if (conf) {
02159 struct magic *p = conf->magic;
02160 struct magic *q;
02161 while (p) {
02162 q = p;
02163 p = p->next;
02164 free(q);
02165 }
02166 delete conf->utimeConf;
02167 delete conf;
02168 }
02169 delete magicResult;
02170 }
02171
02172 bool
02173 KMimeMagic::mergeConfig(const QString & _configfile)
02174 {
02175 kdDebug(7018) << k_funcinfo << _configfile << endl;
02176 int result;
02177
02178 if (_configfile.isEmpty())
02179 return false;
02180 result = apprentice(_configfile);
02181 if (result == -1) {
02182 return false;
02183 }
02184 #ifdef MIME_MAGIC_DEBUG_TABLE
02185 test_table();
02186 #endif
02187 return true;
02188 }
02189
02190 bool
02191 KMimeMagic::mergeBufConfig(char * _configbuf)
02192 {
02193 int result;
02194
02195 if (conf) {
02196 result = buff_apprentice(_configbuf);
02197 if (result == -1)
02198 return false;
02199 #ifdef MIME_MAGIC_DEBUG_TABLE
02200 test_table();
02201 #endif
02202 return true;
02203 }
02204 return false;
02205 }
02206
02207 void
02208 KMimeMagic::setFollowLinks( bool _enable )
02209 {
02210 conf->followLinks = _enable;
02211 }
02212
02213 KMimeMagicResult *
02214 KMimeMagic::findBufferType(const QByteArray &array)
02215 {
02216 unsigned char buf[HOWMANY + 1];
02217
02218 conf->resultBuf = QString::null;
02219 if ( !magicResult )
02220 magicResult = new KMimeMagicResult();
02221 magicResult->setInvalid();
02222 conf->accuracy = 100;
02223
02224 int nbytes = array.size();
02225
02226 if (nbytes > HOWMANY)
02227 nbytes = HOWMANY;
02228 memcpy(buf, array.data(), nbytes);
02229 if (nbytes == 0) {
02230 conf->resultBuf = MIME_BINARY_ZEROSIZE;
02231 } else {
02232 buf[nbytes++] = '\0';
02233 tryit(conf, buf, nbytes);
02234 }
02235
02236 magicResult->setMimeType(conf->resultBuf.stripWhiteSpace());
02237 magicResult->setAccuracy(conf->accuracy);
02238 return magicResult;
02239 }
02240
02241 static void
02242 refineResult(KMimeMagicResult *r, const QString & _filename)
02243 {
02244 QString tmp = r->mimeType();
02245 if (tmp.isEmpty())
02246 return;
02247 if ( tmp == "text/x-c" || tmp == "text/x-objc" )
02248 {
02249 if ( _filename.right(2) == ".h" )
02250 tmp += "hdr";
02251 else
02252 tmp += "src";
02253 r->setMimeType(tmp);
02254 }
02255 else
02256 if ( tmp == "text/x-c++" )
02257 {
02258 if ( _filename.endsWith(".h")
02259 || _filename.endsWith(".hh")
02260 || _filename.endsWith(".H")
02261 || !_filename.right(4).contains('.'))
02262 tmp += "hdr";
02263 else
02264 tmp += "src";
02265 r->setMimeType(tmp);
02266 }
02267 else
02268 if ( tmp == "application/x-sharedlib" )
02269 {
02270 if ( _filename.find( ".so" ) == -1 )
02271 {
02272 tmp = "application/x-executable";
02273 r->setMimeType( tmp );
02274 }
02275 }
02276 }
02277
02278 KMimeMagicResult *
02279 KMimeMagic::findBufferFileType( const QByteArray &data,
02280 const QString &fn)
02281 {
02282 KMimeMagicResult * r = findBufferType( data );
02283 refineResult(r, fn);
02284 return r;
02285 }
02286
02287
02288
02289
02290 KMimeMagicResult* KMimeMagic::findFileType(const QString & fn)
02291 {
02292 #ifdef DEBUG_MIMEMAGIC
02293 kdDebug(7018) << "KMimeMagic::findFileType " << fn << endl;
02294 #endif
02295 conf->resultBuf = QString::null;
02296
02297 if ( !magicResult )
02298 magicResult = new KMimeMagicResult();
02299 magicResult->setInvalid();
02300 conf->accuracy = 100;
02301
02302 if ( !conf->utimeConf )
02303 conf->utimeConf = new KMimeMagicUtimeConf();
02304
02305
02306 process(conf, fn );
02307
02308
02309
02310 magicResult->setMimeType(conf->resultBuf.stripWhiteSpace());
02311 magicResult->setAccuracy(conf->accuracy);
02312 refineResult(magicResult, fn);
02313 return magicResult;
02314 }