kmimemagic.cpp

00001 /* This file is part of the KDE libraries
00002    Copyright (C) 2000 Fritz Elfert <fritz@kde.org>
00003    Copyright (C) 2004 Allan Sandfeld Jensen <kde@carewolf.com>
00004 
00005    This library is free software; you can redistribute it and/or
00006    modify it under the terms of the GNU Library General Public
00007    License version 2 as published by the Free Software Foundation.
00008 
00009    This library is distributed in the hope that it will be useful,
00010    but WITHOUT ANY WARRANTY; without even the implied warranty of
00011    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00012    Library General Public License for more details.
00013 
00014    You should have received a copy of the GNU Library General Public License
00015    along with this library; see the file COPYING.LIB.  If not, write to
00016    the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
00017    Boston, MA 02110-1301, USA.
00018 */
00019 #include "kmimemagic.h"
00020 #include <kdebug.h>
00021 #include <kapplication.h>
00022 #include <qfile.h>
00023 #include <ksimpleconfig.h>
00024 #include <kstandarddirs.h>
00025 #include <kstaticdeleter.h>
00026 #include <klargefile.h>
00027 #include <assert.h>
00028 
00029 static int fsmagic(struct config_rec* conf, const char *fn, KDE_struct_stat *sb);
00030 static void process(struct config_rec* conf,  const QString &);
00031 static int ascmagic(struct config_rec* conf, unsigned char *buf, int nbytes);
00032 static int tagmagic(unsigned char *buf, int nbytes);
00033 static int textmagic(struct config_rec* conf, unsigned char *, int);
00034 
00035 static void tryit(struct config_rec* conf, unsigned char *buf, int nb);
00036 static int match(struct config_rec* conf, unsigned char *, int);
00037 
00038 KMimeMagic* KMimeMagic::s_pSelf;
00039 static KStaticDeleter<KMimeMagic> kmimemagicsd;
00040 
00041 KMimeMagic* KMimeMagic::self()
00042 {
00043   if( !s_pSelf )
00044     initStatic();
00045   return s_pSelf;
00046 }
00047 
00048 void KMimeMagic::initStatic()
00049 {
00050   s_pSelf = kmimemagicsd.setObject( s_pSelf, new KMimeMagic() );
00051   s_pSelf->setFollowLinks( true );
00052 }
00053 
00054 #include <stdio.h>
00055 #include <unistd.h>
00056 #include <stdlib.h>
00057 #include <sys/wait.h>
00058 #include <sys/types.h>
00059 #include <sys/stat.h>
00060 #include <fcntl.h>
00061 #include <errno.h>
00062 #include <ctype.h>
00063 #include <time.h>
00064 #include <utime.h>
00065 #include <stdarg.h>
00066 #include <qregexp.h>
00067 #include <qstring.h>
00068 
// Debug switches -- all off by default, uncomment the ones you need.
//#define MIME_MAGIC_DEBUG_TABLE  // untested
//#define DEBUG_APPRENTICE        // debug the config-file parsing phase
//#define DEBUG_MIMEMAGIC         // debug the matching phase

// Both tracing modes want the magic-file line number kept in each entry.
#if defined(DEBUG_APPRENTICE) || defined(DEBUG_MIMEMAGIC)
#  define DEBUG_LINENUMBERS
#endif
00079 
/*
 * Built-in MIME types
 */
/* generic binary results */
#define MIME_BINARY_UNKNOWN     "application/octet-stream"
#define MIME_BINARY_UNREADABLE  "application/x-unreadable"
#define MIME_BINARY_ZEROSIZE    "application/x-zerosize"
/* generic text results (both names map to the same type) */
#define MIME_TEXT_UNKNOWN       "text/plain"
#define MIME_TEXT_PLAIN         "text/plain"
/* file-system objects */
#define MIME_INODE_DIR          "inode/directory"
#define MIME_INODE_CDEV         "inode/chardevice"
#define MIME_INODE_BDEV         "inode/blockdevice"
#define MIME_INODE_FIFO         "inode/fifo"
#define MIME_INODE_LINK         "inode/link"
#define MIME_INODE_SOCK         "inode/socket"
// Following should go in magic-file - Fritz
#define MIME_APPL_TROFF         "application/x-troff"
#define MIME_APPL_TAR           "application/x-tar"
#define MIME_TEXT_FORTRAN       "text/x-fortran"

#define MAXMIMESTRING  256

/* big enough to recognize most WWW files, and skip GPL-headers */
#define HOWMANY        4000
/* size of the description buffer (struct magic::desc) */
#define MAXDESC        50
/* size of the buffer for "string" values (VALUETYPE::s) */
#define MAXstring      64
00104 
00105 typedef union VALUETYPE {
00106     unsigned char b;
00107     unsigned short h;
00108     unsigned long l;
00109     char s[MAXstring];
00110     unsigned char hs[2];    /* 2 bytes of a fixed-endian "short" */
00111     unsigned char hl[4];    /* 2 bytes of a fixed-endian "long" */
00112 } VALUETYPE;
00113 
00114 struct magic {
00115     struct magic *next;     /* link to next entry */
00116 #ifdef DEBUG_LINENUMBERS
00117     int lineno;             /* line number from magic file - doesn't say from which one ;) */
00118 #endif
00119 
00120     short flag;
00121 #define INDIR    1              /* if '>(...)' appears,  */
00122 #define UNSIGNED 2              /* comparison is unsigned */
00123     short cont_level;       /* level of ">" */
00124     struct {
00125         char type;      /* byte short long */
00126         long offset;    /* offset from indirection */
00127     } in;
00128     long offset;            /* offset to magic number */
00129     unsigned char reln;     /* relation (0=eq, '>'=gt, etc) */
00130     char type;              /* int, short, long or string. */
00131     char vallen;            /* length of string value, if any */
00132 #define BYTE       1
00133 #define SHORT      2
00134 #define LONG       4
00135 #define STRING     5
00136 #define DATE       6
00137 #define BESHORT    7
00138 #define BELONG     8
00139 #define BEDATE     9
00140 #define LESHORT   10
00141 #define LELONG    11
00142 #define LEDATE    12
00143     VALUETYPE value;        /* either number or string */
00144     unsigned long mask;     /* mask before comparison with value */
00145     char nospflag;          /* suppress space character */
00146 
00147     /* NOTE: this string is suspected of overrunning - find it! */
00148     char desc[MAXDESC];     /* description */
00149 };
00150 
00151 /*
00152  * data structures for tar file recognition
00153  * --------------------------------------------------------------------------
00154  * Header file for public domain tar (tape archive) program.
00155  *
00156  * @(#)tar.h 1.20 86/10/29    Public Domain. Created 25 August 1985 by John
00157  * Gilmore, ihnp4!hoptoad!gnu.
00158  *
00159  * Header block on tape.
00160  *
00161  * I'm going to use traditional DP naming conventions here. A "block" is a big
00162  * chunk of stuff that we do I/O on. A "record" is a piece of info that we
00163  * care about. Typically many "record"s fit into a "block".
00164  */
#define RECORDSIZE  512         /* size of one tar record */
#define NAMSIZ      100         /* size of the name and linkname fields */
#define TUNMLEN     32          /* size of the uname field */
#define TGNMLEN     32          /* size of the gname field */

/* One tar record, viewed either as raw bytes or as a header. */
union record {
    char charptr[RECORDSIZE];
    struct header {
        char name[NAMSIZ];
        char mode[8];
        char uid[8];
        char gid[8];
        char size[12];
        char mtime[12];
        char chksum[8];
        char linkflag;
        char linkname[NAMSIZ];
        char magic[8];
        char uname[TUNMLEN];
        char gname[TGNMLEN];
        char devmajor[8];
        char devminor[8];
    } header;
};

/* The magic field is filled with this if uname and gname are valid. */
#define TMAGIC      "ustar  "   /* 7 chars and a null */
00192 
00193 /*
00194  * file-function prototypes
00195  */
static int            is_tar(unsigned char *, int);
static unsigned long  signextend(struct magic *m, unsigned long v);
static int            getvalue(struct magic *m, char **p);
static int            hextoint(int);
static char          *getstr(char *s, char *p, int plen, int *slen);
static int            mget(union VALUETYPE *, unsigned char *, struct magic *, int);
static int            mcheck(union VALUETYPE *, struct magic *);
static int            mconvert(union VALUETYPE *, struct magic *);
static long           from_oct(int, char *);
00205 
00206 /*
00207  * includes for ASCII substring recognition formerly "names.h" in file
00208  * command
00209  *
00210  * Original notes: names and types used by ascmagic in file(1).
00211  * These tokens are
00212  * here because they can appear anywhere in the first HOWMANY bytes, while
00213  * tokens in /etc/magic must appear at fixed offsets into the file. Don't
00214  * make HOWMANY too high unless you have a very fast CPU.
00215  */
00216 
/*
 * Token classes.  P_xxx is the position of a class in 'types'; L_xxx is
 * the single-bit flag of the same class, as used in 'names'.  The flag is
 * derived from the position so that the two cannot drift apart.
 *
 * HTML was put first because this is a web server module now.
 * ENG was removed because stupid.
 */
#define P_HTML   0          /* HTML */
#define P_C      1          /* first and foremost on UNIX */
#define P_MAKE   2          /* Makefiles */
#define P_PLI    3          /* PL/1 */
#define P_MACH   4          /* some kinda assembler */
#define P_PAS    5          /* Pascal */
#define P_JAVA   6          /* Java source */
#define P_CPP    7          /* C++ */
#define P_MAIL   8          /* Electronic mail */
#define P_NEWS   9          /* Usenet Netnews */
#define P_DIFF  10          /* Output of diff */
#define P_OBJC  11          /* Objective C */

#define L_HTML   (1 << P_HTML)
#define L_C      (1 << P_C)
#define L_MAKE   (1 << P_MAKE)
#define L_PLI    (1 << P_PLI)
#define L_MACH   (1 << P_MACH)
#define L_PAS    (1 << P_PAS)
#define L_JAVA   (1 << P_JAVA)
#define L_CPP    (1 << P_CPP)
#define L_MAIL   (1 << P_MAIL)
#define L_NEWS   (1 << P_NEWS)
#define L_DIFF   (1 << P_DIFF)
#define L_OBJC   (1 << P_OBJC)
00245 
/* One row of the 'types' table. */
struct asc_type {
    const char *type;       /* MIME type name */
    int         kwords;
    double      weight;
};
00251 
00252 static const asc_type types[] = {
00253     { "text/html",         19, 2 }, // 10 items but 10 different words only
00254     { "text/x-c",          13, 1 },
00255     { "text/x-makefile",    4, 1.9 },
00256     { "text/x-pli",         1, 3 },
00257     { "text/x-assembler",   6, 2.1 },
00258     { "text/x-pascal",      1, 1 },
00259     { "text/x-java",       12, 1 },
00260     { "text/x-c++",        19, 1 },
00261     { "message/rfc822",     4, 1.9 },
00262     { "message/news",       3, 2 },
00263         { "text/x-diff",        4, 2 },
00264         { "text/x-objc",    10, 1 }
00265 };
00266 
00267 #define NTYPES (sizeof(types)/sizeof(asc_type))
00268 
00269 static struct names {
00270     const char *name;
00271     short type;
00272 } const names[] = {
00273     {
00274         "<html", L_HTML
00275     },
00276     {
00277         "<HTML", L_HTML
00278     },
00279     {
00280         "<head", L_HTML
00281     },
00282     {
00283         "<HEAD", L_HTML
00284     },
00285     {
00286         "<body", L_HTML
00287     },
00288     {
00289         "<BODY", L_HTML
00290     },
00291     {
00292         "<title", L_HTML
00293     },
00294     {
00295         "<TITLE", L_HTML
00296     },
00297     {
00298         "<h1", L_HTML
00299     },
00300     {
00301         "<H1", L_HTML
00302     },
00303     {
00304         "<a", L_HTML
00305     },
00306     {
00307         "<A", L_HTML
00308     },
00309     {
00310         "<img", L_HTML
00311     },
00312     {
00313         "<IMG", L_HTML
00314     },
00315     {
00316         "<!--", L_HTML
00317     },
00318     {
00319         "<!doctype", L_HTML
00320     },
00321     {
00322         "<!DOCTYPE", L_HTML
00323     },
00324     {
00325         "<div", L_HTML
00326     },
00327     {
00328         "<DIV", L_HTML
00329     },
00330     {
00331         "<frame", L_HTML
00332     },
00333     {
00334         "<FRAME", L_HTML
00335     },
00336     {
00337         "<frameset", L_HTML
00338     },
00339     {
00340         "<FRAMESET", L_HTML
00341     },
00342         {
00343                 "<script", L_HTML
00344         },
00345         {
00346                 "<SCRIPT", L_HTML
00347         },
00348     {
00349         "/*", L_C|L_CPP|L_JAVA|L_OBJC
00350     },
00351     {
00352         "//", L_C|L_CPP|L_JAVA|L_OBJC
00353     },
00354     {
00355         "#include", L_C|L_CPP
00356     },
00357     {
00358         "#ifdef", L_C|L_CPP
00359     },
00360     {
00361         "#ifndef", L_C|L_CPP
00362     },
00363     {
00364         "bool", L_C|L_CPP
00365     },
00366     {
00367         "char", L_C|L_CPP|L_JAVA|L_OBJC
00368     },
00369     {
00370         "int", L_C|L_CPP|L_JAVA|L_OBJC
00371     },
00372     {
00373         "float", L_C|L_CPP|L_JAVA|L_OBJC
00374     },
00375     {
00376         "void", L_C|L_CPP|L_JAVA|L_OBJC
00377     },
00378     {
00379         "extern", L_C|L_CPP
00380     },
00381     {
00382         "struct", L_C|L_CPP
00383     },
00384     {
00385         "union", L_C|L_CPP
00386     },
00387     {
00388         "implements", L_JAVA
00389     },
00390     {
00391         "super", L_JAVA
00392     },
00393     {
00394         "import", L_JAVA
00395     },
00396     {
00397         "class", L_CPP|L_JAVA
00398     },
00399     {
00400         "public", L_CPP|L_JAVA
00401     },
00402     {
00403         "private", L_CPP|L_JAVA
00404     },
00405     {
00406         "explicit", L_CPP
00407     },
00408     {
00409         "virtual", L_CPP
00410     },
00411     {
00412         "namespace", L_CPP
00413     },
00414     {
00415         "#import", L_OBJC
00416     },
00417     {
00418         "@interface", L_OBJC
00419     },
00420     {
00421         "@implementation", L_OBJC
00422     },
00423     {
00424         "@protocol", L_OBJC
00425     },
00426     {
00427         "CFLAGS", L_MAKE
00428     },
00429     {
00430         "LDFLAGS", L_MAKE
00431     },
00432     {
00433         "all:", L_MAKE
00434     },
00435     {
00436         ".PHONY:", L_MAKE
00437     },
00438     {
00439         "srcdir", L_MAKE
00440     },
00441     {
00442         "exec_prefix", L_MAKE
00443     },
00444     /*
00445      * Too many files of text have these words in them.  Find another way
00446      * to recognize Fortrash.
00447      */
00448     {
00449         ".ascii", L_MACH
00450     },
00451     {
00452         ".asciiz", L_MACH
00453     },
00454     {
00455         ".byte", L_MACH
00456     },
00457     {
00458         ".even", L_MACH
00459     },
00460     {
00461         ".globl", L_MACH
00462     },
00463     {
00464         "clr", L_MACH
00465     },
00466     {
00467         "(input", L_PAS
00468     },
00469     {
00470         "dcl", L_PLI
00471     },
00472     {
00473         "Received:", L_MAIL
00474     },
00475     /* we now stop at '>' for tokens, so this one won't work {
00476         ">From", L_MAIL
00477         },*/
00478     {
00479         "Return-Path:", L_MAIL
00480     },
00481     {
00482         "Cc:", L_MAIL
00483     },
00484     {
00485         "Newsgroups:", L_NEWS
00486     },
00487     {
00488         "Path:", L_NEWS
00489     },
00490     {
00491         "Organization:", L_NEWS
00492     },
00493     {
00494         "---", L_DIFF
00495     },
00496     {
00497         "+++", L_DIFF
00498     },
00499     {
00500         "***", L_DIFF
00501     },
00502     {
00503         "@@", L_DIFF
00504     },
00505     {
00506         NULL, 0
00507     }
00508 };
00509 
00520 class KMimeMagicUtimeConf
00521 {
00522 public:
00523     KMimeMagicUtimeConf()
00524     {
00525         tmpDirs << QString::fromLatin1("/tmp"); // default value
00526 
00527         // The trick is that we also don't want the user to override globally set
00528         // directories. So we have to misuse KStandardDirs :}
00529         QStringList confDirs = KGlobal::dirs()->resourceDirs( "config" );
00530         if ( !confDirs.isEmpty() )
00531         {
00532             QString globalConf = confDirs.last() + "kmimemagicrc";
00533             if ( QFile::exists( globalConf ) )
00534             {
00535                 KSimpleConfig cfg( globalConf );
00536                 cfg.setGroup( "Settings" );
00537                 tmpDirs = cfg.readListEntry( "atimeDirs" );
00538             }
00539             if ( confDirs.count() > 1 )
00540             {
00541                 QString localConf = confDirs.first() + "kmimemagicrc";
00542                 if ( QFile::exists( localConf ) )
00543                 {
00544                     KSimpleConfig cfg( localConf );
00545                     cfg.setGroup( "Settings" );
00546                     tmpDirs += cfg.readListEntry( "atimeDirs" );
00547                 }
00548             }
00549             for ( QStringList::Iterator it = tmpDirs.begin() ; it != tmpDirs.end() ; ++it )
00550             {
00551                 QString dir = *it;
00552                 if ( !dir.isEmpty() && dir[ dir.length()-1 ] != '/' )
00553                     (*it) += '/';
00554             }
00555         }
00556 #if 0
00557         // debug code
00558         for ( QStringList::Iterator it = tmpDirs.begin() ; it != tmpDirs.end() ; ++it )
00559             kdDebug(7018) << " atimeDir: " << *it << endl;
00560 #endif
00561     }
00562 
00563     bool restoreAccessTime( const QString & file ) const
00564     {
00565         QString dir = file.left( file.findRev( '/' ) );
00566         bool res = tmpDirs.contains( dir );
00567         //kdDebug(7018) << "restoreAccessTime " << file << " dir=" << dir << " result=" << res << endl;
00568         return res;
00569     }
00570     QStringList tmpDirs;
00571 };
00572 
00573 /* current config */
00574 struct config_rec {
00575     bool followLinks;
00576     QString resultBuf;
00577     int accuracy;
00578 
00579     struct magic *magic,    /* head of magic config list */
00580     *last;
00581     KMimeMagicUtimeConf * utimeConf;
00582 };
00583 
#ifdef MIME_MAGIC_DEBUG_TABLE
/*
 * test_table - debugging aid that walks the magic list of 'conf' and
 * stops at the first entry pointer whose four low bytes are all printable
 * characters, i.e. a link that looks as if text had been written over it.
 *
 * conf: configuration whose list is checked.  The parameter is defaulted
 *       so that existing argument-less calls keep compiling; without a
 *       configuration only the "started" line is logged.
 *
 * The line number reported is that of the entry in front of the suspect
 * pointer; it is -1 for the list head or when line numbers are not
 * compiled in (DEBUG_LINENUMBERS).
 */
static void
test_table(struct config_rec *conf = NULL)
{
    kdDebug(7018) << "test_table : started" << endl;
    if (!conf)
        return;

    int prevLine = -1;
    for (struct magic *m = conf->magic; m; m = m->next) {
        const unsigned long addr = (unsigned long) m;
        char text[5];
        text[0] = (char) ((addr >> 24) & 255);
        text[1] = (char) ((addr >> 16) & 255);
        text[2] = (char) ((addr >> 8) & 255);
        text[3] = (char) (addr & 255);
        text[4] = '\0';

        if (isprint((int) ((addr >> 24) & 255)) &&
            isprint((int) ((addr >> 16) & 255)) &&
            isprint((int) ((addr >> 8) & 255)) &&
            isprint((int) (addr & 255))) {
            kdDebug(7018) << "test_table: POINTER CLOBBERED! m=\"" << text
                          << "\" line=" << prevLine << endl;
            /* m must not be dereferenced any more */
            break;
        }
#ifdef DEBUG_LINENUMBERS
        prevLine = m->lineno;
#endif
    }
}
#endif
00610 
/* Moves the scan pointer 'l' of the enclosing scope past ASCII whitespace. */
#define EATAB \
    { \
        while (isascii((unsigned char) *l) && isspace((unsigned char) *l)) \
            ++l; \
    }
00613 
00614 int KMimeMagic::parse_line(char *line, int *rule, int lineno)
00615 {
00616     int ws_offset;
00617 
00618     /* delete newline */
00619     if (line[0]) {
00620         line[strlen(line) - 1] = '\0';
00621     }
00622     /* skip leading whitespace */
00623     ws_offset = 0;
00624     while (line[ws_offset] && isspace(line[ws_offset])) {
00625         ws_offset++;
00626     }
00627 
00628     /* skip blank lines */
00629     if (line[ws_offset] == 0) {
00630         return 0;
00631     }
00632     /* comment, do not parse */
00633     if (line[ws_offset] == '#')
00634         return 0;
00635 
00636     /* if we get here, we're going to use it so count it */
00637     (*rule)++;
00638 
00639     /* parse it */
00640     return (parse(line + ws_offset, lineno) != 0);
00641 }
00642 
00643 /*
00644  * apprentice - load configuration from the magic file.
00645  */
00646 int KMimeMagic::apprentice( const QString& magicfile )
00647 {
00648     FILE *f;
00649     char line[BUFSIZ + 1];
00650     int errs = 0;
00651     int lineno;
00652     int rule = 0;
00653     QCString fname;
00654 
00655     if (magicfile.isEmpty())
00656         return -1;
00657     fname = QFile::encodeName(magicfile);
00658     f = fopen(fname, "r");
00659     if (f == NULL) {
00660         kdError(7018) << "can't read magic file " << fname.data() << ": " << strerror(errno) << endl;
00661         return -1;
00662     }
00663 
00664     /* parse it */
00665     for (lineno = 1; fgets(line, BUFSIZ, f) != NULL; lineno++)
00666         if (parse_line(line, &rule, lineno))
00667             errs++;
00668 
00669     fclose(f);
00670 
00671 #ifdef DEBUG_APPRENTICE
00672     kdDebug(7018) << "apprentice: conf=" << conf << " file=" << magicfile << " m=" << (conf->magic ? "set" : "NULL") << " m->next=" << ((conf->magic && conf->magic->next) ? "set" : "NULL") << " last=" << (conf->last ? "set" : "NULL") << endl;
00673     kdDebug(7018) << "apprentice: read " << lineno << " lines, " << rule << " rules, " << errs << " errors" << endl;
00674 #endif
00675 
00676 #ifdef MIME_MAGIC_DEBUG_TABLE
00677     test_table();
00678 #endif
00679 
00680     return (errs ? -1 : 0);
00681 }
00682 
00683 int KMimeMagic::buff_apprentice(char *buff)
00684 {
00685     char line[BUFSIZ + 2];
00686     int errs = 0;
00687     int lineno = 1;
00688     char *start = buff;
00689     char *end;
00690     int count = 0;
00691     int rule = 0;
00692     int len = strlen(buff) + 1;
00693 
00694     /* parse it */
00695     do {
00696         count = (len > BUFSIZ-1)?BUFSIZ-1:len;
00697         strncpy(line, start, count);
00698         line[count] = '\0';
00699         if ((end = strchr(line, '\n'))) {
00700             *(++end) = '\0';
00701             count = strlen(line);
00702         } else
00703           strcat(line, "\n");
00704         start += count;
00705         len -= count;
00706         if (parse_line(line, &rule, lineno))
00707             errs++;
00708         lineno++;
00709     } while (len > 0);
00710 
00711 #ifdef DEBUG_APPRENTICE
00712     kdDebug(7018) << "buff_apprentice: conf=" << conf << " m=" << (conf->magic ? "set" : "NULL") << " m->next=" << ((conf->magic && conf->magic->next) ? "set" : "NULL") << " last=" << (conf->last ? "set" : "NULL") << endl;
00713     kdDebug(7018) << "buff_apprentice: read " << lineno << " lines, " << rule << " rules, " << errs << " errors" << endl;
00714 #endif
00715 
00716 #ifdef MIME_MAGIC_DEBUG_TABLE
00717     test_table();
00718 #endif
00719 
00720     return (errs ? -1 : 0);
00721 }
00722 
00723 /*
00724  * extend the sign bit if the comparison is to be signed
00725  */
00726 static unsigned long
00727 signextend(struct magic *m, unsigned long v)
00728 {
00729     if (!(m->flag & UNSIGNED))
00730         switch (m->type) {
00731                 /*
00732                  * Do not remove the casts below.  They are vital.
00733                  * When later compared with the data, the sign
00734                  * extension must have happened.
00735                  */
00736             case BYTE:
00737                 v = (char) v;
00738                 break;
00739             case SHORT:
00740             case BESHORT:
00741             case LESHORT:
00742                 v = (short) v;
00743                 break;
00744             case DATE:
00745             case BEDATE:
00746             case LEDATE:
00747             case LONG:
00748             case BELONG:
00749             case LELONG:
00750                 v = (long) v;
00751                 break;
00752             case STRING:
00753                 break;
00754             default:
00755                 kdError(7018) << "" << "signextend" << ": can't happen: m->type=" << m->type << endl;
00756                 return 998; //good value
00757         }
00758     return v;
00759 }
00760 
00761 /*
00762  * parse one line from magic file, put into magic[index++] if valid
00763  */
00764 int KMimeMagic::parse(char *l, int
00765 #ifdef DEBUG_LINENUMBERS
00766     lineno
00767 #endif
00768         )
00769 {
00770     int i = 0;
00771     struct magic *m;
00772     char *t,
00773     *s;
00774     /* allocate magic structure entry */
00775     if ((m = (struct magic *) calloc(1, sizeof(struct magic))) == NULL) {
00776         kdError(7018) << "parse: Out of memory." << endl;
00777         return -1;
00778     }
00779     /* append to linked list */
00780     m->next = NULL;
00781     if (!conf->magic || !conf->last) {
00782         conf->magic = conf->last = m;
00783     } else {
00784         conf->last->next = m;
00785         conf->last = m;
00786     }
00787 
00788     /* set values in magic structure */
00789     m->flag = 0;
00790     m->cont_level = 0;
00791 #ifdef DEBUG_LINENUMBERS
00792     m->lineno = lineno;
00793 #endif
00794 
00795     while (*l == '>') {
00796         ++l;            /* step over */
00797         m->cont_level++;
00798     }
00799 
00800     if (m->cont_level != 0 && *l == '(') {
00801         ++l;            /* step over */
00802         m->flag |= INDIR;
00803     }
00804     /* get offset, then skip over it */
00805     m->offset = (int) strtol(l, &t, 0);
00806     if (l == t) {
00807             kdError(7018) << "parse: offset " << l << " invalid" << endl;
00808     }
00809     l = t;
00810 
00811     if (m->flag & INDIR) {
00812         m->in.type = LONG;
00813         m->in.offset = 0;
00814         /*
00815          * read [.lbs][+-]nnnnn)
00816          */
00817         if (*l == '.') {
00818             switch (*++l) {
00819                 case 'l':
00820                     m->in.type = LONG;
00821                     break;
00822                 case 's':
00823                     m->in.type = SHORT;
00824                     break;
00825                 case 'b':
00826                     m->in.type = BYTE;
00827                     break;
00828                 default:
00829                     kdError(7018) << "parse: indirect offset type " << *l << " invalid" << endl;
00830                     break;
00831             }
00832             l++;
00833         }
00834         s = l;
00835         if (*l == '+' || *l == '-')
00836             l++;
00837         if (isdigit((unsigned char) *l)) {
00838             m->in.offset = strtol(l, &t, 0);
00839             if (*s == '-')
00840                 m->in.offset = -m->in.offset;
00841         } else
00842             t = l;
00843         if (*t++ != ')') {
00844             kdError(7018) << "parse: missing ')' in indirect offset" << endl;
00845         }
00846         l = t;
00847     }
00848     while (isascii((unsigned char) *l) && isdigit((unsigned char) *l))
00849         ++l;
00850     EATAB;
00851 
00852 #define NBYTE       4
00853 #define NSHORT      5
00854 #define NLONG       4
00855 #define NSTRING     6
00856 #define NDATE       4
00857 #define NBESHORT    7
00858 #define NBELONG     6
00859 #define NBEDATE     6
00860 #define NLESHORT    7
00861 #define NLELONG     6
00862 #define NLEDATE     6
00863 
00864     if (*l == 'u') {
00865         ++l;
00866         m->flag |= UNSIGNED;
00867     }
00868     /* get type, skip it */
00869     if (strncmp(l, "byte", NBYTE) == 0) {
00870         m->type = BYTE;
00871         l += NBYTE;
00872     } else if (strncmp(l, "short", NSHORT) == 0) {
00873         m->type = SHORT;
00874         l += NSHORT;
00875     } else if (strncmp(l, "long", NLONG) == 0) {
00876         m->type = LONG;
00877         l += NLONG;
00878     } else if (strncmp(l, "string", NSTRING) == 0) {
00879         m->type = STRING;
00880         l += NSTRING;
00881     } else if (strncmp(l, "date", NDATE) == 0) {
00882         m->type = DATE;
00883         l += NDATE;
00884     } else if (strncmp(l, "beshort", NBESHORT) == 0) {
00885         m->type = BESHORT;
00886         l += NBESHORT;
00887     } else if (strncmp(l, "belong", NBELONG) == 0) {
00888         m->type = BELONG;
00889         l += NBELONG;
00890     } else if (strncmp(l, "bedate", NBEDATE) == 0) {
00891         m->type = BEDATE;
00892         l += NBEDATE;
00893     } else if (strncmp(l, "leshort", NLESHORT) == 0) {
00894         m->type = LESHORT;
00895         l += NLESHORT;
00896     } else if (strncmp(l, "lelong", NLELONG) == 0) {
00897         m->type = LELONG;
00898         l += NLELONG;
00899     } else if (strncmp(l, "ledate", NLEDATE) == 0) {
00900         m->type = LEDATE;
00901         l += NLEDATE;
00902     } else {
00903         kdError(7018) << "parse: type " << l << " invalid" << endl;
00904         return -1;
00905     }
00906     /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */
00907     if (*l == '&') {
00908         ++l;
00909         m->mask = signextend(m, strtol(l, &l, 0));
00910     } else
00911         m->mask = (unsigned long) ~0L;
00912     EATAB;
00913 
00914     switch (*l) {
00915         case '>':
00916         case '<':
00917             /* Old-style anding: "0 byte &0x80 dynamically linked" */
00918         case '&':
00919         case '^':
00920         case '=':
00921             m->reln = *l;
00922             ++l;
00923             break;
00924         case '!':
00925             if (m->type != STRING) {
00926                 m->reln = *l;
00927                 ++l;
00928                 break;
00929             }
00930             /* FALL THROUGH */
00931         default:
00932             if (*l == 'x' && isascii((unsigned char) l[1]) &&
00933                 isspace((unsigned char) l[1])) {
00934                 m->reln = *l;
00935                 ++l;
00936                 goto GetDesc;   /* Bill The Cat */
00937             }
00938             m->reln = '=';
00939             break;
00940     }
00941     EATAB;
00942 
00943     if (getvalue(m, &l))
00944         return -1;
00945     /*
00946      * now get last part - the description
00947      */
00948       GetDesc:
00949     EATAB;
00950     if (l[0] == '\b') {
00951         ++l;
00952         m->nospflag = 1;
00953     } else if ((l[0] == '\\') && (l[1] == 'b')) {
00954         ++l;
00955         ++l;
00956         m->nospflag = 1;
00957     } else
00958         m->nospflag = 0;
00959         // Copy description - until EOL or '#' (for comments)
00960         while (*l != '\0' && *l != '#' && i < MAXDESC-1)
00961             m->desc[i++] = *l++;
00962         m->desc[i] = '\0';
00963         // Remove trailing spaces
00964         while (--i>0 && isspace( m->desc[i] ))
00965             m->desc[i] = '\0';
00966 
00967         // old code
00968     //while ((m->desc[i++] = *l++) != '\0' && i < MAXDESC) /* NULLBODY */ ;
00969 
00970 #ifdef DEBUG_APPRENTICE
00971     kdDebug(7018) << "parse: line=" << lineno << " m=" << m << " next=" << m->next << " cont=" << m->cont_level << " desc=" << (m->desc ? m->desc : "NULL") << endl;
00972 #endif
00973     return 0;
00974 }
00975 
00976 /*
00977  * Read a numeric value from a pointer, into the value union of a magic
00978  * pointer, according to the magic type.  Update the string pointer to point
00979  * just after the number read.  Return 0 for success, non-zero for failure.
00980  */
00981 static int
00982 getvalue(struct magic *m, char **p)
00983 {
00984     int slen;
00985 
00986     if (m->type == STRING) {
00987         *p = getstr(*p, m->value.s, sizeof(m->value.s), &slen);
00988         m->vallen = slen;
00989     } else if (m->reln != 'x')
00990         m->value.l = signextend(m, strtol(*p, p, 0));
00991     return 0;
00992 }
00993 
00994 /*
00995  * Convert a string containing C character escapes.  Stop at an unescaped
00996  * space or tab. Copy the converted version to "p", returning its length in
00997  * *slen. Return updated scan pointer as function result.
00998  */
00999 static char *
01000 getstr(register char *s, register char *p, int plen, int *slen)
01001 {
01002     char *origs = s,
01003     *origp = p;
01004     char *pmax = p + plen - 1;
01005     register int c;
01006     register int val;
01007 
01008     while ((c = *s++) != '\0') {
01009         if (isspace((unsigned char) c))
01010             break;
01011         if (p >= pmax) {
01012             kdError(7018) << "String too long: " << origs << endl;
01013             break;
01014         }
01015         if (c == '\\') {
01016             switch (c = *s++) {
01017 
01018                 case '\0':
01019                     goto out;
01020 
01021                 default:
01022                     *p++ = (char) c;
01023                     break;
01024 
01025                 case 'n':
01026                     *p++ = '\n';
01027                     break;
01028 
01029                 case 'r':
01030                     *p++ = '\r';
01031                     break;
01032 
01033                 case 'b':
01034                     *p++ = '\b';
01035                     break;
01036 
01037                 case 't':
01038                     *p++ = '\t';
01039                     break;
01040 
01041                 case 'f':
01042                     *p++ = '\f';
01043                     break;
01044 
01045                 case 'v':
01046                     *p++ = '\v';
01047                     break;
01048 
01049                     /* \ and up to 3 octal digits */
01050                 case '0':
01051                 case '1':
01052                 case '2':
01053                 case '3':
01054                 case '4':
01055                 case '5':
01056                 case '6':
01057                 case '7':
01058                     val = c - '0';
01059                     c = *s++;   /* try for 2 */
01060                     if (c >= '0' && c <= '7') {
01061                         val = (val << 3) | (c - '0');
01062                         c = *s++;   /* try for 3 */
01063                         if (c >= '0' && c <= '7')
01064                             val = (val << 3) | (c - '0');
01065                         else
01066                             --s;
01067                     } else
01068                         --s;
01069                     *p++ = (char) val;
01070                     break;
01071 
01072                     /* \x and up to 3 hex digits */
01073                 case 'x':
01074                     val = 'x';  /* Default if no digits */
01075                     c = hextoint(*s++); /* Get next char */
01076                     if (c >= 0) {
01077                         val = c;
01078                         c = hextoint(*s++);
01079                         if (c >= 0) {
01080                             val = (val << 4) + c;
01081                             c = hextoint(*s++);
01082                             if (c >= 0) {
01083                                 val = (val << 4) + c;
01084                             } else
01085                                 --s;
01086                         } else
01087                             --s;
01088                     } else
01089                         --s;
01090                     *p++ = (char) val;
01091                     break;
01092             }
01093         } else
01094             *p++ = (char) c;
01095     }
01096       out:
01097     *p = '\0';
01098     *slen = p - origp;
01099     //for ( char* foo = origp; foo < p ; ++foo )
01100     //  kdDebug(7018) << "  " << *foo << endl;
01101     return s;
01102 }
01103 
01104 
/*
 * Value of a single hexadecimal digit character ('0'-'9', 'a'-'f',
 * 'A'-'F'); -1 if c is not a hex digit.
 */
static int
hextoint(int c)
{
    const unsigned char ch = (unsigned char) c;

    if (isascii(ch)) {
        if (isdigit(ch))
            return c - '0';
        if (c >= 'a' && c <= 'f')
            return (c - 'a') + 10;
        if (c >= 'A' && c <= 'F')
            return (c - 'A') + 10;
    }
    return -1;
}
01119 
01120 /*
01121  * Convert the byte order of the data we are looking at
01122  */
01123 static int
01124 mconvert(union VALUETYPE *p, struct magic *m)
01125 {
01126     switch (m->type) {
01127         case BYTE:
01128             return 1;
01129         case STRING:
01130             /* Null terminate */
01131             p->s[sizeof(p->s) - 1] = '\0';
01132             return 1;
01133 #ifndef WORDS_BIGENDIAN
01134         case SHORT:
01135 #endif
01136         case BESHORT:
01137             p->h = (short) ((p->hs[0] << 8) | (p->hs[1]));
01138             return 1;
01139 #ifndef WORDS_BIGENDIAN
01140         case LONG:
01141         case DATE:
01142 #endif
01143         case BELONG:
01144         case BEDATE:
01145             p->l = (long)
01146                 ((p->hl[0] << 24) | (p->hl[1] << 16) | (p->hl[2] << 8) | (p->hl[3]));
01147             return 1;
01148 #ifdef WORDS_BIGENDIAN
01149         case SHORT:
01150 #endif
01151         case LESHORT:
01152             p->h = (short) ((p->hs[1] << 8) | (p->hs[0]));
01153             return 1;
01154 #ifdef WORDS_BIGENDIAN
01155         case LONG:
01156         case DATE:
01157 #endif
01158         case LELONG:
01159         case LEDATE:
01160             p->l = (long)
01161                 ((p->hl[3] << 24) | (p->hl[2] << 16) | (p->hl[1] << 8) | (p->hl[0]));
01162             return 1;
01163         default:
01164             kdError(7018) << "mconvert: invalid type " << m->type << endl;
01165             return 0;
01166     }
01167 }
01168 
01169 
01170 static int
01171 mget(union VALUETYPE *p, unsigned char *s, struct magic *m,
01172      int nbytes)
01173 {
01174     long offset = m->offset;
01175         switch ( m->type )
01176     {
01177         case BYTE:
01178         if ( offset + 1 > nbytes-1 ) // nbytes = (size of file) + 1
01179             return 0;
01180         break;
01181         case SHORT:
01182         case BESHORT:
01183         case LESHORT:
01184             if ( offset + 2 > nbytes-1 )
01185             return 0;
01186         break;
01187         case LONG:
01188         case BELONG:
01189         case LELONG:
01190         case DATE:
01191         case BEDATE:
01192         case LEDATE:
01193             if ( offset + 4 > nbytes-1 )
01194             return 0;
01195         break;
01196         case STRING:
01197         break;
01198     }
01199 
01200 // The file length might be < sizeof(union VALUETYPE) (David)
01201 // -> pad with zeros (the 'file' command does it this way)
01202 // Thanks to Stan Covington <stan@calderasystems.com> for detailed report
01203     if (offset + (int)sizeof(union VALUETYPE) > nbytes)
01204     {
01205       int have = nbytes - offset;
01206       memset(p, 0, sizeof(union VALUETYPE));
01207       if (have > 0)
01208         memcpy(p, s + offset, have);
01209     } else
01210       memcpy(p, s + offset, sizeof(union VALUETYPE));
01211 
01212     if (!mconvert(p, m))
01213         return 0;
01214 
01215     if (m->flag & INDIR) {
01216 
01217         switch (m->in.type) {
01218             case BYTE:
01219                 offset = p->b + m->in.offset;
01220                 break;
01221             case SHORT:
01222                 offset = p->h + m->in.offset;
01223                 break;
01224             case LONG:
01225                 offset = p->l + m->in.offset;
01226                 break;
01227         }
01228 
01229         if (offset + (int)sizeof(union VALUETYPE) > nbytes)
01230              return 0;
01231 
01232         memcpy(p, s + offset, sizeof(union VALUETYPE));
01233 
01234         if (!mconvert(p, m))
01235             return 0;
01236     }
01237     return 1;
01238 }
01239 
01240 static int
01241 mcheck(union VALUETYPE *p, struct magic *m)
01242 {
01243     register unsigned long l = m->value.l;
01244     register unsigned long v;
01245     int matched;
01246 
01247     if ((m->value.s[0] == 'x') && (m->value.s[1] == '\0')) {
01248         kdError(7018) << "BOINK" << endl;
01249         return 1;
01250     }
01251     switch (m->type) {
01252         case BYTE:
01253             v = p->b;
01254             break;
01255 
01256         case SHORT:
01257         case BESHORT:
01258         case LESHORT:
01259             v = p->h;
01260             break;
01261 
01262         case LONG:
01263         case BELONG:
01264         case LELONG:
01265         case DATE:
01266         case BEDATE:
01267         case LEDATE:
01268             v = p->l;
01269             break;
01270 
01271         case STRING:
01272             l = 0;
01273             /*
01274              * What we want here is: v = strncmp(m->value.s, p->s,
01275              * m->vallen); but ignoring any nulls.  bcmp doesn't give
01276              * -/+/0 and isn't universally available anyway.
01277              */
01278             v = 0;
01279             {
01280                 register unsigned char *a = (unsigned char *) m->value.s;
01281                 register unsigned char *b = (unsigned char *) p->s;
01282                 register int len = m->vallen;
01283                 Q_ASSERT(len);
01284 
01285                 while (--len >= 0)
01286                     if ((v = *b++ - *a++) != 0)
01287                         break;
01288             }
01289             break;
01290         default:
01291             kdError(7018) << "mcheck: invalid type " << m->type << endl;
01292             return 0;   /* NOTREACHED */
01293     }
01294 #if 0
01295     qDebug("Before signextend %08x", v);
01296 #endif
01297     v = signextend(m, v) & m->mask;
01298 #if 0
01299     qDebug("After signextend %08x", v);
01300 #endif
01301 
01302     switch (m->reln) {
01303         case 'x':
01304             matched = 1;
01305             break;
01306 
01307         case '!':
01308             matched = v != l;
01309             break;
01310 
01311         case '=':
01312             matched = v == l;
01313             break;
01314 
01315         case '>':
01316             if (m->flag & UNSIGNED)
01317                 matched = v > l;
01318             else
01319                 matched = (long) v > (long) l;
01320             break;
01321 
01322         case '<':
01323             if (m->flag & UNSIGNED)
01324                 matched = v < l;
01325             else
01326                 matched = (long) v < (long) l;
01327             break;
01328 
01329         case '&':
01330             matched = (v & l) == l;
01331             break;
01332 
01333         case '^':
01334             matched = (v & l) != l;
01335             break;
01336 
01337         default:
01338             matched = 0;
01339             kdError(7018) << "mcheck: can't happen: invalid relation " << m->reln << "." << endl;
01340             break;  /* NOTREACHED */
01341     }
01342 
01343     return matched;
01344 }
01345 
01346 /*
01347  * magic_process - process input file fn. Opens the file and reads a
01348  * fixed-size buffer to begin processing the contents.
01349  */
01350 
01351 void process(struct config_rec* conf, const QString & fn)
01352 {
01353     int fd = 0;
01354     unsigned char buf[HOWMANY + 1]; /* one extra for terminating '\0' */
01355     KDE_struct_stat sb;
01356     int nbytes = 0;         /* number of bytes read from a datafile */
01357         int tagbytes = 0;       /* size of prefixed tag */
01358         QCString fileName = QFile::encodeName( fn );
01359 
01360     /*
01361      * first try judging the file based on its filesystem status
01362      */
01363     if (fsmagic(conf, fileName, &sb) != 0) {
01364         //resultBuf += "\n";
01365         return;
01366     }
01367     if ((fd = KDE_open(fileName, O_RDONLY)) < 0) {
01368         /* We can't open it, but we were able to stat it. */
01369         /*
01370          * if (sb.st_mode & 0002) addResult("writable, ");
01371          * if (sb.st_mode & 0111) addResult("executable, ");
01372          */
01373         //kdDebug(7018) << "can't read `" << fn << "' (" << strerror(errno) << ")." << endl;
01374         conf->resultBuf = MIME_BINARY_UNREADABLE;
01375         return;
01376     }
01377     /*
01378      * try looking at the first HOWMANY bytes
01379      */
01380     if ((nbytes = read(fd, (char *) buf, HOWMANY)) == -1) {
01381         kdError(7018) << "" << fn << " read failed (" << strerror(errno) << ")." << endl;
01382         conf->resultBuf = MIME_BINARY_UNREADABLE;
01383         (void)close(fd);
01384         return;
01385     }
01386         if ((tagbytes = tagmagic(buf, nbytes))) {
01387         // Read buffer at new position
01388         lseek(fd, tagbytes, SEEK_SET);
01389         nbytes = read(fd, (char*)buf, HOWMANY);
01390         if (nbytes < 0) {
01391             conf->resultBuf = MIME_BINARY_UNREADABLE;
01392             (void)close(fd);
01393             return;
01394         }
01395         }
01396     if (nbytes == 0) {
01397         conf->resultBuf = MIME_BINARY_ZEROSIZE;
01398     } else {
01399         buf[nbytes++] = '\0';   /* null-terminate it */
01400         tryit(conf, buf, nbytes);
01401     }
01402 
01403         if ( conf->utimeConf && conf->utimeConf->restoreAccessTime( fn ) )
01404         {
01405             /*
01406              * Try to restore access, modification times if read it.
01407              * This changes the "change" time (ctime), but we can't do anything
01408              * about that.
01409              */
01410             struct utimbuf utbuf;
01411             utbuf.actime = sb.st_atime;
01412             utbuf.modtime = sb.st_mtime;
01413             (void) utime(fileName, &utbuf);
01414         }
01415     (void) close(fd);
01416 }
01417 
01418 
01419 static void tryit(struct config_rec* conf, unsigned char *buf, int nb)
01420 {
01421     /* try tests in /etc/magic (or surrogate magic file) */
01422     if (match(conf, buf, nb))
01423         return;
01424 
01425     /* try known keywords, check for ascii-ness too. */
01426     if (ascmagic(conf, buf, nb) == 1)
01427         return;
01428 
01429         /* see if it's plain text */
01430         if (textmagic(conf, buf, nb))
01431                 return;
01432 
01433     /* abandon hope, all ye who remain here */
01434     conf->resultBuf = MIME_BINARY_UNKNOWN;
01435     conf->accuracy = 0;
01436 }
01437 
01438 static int
01439 fsmagic(struct config_rec* conf, const char *fn, KDE_struct_stat *sb)
01440 {
01441     int ret = 0;
01442 
01443     /*
01444      * Fstat is cheaper but fails for files you don't have read perms on.
01445      * On 4.2BSD and similar systems, use lstat() to identify symlinks.
01446      */
01447     ret = KDE_lstat(fn, sb);  /* don't merge into if; see "ret =" above */
01448 
01449     if (ret) {
01450         return 1;
01451 
01452     }
01453     /*
01454      * if (sb->st_mode & S_ISUID) resultBuf += "setuid ";
01455      * if (sb->st_mode & S_ISGID) resultBuf += "setgid ";
01456      * if (sb->st_mode & S_ISVTX) resultBuf += "sticky ";
01457      */
01458 
01459     switch (sb->st_mode & S_IFMT) {
01460     case S_IFDIR:
01461         conf->resultBuf = MIME_INODE_DIR;
01462         return 1;
01463     case S_IFCHR:
01464         conf->resultBuf = MIME_INODE_CDEV;
01465         return 1;
01466     case S_IFBLK:
01467         conf->resultBuf = MIME_INODE_BDEV;
01468         return 1;
01469         /* TODO add code to handle V7 MUX and Blit MUX files */
01470 #ifdef    S_IFIFO
01471     case S_IFIFO:
01472         conf->resultBuf = MIME_INODE_FIFO;
01473         return 1;
01474 #endif
01475 #ifdef    S_IFLNK
01476     case S_IFLNK:
01477     {
01478         char buf[BUFSIZ + BUFSIZ + 4];
01479         register int nch;
01480         KDE_struct_stat tstatbuf;
01481 
01482         if ((nch = readlink(fn, buf, BUFSIZ - 1)) <= 0) {
01483             conf->resultBuf = MIME_INODE_LINK;
01484             //conf->resultBuf += "\nunreadable";
01485             return 1;
01486         }
01487         buf[nch] = '\0'; /* readlink(2) forgets this */
01488         /* If broken symlink, say so and quit early. */
01489         if (*buf == '/') {
01490             if (KDE_stat(buf, &tstatbuf) < 0) {
01491                 conf->resultBuf = MIME_INODE_LINK;
01492                 //conf->resultBuf += "\nbroken";
01493                 return 1;
01494             }
01495         } else {
01496             char *tmp;
01497             char buf2[BUFSIZ + BUFSIZ + 4];
01498 
01499             strncpy(buf2, fn, BUFSIZ);
01500             buf2[BUFSIZ] = 0;
01501 
01502             if ((tmp = strrchr(buf2, '/')) == NULL) {
01503                 tmp = buf; /* in current dir */
01504             } else {
01505                 /* dir part plus (rel.) link */
01506                 *++tmp = '\0';
01507                 strcat(buf2, buf);
01508                 tmp = buf2;
01509             }
01510             if (KDE_stat(tmp, &tstatbuf) < 0) {
01511                 conf->resultBuf = MIME_INODE_LINK;
01512                 //conf->resultBuf += "\nbroken";
01513                 return 1;
01514             } else
01515                 strcpy(buf, tmp);
01516         }
01517         if (conf->followLinks)
01518             process( conf, QFile::decodeName( buf ) );
01519         else
01520             conf->resultBuf = MIME_INODE_LINK;
01521         return 1;
01522     }
01523     return 1;
01524 #endif
01525 #ifdef    S_IFSOCK
01526 #ifndef __COHERENT__
01527     case S_IFSOCK:
01528         conf->resultBuf = MIME_INODE_SOCK;
01529         return 1;
01530 #endif
01531 #endif
01532     case S_IFREG:
01533         break;
01534     default:
01535         kdError(7018) << "KMimeMagic::fsmagic: invalid mode 0" << sb->st_mode << "." << endl;
01536         /* NOTREACHED */
01537     }
01538 
01539     /*
01540      * regular file, check next possibility
01541      */
01542     if (sb->st_size == 0) {
01543         conf->resultBuf = MIME_BINARY_ZEROSIZE;
01544         return 1;
01545     }
01546     return 0;
01547 }
01548 
01549 /*
01550  * Go through the whole list, stopping if you find a match.  Process all the
01551  * continuations of that match before returning.
01552  *
01553  * We support multi-level continuations:
01554  *
01555  * At any time when processing a successful top-level match, there is a current
01556  * continuation level; it represents the level of the last successfully
01557  * matched continuation.
01558  *
01559  * Continuations above that level are skipped as, if we see one, it means that
01560  * the continuation that controls them - i.e, the lower-level continuation
01561  * preceding them - failed to match.
01562  *
01563  * Continuations below that level are processed as, if we see one, it means
01564  * we've finished processing or skipping higher-level continuations under the
01565  * control of a successful or unsuccessful lower-level continuation, and are
01566  * now seeing the next lower-level continuation and should process it.  The
01567  * current continuation level reverts to the level of the one we're seeing.
01568  *
01569  * Continuations at the current level are processed as, if we see one, there's
01570  * no lower-level continuation that may have failed.
01571  *
01572  * If a continuation matches, we bump the current continuation level so that
01573  * higher-level continuations are processed.
01574  */
01575 static int
01576 match(struct config_rec* conf, unsigned char *s, int nbytes)
01577 {
01578     int cont_level = 0;
01579     union VALUETYPE p;
01580     struct magic *m;
01581 
01582 #ifdef DEBUG_MIMEMAGIC
01583     kdDebug(7018) << "match: conf=" << conf << " m=" << (conf->magic ? "set" : "NULL") << " m->next=" << ((conf->magic && conf->magic->next) ? "set" : "NULL") << " last=" << (conf->last ? "set" : "NULL") << endl;
01584     for (m = conf->magic; m; m = m->next) {
01585         if (isprint((((unsigned long) m) >> 24) & 255) &&
01586             isprint((((unsigned long) m) >> 16) & 255) &&
01587             isprint((((unsigned long) m) >> 8) & 255) &&
01588             isprint(((unsigned long) m) & 255)) {
01589             kdDebug(7018) << "match: POINTER CLOBBERED! " << endl;
01590             break;
01591         }
01592     }
01593 #endif
01594 
01595     for (m = conf->magic; m; m = m->next) {
01596 #ifdef DEBUG_MIMEMAGIC
01597         kdDebug(7018) << "match: line=" << m->lineno << " desc=" << m->desc << endl;
01598 #endif
01599         memset(&p, 0, sizeof(union VALUETYPE));
01600 
01601         /* check if main entry matches */
01602         if (!mget(&p, s, m, nbytes) ||
01603             !mcheck(&p, m)) {
01604             struct magic *m_cont;
01605 
01606             /*
01607              * main entry didn't match, flush its continuations
01608              */
01609             if (!m->next || (m->next->cont_level == 0)) {
01610                 continue;
01611             }
01612             m_cont = m->next;
01613             while (m_cont && (m_cont->cont_level != 0)) {
01614 #ifdef DEBUG_MIMEMAGIC
01615                 kdDebug(7018) << "match: line=" << m->lineno << " cont=" << m_cont->cont_level << " mc=" << m_cont->lineno << " mc->next=" << m_cont << " " << endl;
01616 #endif
01617                 /*
01618                  * this trick allows us to keep *m in sync
01619                  * when the continue advances the pointer
01620                  */
01621                 m = m_cont;
01622                 m_cont = m_cont->next;
01623             }
01624             continue;
01625         }
01626         /* if we get here, the main entry rule was a match */
01627         /* this will be the last run through the loop */
01628 #ifdef DEBUG_MIMEMAGIC
01629         kdDebug(7018) << "match: rule matched, line=" << m->lineno << " type=" << m->type << " " << ((m->type == STRING) ? m->value.s : "") << endl;
01630 #endif
01631 
01632         /* remember the match */
01633         conf->resultBuf = m->desc;
01634 
01635         cont_level++;
01636         /*
01637          * while (m && m->next && m->next->cont_level != 0 && ( m =
01638          * m->next ))
01639          */
01640         m = m->next;
01641         while (m && (m->cont_level != 0)) {
01642 #ifdef DEBUG_MIMEMAGIC
01643                     kdDebug(7018) << "match: line=" << m->lineno << " cont=" << m->cont_level << " type=" << m->type << " " << ((m->type == STRING) ? m->value.s : "") << endl;
01644 #endif
01645                     if (cont_level >= m->cont_level) {
01646                 if (cont_level > m->cont_level) {
01647                     /*
01648                      * We're at the end of the level
01649                      * "cont_level" continuations.
01650                      */
01651                     cont_level = m->cont_level;
01652                 }
01653                 if (mget(&p, s, m, nbytes) &&
01654                     mcheck(&p, m)) {
01655                     /*
01656                      * This continuation matched. Print
01657                      * its message, with a blank before
01658                      * it if the previous item printed
01659                      * and this item isn't empty.
01660                      */
01661 #ifdef DEBUG_MIMEMAGIC
01662                                     kdDebug(7018) << "continuation matched" << endl;
01663 #endif
01664                                     conf->resultBuf = m->desc;
01665                     cont_level++;
01666                 }
01667             }
01668             /* move to next continuation record */
01669             m = m->next;
01670         }
01671                 // KDE-specific: need an actual mimetype for a real match
01672                 // If we only matched a rule with continuations but no mimetype, it's not a match
01673                 if ( !conf->resultBuf.isEmpty() )
01674                 {
01675 #ifdef DEBUG_MIMEMAGIC
01676                     kdDebug(7018) << "match: matched" << endl;
01677 #endif
01678                     return 1;       /* all through */
01679                 }
01680     }
01681 #ifdef DEBUG_MIMEMAGIC
01682     kdDebug(7018) << "match: failed" << endl;
01683 #endif
01684     return 0;               /* no match at all */
01685 }
01686 
// Try to parse prefixed tags before matching on content.
// So far only ID3v2 tags (versions up to 2.4) are handled.
//
// buf/nbytes: the first bytes of the file (at least 40 are required).
// Returns the total size of the tag in bytes (10-byte header, the body and,
// if flagged, the 10-byte v4 footer), i.e. the offset at which the real
// content starts; 0 if no well-formed tag was found.
static int tagmagic(unsigned char *buf, int nbytes)
{
    if (nbytes < 40)
        return 0;
    if (buf[0] != 'I' || buf[1] != 'D' || buf[2] != '3')
        return 0;

    // Sanity (known version, no unknown flags)
    if (buf[3] > 4)
        return 0;
    if (buf[5] & 0x0F)
        return 0;

    // The size is stored "syncsafe": four bytes of 7 bits each.  A byte with
    // its top bit set cannot belong to a valid tag header, and would yield
    // a bogus offset if it were used anyway.
    for (int i = 6; i <= 9; ++i) {
        if (buf[i] & 0x80)
            return 0;
    }

    int size = 10;              // the header itself
    if (buf[5] & 0x10)
        size += 10;             // tag has v4 footer
    size += (buf[6] << 21) | (buf[7] << 14) | (buf[8] << 7) | buf[9];
    return size;
}
01708 
/*
 * A token handed out by Tokenizer: "length" characters starting at "data".
 * The text lives in the tokenizer's buffer and is NOT '\0'-terminated.
 */
struct Token {
    char *data;
    int length;
};

/*
 * Splits a buffer into tokens separated by any run of
 * '\n' '\0' '\t' ' ' '\r' '\f' ',' ';' '>'.
 */
struct Tokenizer
{
    /* buf must stay valid for the lifetime of the tokenizer. */
    Tokenizer(char* buf, int nbytes)
        : data(buf), length(nbytes), pos(0), newline(true)
    {
        // Give every member a defined value, so that isNewLine() may safely
        // be called before the first nextToken().
        token.data = buf;
        token.length = 0;
    }

    /*
     * True if the start of the buffer or a '\n' was seen while producing
     * the most recent token (a '\n' that terminates the token counts too).
     */
    bool isNewLine() {
        return newline;
    }

    /*
     * Returns the next token; a token of length 0 means the buffer is
     * exhausted.  The returned object is owned by the tokenizer and is
     * overwritten by the next call.
     */
    Token* nextToken() {
        newline = (pos == 0);
        token.data = data+pos;
        token.length = 0;
        while(pos<length) {
            switch (data[pos]) {
                case '\n':
                    newline = true;
                    // fall through: a newline is also a separator
                case '\0':
                case '\t':
                case ' ':
                case '\r':
                case '\f':
                case ',':
                case ';':
                case '>':
                    // A separator ends the token.  It is left unconsumed
                    // and skipped as a leading separator by the next call.
                    if (token.length != 0)
                        return &token;
                    token.data++;       // leading separator: skip it
                    break;
                default:
                    token.length++;
            }
            pos++;
        }
        return &token;
    }

private:
    Token token;
    char* data;
    int length;
    int pos;
    bool newline;
};
01762 
01763 
01764 /* an optimization over plain strcmp() */
01765 //#define    STREQ(a, b)    (*(a) == *(b) && strcmp((a), (b)) == 0)
01766 static inline bool STREQ(const Token *token, const char *b) {
01767     const char *a = token->data;
01768     int len = token->length;
01769     if (a == b) return true;
01770     while(*a && *b && len > 0) {
01771         if (*a != *b) return false;
01772         a++; b++; len--;
01773     }
01774     return (len == 0 && *b == 0);
01775 }
01776 
01777 static int ascmagic(struct config_rec* conf, unsigned char *buf, int nbytes)
01778 {
01779     int i;
01780     double pct, maxpct, pctsum;
01781     double pcts[NTYPES];
01782     int mostaccurate, tokencount;
01783     int typeset, jonly, conly, jconly, objconly, cpponly;
01784     int has_escapes = 0;
01785     //unsigned char *s;
01786     //char nbuf[HOWMANY + 1]; /* one extra for terminating '\0' */
01787 
01788     /* these are easy, do them first */
01789     conf->accuracy = 70;
01790 
01791     /*
01792      * for troff, look for . + letter + letter or .\"; this must be done
01793      * to disambiguate tar archives' ./file and other trash from real
01794      * troff input.
01795      */
01796     if (*buf == '.') {
01797         unsigned char *tp = buf + 1;
01798 
01799         while (isascii(*tp) && isspace(*tp))
01800             ++tp;   /* skip leading whitespace */
01801         if ((isascii(*tp) && (isalnum(*tp) || *tp == '\\') &&
01802              isascii(*(tp + 1)) && (isalnum(*(tp + 1)) || *tp == '"'))) {
01803             conf->resultBuf = MIME_APPL_TROFF;
01804             return 1;
01805         }
01806     }
01807     if ((*buf == 'c' || *buf == 'C') &&
01808         isascii(*(buf + 1)) && isspace(*(buf + 1))) {
01809         /* Fortran */
01810         conf->resultBuf = MIME_TEXT_FORTRAN;
01811         return 1;
01812     }
01813     assert(nbytes-1 < HOWMANY + 1);
01814     /* look for tokens - this is expensive! */
01815     has_escapes = (memchr(buf, '\033', nbytes) != NULL);
01816         Tokenizer tokenizer((char*)buf, nbytes);
01817         const Token* token;
01818         bool linecomment = false, blockcomment = false;
01819     const struct names *p;
01820     int typecount[NTYPES];
01821 /*
01822  * Fritz:
01823  * Try a little harder on C/C++/Java.
01824  */
01825     memset(&typecount, 0, sizeof(typecount));
01826     typeset = 0;
01827     jonly = 0;
01828     conly = 0;
01829     jconly = 0;
01830     objconly = 0;
01831     cpponly = 0;
01832     tokencount = 0;
01833         bool foundClass = false; // mandatory for java
01834     // first collect all possible types and count matches
01835         // we stop at '>' too, because of "<title>blah</title>" on HTML pages
01836     while ((token = tokenizer.nextToken())->length > 0) {
01837 #ifdef DEBUG_MIMEMAGIC
01838             kdDebug(7018) << "KMimeMagic::ascmagic token=" << token << endl;
01839 #endif
01840             if (linecomment && tokenizer.isNewLine())
01841                 linecomment = false;
01842             if (blockcomment && STREQ(token, "*/")) {
01843                 blockcomment = false;
01844                 continue;
01845             }
01846             for (p = names; p->name ; p++) {
01847                 if (STREQ(token, p->name)) {
01848 #ifdef DEBUG_MIMEMAGIC
01849                     kdDebug(7018) << "KMimeMagic::ascmagic token matches ! name=" << p->name << " type=" << p->type << endl;
01850 #endif
01851                     tokencount++;
01852                     typeset |= p->type;
01853                     if(p->type & (L_C|L_CPP|L_JAVA|L_OBJC)) {
01854                         if (linecomment || blockcomment) {
01855                             continue;
01856                         }
01857                         else {
01858                             switch(p->type & (L_C|L_CPP|L_JAVA|L_OBJC))
01859                             {
01860                 case L_JAVA:
01861                     jonly++;
01862                     break;
01863                 case L_OBJC:
01864                     objconly++;
01865                     break;
01866                 case L_CPP:
01867                     cpponly++;
01868                     break;
01869                 case (L_CPP|L_JAVA):
01870                     jconly++;
01871                                         if ( !foundClass && STREQ(token, "class") )
01872                                             foundClass = true;
01873                     break;
01874                 case (L_C|L_CPP):
01875                     conly++;
01876                     break;
01877                 default:
01878                                     if (STREQ(token, "//")) linecomment = true;
01879                                     if (STREQ(token, "/*")) blockcomment = true;
01880                             }
01881             }
01882                     }
01883                     for (i = 0; i < (int)NTYPES; i++) {
01884                         if ((1 << i) & p->type) typecount[i]++;
01885                     }
01886         }
01887             }
01888     }
01889 
01890     if (typeset & (L_C|L_CPP|L_JAVA|L_OBJC)) {
01891         conf->accuracy = 60;
01892             if (!(typeset & ~(L_C|L_CPP|L_JAVA|L_OBJC))) {
01893 #ifdef DEBUG_MIMEMAGIC
01894                         kdDebug(7018) << "C/C++/Java/ObjC: jonly=" << jonly << " conly=" << conly << " jconly=" << jconly << " objconly=" << objconly << endl;
01895 #endif
01896             if (jonly > 1 && foundClass) {
01897                 // At least two java-only tokens have matched, including "class"
01898                 conf->resultBuf = QString(types[P_JAVA].type);
01899                 return 1;
01900             }
01901             if (jconly > 1) {
01902                 // At least two non-C (only C++ or Java) token have matched.
01903                 if (typecount[P_JAVA] < typecount[P_CPP])
01904                   conf->resultBuf = QString(types[P_CPP].type);
01905                 else
01906                   conf->resultBuf = QString(types[P_JAVA].type);
01907                 return 1;
01908             }
01909                         if (conly + cpponly > 1) {
01910                  // Either C or C++.
01911                       if (cpponly > 0)
01912                                 conf->resultBuf = QString(types[P_CPP].type);
01913                               else
01914                                 conf->resultBuf = QString(types[P_C].type);
01915                               return 1;
01916                         }
01917             if (objconly > 0) {
01918                 conf->resultBuf =  QString(types[P_OBJC].type);
01919                 return 1;
01920             }
01921           }
01922     }
01923 
01924     /* Neither C, C++ or Java (or all of them without able to distinguish):
01925      * Simply take the token-class with the highest
01926      * matchcount > 0
01927      */
01928     mostaccurate = -1;
01929     maxpct = pctsum = 0.0;
01930     for (i = 0; i < (int)NTYPES; i++) {
01931       if (typecount[i] > 1) { // one word is not enough, we need at least two
01932         pct = (double)typecount[i] / (double)types[i].kwords *
01933             (double)types[i].weight;
01934         pcts[i] = pct;
01935         pctsum += pct;
01936         if (pct > maxpct) {
01937             maxpct = pct;
01938             mostaccurate = i;
01939           }
01940 #ifdef DEBUG_MIMEMAGIC
01941           kdDebug(7018) << "" << types[i].type << " has " << typecount[i] << " hits, " << types[i].kwords << " kw, weight " << types[i].weight << ", " << pct << " -> max = " << maxpct << "\n" << endl;
01942 #endif
01943       }
01944     }
01945     if (mostaccurate >= 0) {
01946             if ( mostaccurate != P_JAVA || foundClass ) // 'class' mandatory for java
01947             {
01948         conf->accuracy = (int)(pcts[mostaccurate] / pctsum * 60);
01949 #ifdef DEBUG_MIMEMAGIC
01950                 kdDebug(7018) << "mostaccurate=" << mostaccurate << " pcts=" << pcts[mostaccurate] << " pctsum=" << pctsum << " accuracy=" << conf->accuracy << endl;
01951 #endif
01952         conf->resultBuf = QString(types[mostaccurate].type);
01953         return 1;
01954             }
01955     }
01956 
01957     switch (is_tar(buf, nbytes)) {
01958         case 1:
01959             /* V7 tar archive */
01960             conf->resultBuf = MIME_APPL_TAR;
01961             conf->accuracy = 90;
01962             return 1;
01963         case 2:
01964             /* POSIX tar archive */
01965             conf->resultBuf = MIME_APPL_TAR;
01966             conf->accuracy = 90;
01967             return 1;
01968     }
01969 
01970     for (i = 0; i < nbytes; i++) {
01971         if (!isascii(*(buf + i)))
01972             return 0;   /* not all ascii */
01973     }
01974 
01975     /* all else fails, but it is ascii... */
01976     conf->accuracy = 90;
01977     if (has_escapes) {
01978         /* text with escape sequences */
01979         /* we leave this open for further differentiation later */
01980         conf->resultBuf = MIME_TEXT_UNKNOWN;
01981     } else {
01982         /* plain text */
01983         conf->resultBuf = MIME_TEXT_PLAIN;
01984     }
01985     return 1;
01986 }
01987 
01988 /* Maximal length of a line we consider "reasonable". */
01989 #define TEXT_MAXLINELEN 300
01990 
01991 // This code is taken from the "file" command, where it is licensed
01992 // in the "beer-ware license" :-)
01993 // Original author: <joerg@FreeBSD.ORG>
01994 // Simplified by David Faure to avoid the static array char[256].
01995 static int textmagic(struct config_rec* conf, unsigned char * buf, int nbytes)
01996 {
01997     int i;
01998     unsigned char *cp;
01999 
02000     nbytes--;
02001 
02002     /* First, look whether there are "unreasonable" characters. */
02003     for (i = 0, cp = buf; i < nbytes; i++, cp++)
02004         if ((*cp < 8) || (*cp>13 && *cp<32 && *cp!=27 ) || (*cp==0x7F))
02005             return 0;
02006 
02007     /* Now, look whether the file consists of lines of
02008      * "reasonable" length. */
02009 
02010     for (i = 0; i < nbytes;) {
02011         cp = (unsigned char *) memchr(buf, '\n', nbytes - i);
02012         if (cp == NULL) {
02013             /* Don't fail if we hit the end of buffer. */
02014             if (i + TEXT_MAXLINELEN >= nbytes)
02015                 break;
02016             else
02017                 return 0;
02018         }
02019         if (cp - buf > TEXT_MAXLINELEN)
02020             return 0;
02021         i += (cp - buf + 1);
02022         buf = cp + 1;
02023     }
02024     conf->resultBuf = MIME_TEXT_PLAIN;
02025     return 1;
02026 }
02027 
02028 
02029 /*
02030  * is_tar() -- figure out whether file is a tar archive.
02031  *
02032  * Stolen (by author of file utility) from the public domain tar program: Public
02033  * Domain version written 26 Aug 1985 John Gilmore (ihnp4!hoptoad!gnu).
02034  *
02035  * @(#)list.c 1.18 9/23/86 Public Domain - gnu $Id: mod_mime_magic.c,v 1.7
02036  * 1997/06/24 00:41:02 ikluft Exp ikluft $
02037  *
02038  * Comments changed and some code/comments reformatted for file command by Ian
02039  * Darwin.
02040  */
02041 
02042 #define    isodigit(c)    ( ((c) >= '0') && ((c) <= '7') )
02043 
02044 /*
02045  * Return 0 if the checksum is bad (i.e., probably not a tar archive), 1 for
02046  * old UNIX tar file, 2 for Unix Std (POSIX) tar file.
02047  */
02048 
02049 static int
02050 is_tar(unsigned char *buf, int nbytes)
02051 {
02052     register union record *header = (union record *) buf;
02053     register int i;
02054     register long sum,
02055      recsum;
02056     register char *p;
02057 
02058     if (nbytes < (int)sizeof(union record))
02059          return 0;
02060 
02061     recsum = from_oct(8, header->header.chksum);
02062 
02063     sum = 0;
02064     p = header->charptr;
02065     for (i = sizeof(union record); --i >= 0;) {
02066         /*
02067          * We can't use unsigned char here because of old compilers,
02068          * e.g. V7.
02069          */
02070         sum += 0xFF & *p++;
02071     }
02072 
02073     /* Adjust checksum to count the "chksum" field as blanks. */
02074     for (i = sizeof(header->header.chksum); --i >= 0;)
02075         sum -= 0xFF & header->header.chksum[i];
02076     sum += ' ' * sizeof header->header.chksum;
02077 
02078     if (sum != recsum)
02079         return 0;       /* Not a tar archive */
02080 
02081     if (0 == strcmp(header->header.magic, TMAGIC))
02082         return 2;       /* Unix Standard tar archive */
02083 
02084     return 1;               /* Old fashioned tar archive */
02085 }
02086 
02087 
/*
 * Quick and dirty octal conversion.
 *
 * Skips leading whitespace, then accumulates octal digits, consuming at most
 * digs characters of the field in total.
 *
 * Returns the parsed value, or -1 if the field is invalid: entirely blank,
 * or the digits are followed (still inside the field) by a character that is
 * neither whitespace nor NUL.
 */
static long
from_oct(int digs, char *where)
{
    /*
     * The <ctype.h> classifiers are only defined for values representable
     * as unsigned char (or EOF). The field may contain arbitrary bytes, so
     * always convert before calling isspace().
     */
    while (isspace((unsigned char) *where)) {   /* Skip spaces */
        where++;
        if (--digs <= 0)
            return -1;  /* All blank field */
    }

    long value = 0;
    while (digs > 0 && *where >= '0' && *where <= '7') {  /* Scan til nonoctal */
        value = (value << 3) | (*where++ - '0');
        --digs;
    }

    if (digs > 0 && *where && !isspace((unsigned char) *where))
        return -1;      /* Ended on non-space/nul */

    return value;
}
02114 
02115 KMimeMagic::KMimeMagic()
02116 {
02117     // Magic file detection init
02118     QString mimefile = locate( "mime", "magic" );
02119     init( mimefile );
02120     // Add snippets from share/config/magic/*
02121     QStringList snippets = KGlobal::dirs()->findAllResources( "config", "magic/*.magic", true );
02122     for ( QStringList::Iterator it = snippets.begin() ; it != snippets.end() ; ++it )
02123         if ( !mergeConfig( *it ) )
02124             kdWarning() << k_funcinfo << "Failed to parse " << *it << endl;
02125 }
02126 
02127 KMimeMagic::KMimeMagic(const QString & _configfile)
02128 {
02129     init( _configfile );
02130 }
02131 
02132 void KMimeMagic::init( const QString& _configfile )
02133 {
02134     int result;
02135     conf = new config_rec;
02136 
02137     /* set up the magic list (empty) */
02138     conf->magic = conf->last = NULL;
02139     magicResult = NULL;
02140     conf->followLinks = false;
02141 
02142         conf->utimeConf = 0L; // created on demand
02143     /* on the first time through we read the magic file */
02144     result = apprentice(_configfile);
02145     if (result == -1)
02146         return;
02147 #ifdef MIME_MAGIC_DEBUG_TABLE
02148     test_table();
02149 #endif
02150 }
02151 
02152 /*
02153  * The destructor.
02154  * Free the magic-table and other resources.
02155  */
02156 KMimeMagic::~KMimeMagic()
02157 {
02158     if (conf) {
02159         struct magic *p = conf->magic;
02160         struct magic *q;
02161         while (p) {
02162             q = p;
02163             p = p->next;
02164             free(q);
02165         }
02166                 delete conf->utimeConf;
02167         delete conf;
02168     }
02169         delete magicResult;
02170 }
02171 
02172 bool
02173 KMimeMagic::mergeConfig(const QString & _configfile)
02174 {
02175     kdDebug(7018) << k_funcinfo << _configfile << endl;
02176     int result;
02177 
02178     if (_configfile.isEmpty())
02179         return false;
02180     result = apprentice(_configfile);
02181     if (result == -1) {
02182         return false;
02183     }
02184 #ifdef MIME_MAGIC_DEBUG_TABLE
02185     test_table();
02186 #endif
02187     return true;
02188 }
02189 
02190 bool
02191 KMimeMagic::mergeBufConfig(char * _configbuf)
02192 {
02193     int result;
02194 
02195     if (conf) {
02196         result = buff_apprentice(_configbuf);
02197         if (result == -1)
02198             return false;
02199 #ifdef MIME_MAGIC_DEBUG_TABLE
02200         test_table();
02201 #endif
02202         return true;
02203     }
02204     return false;
02205 }
02206 
02207 void
02208 KMimeMagic::setFollowLinks( bool _enable )
02209 {
02210     conf->followLinks = _enable;
02211 }
02212 
02213 KMimeMagicResult *
02214 KMimeMagic::findBufferType(const QByteArray &array)
02215 {
02216     unsigned char buf[HOWMANY + 1]; /* one extra for terminating '\0' */
02217 
02218     conf->resultBuf = QString::null;
02219     if ( !magicResult )
02220       magicResult = new KMimeMagicResult();
02221     magicResult->setInvalid();
02222     conf->accuracy = 100;
02223 
02224     int nbytes = array.size();
02225 
02226         if (nbytes > HOWMANY)
02227                 nbytes = HOWMANY;
02228         memcpy(buf, array.data(), nbytes);
02229         if (nbytes == 0) {
02230                 conf->resultBuf = MIME_BINARY_ZEROSIZE;
02231         } else {
02232                 buf[nbytes++] = '\0';   /* null-terminate it */
02233                 tryit(conf, buf, nbytes);
02234         }
02235         /* if we have any results, put them in the request structure */
02236     magicResult->setMimeType(conf->resultBuf.stripWhiteSpace());
02237     magicResult->setAccuracy(conf->accuracy);
02238         return magicResult;
02239 }
02240 
02241 static void
02242 refineResult(KMimeMagicResult *r, const QString & _filename)
02243 {
02244     QString tmp = r->mimeType();
02245     if (tmp.isEmpty())
02246         return;
02247     if ( tmp == "text/x-c" || tmp == "text/x-objc" )
02248     {
02249         if ( _filename.right(2) == ".h" )
02250             tmp += "hdr";
02251         else
02252             tmp += "src";
02253         r->setMimeType(tmp);
02254     }
02255     else
02256     if ( tmp == "text/x-c++" )
02257     {
02258         if ( _filename.endsWith(".h")
02259           || _filename.endsWith(".hh")
02260           || _filename.endsWith(".H")
02261           || !_filename.right(4).contains('.'))
02262             tmp += "hdr";
02263         else
02264             tmp += "src";
02265         r->setMimeType(tmp);
02266     }
02267     else
02268     if ( tmp == "application/x-sharedlib" )
02269     {
02270         if ( _filename.find( ".so" ) == -1 ) 
02271         {
02272             tmp = "application/x-executable";
02273             r->setMimeType( tmp );
02274         }
02275     }
02276 }
02277 
02278 KMimeMagicResult *
02279 KMimeMagic::findBufferFileType( const QByteArray &data,
02280                 const QString &fn)
02281 {
02282         KMimeMagicResult * r = findBufferType( data );
02283     refineResult(r, fn);
02284         return r;
02285 }
02286 
02287 /*
02288  * Find the content-type of the given file.
02289  */
02290 KMimeMagicResult* KMimeMagic::findFileType(const QString & fn)
02291 {
02292 #ifdef DEBUG_MIMEMAGIC
02293     kdDebug(7018) << "KMimeMagic::findFileType " << fn << endl;
02294 #endif
02295     conf->resultBuf = QString::null;
02296 
02297         if ( !magicResult )
02298       magicResult = new KMimeMagicResult();
02299     magicResult->setInvalid();
02300     conf->accuracy = 100;
02301 
02302         if ( !conf->utimeConf )
02303             conf->utimeConf = new KMimeMagicUtimeConf();
02304 
02305         /* process it based on the file contents */
02306         process(conf, fn );
02307 
02308         /* if we have any results, put them in the request structure */
02309         //finishResult();
02310     magicResult->setMimeType(conf->resultBuf.stripWhiteSpace());
02311     magicResult->setAccuracy(conf->accuracy);
02312     refineResult(magicResult, fn);
02313         return magicResult;
02314 }
KDE Home | KDE Accessibility Home | Description of Access Keys