* Initial import of r_magic

- Imported code from OpenBSD trunk
  - Remove the elf part (-1KLOC)
  - Some refactoring in order to be more r2-like
  - fixed warning messages
  - readdir one can be dangerous /cc @vect01
  - Remove libmagic dependency
* Rename asm.case into asm.ucase
This commit is contained in:
pancake 2011-09-10 01:30:03 +02:00
parent 0357ddf18e
commit 323441c122
30 changed files with 8764 additions and 42 deletions

13
configure vendored
View File

@ -152,7 +152,6 @@ Optional Features:
--without-debugger disable native debugger features
--without-ewf disable EWF dependency
--without-gmp disable GMP dependency
--without-magic disable magic dependency
--without-ssl disable openssl dependency
--with-little-endian force little endian
--with-big-endian force big endian
@ -209,7 +208,7 @@ echo "LANGS: c"
echo "REQUIRED: libdl"
echo "OPTIONAL: libewf"
echo "PKG-CONFIG: openssl"
echo "FLAGS: --without-debugger --without-ewf --without-gmp --without-magic --without-ssl --with-little-endian --with-big-endian --with-compiler=gcc --with-ostype=auto --without-pic --with-nonpic --with-libversion=xxx"
echo "FLAGS: --without-debugger --without-ewf --without-gmp --without-ssl --with-little-endian --with-big-endian --with-compiler=gcc --with-ostype=auto --without-pic --with-nonpic --with-libversion=xxx"
exit 0
;;
"--cache-file")
@ -254,7 +253,6 @@ echo "FLAGS: --without-debugger --without-ewf --without-gmp --without-magic
"--without-debugger") DEBUGGER="0"; ;;
"--without-ewf") HAVE_EWF="0"; ;;
"--without-gmp") HAVE_GMP="0"; ;;
"--without-magic") HAVE_MAGIC="0"; ;;
"--without-ssl") HAVE_OPENSSL="0"; ;;
"--with-little-endian") HAVE_LILEND="1"; ;;
"--with-big-endian") HAVE_BIGEND="1"; ;;
@ -280,7 +278,7 @@ parse_options $1
shift
done
ENVWORDS="MANDIR INFODIR LIBDIR INCLUDEDIR LOCALSTATEDIR SYSCONFDIR DATADIR LIBEXECDIR SBINDIR BINDIR EPREFIX PREFIX SPREFIX TARGET HOST BUILD INSTALL INSTALL_LIB INSTALL_MAN INSTALL_PROGRAM INSTALL_DIR INSTALL_SCRIPT INSTALL_DATA HOST_OS HOST_CPU BUILD_OS BUILD_CPU TARGET_OS TARGET_CPU PKGNAME VPATH VERSION CONTACT CONTACT_NAME CONTACT_MAIL CC CFLAGS LDFLAGS HAVE_LANG_C DEBUGGER HAVE_LIB_DL DL_LIBS LIL_ENDIAN BIG_ENDIAN BYTEORDER HAVE_LIB_EWF HAVE_EWF HAVE_LIB_GMP HAVE_GMP HAVE_LIB_MAGIC HAVE_MAGIC HAVE_LIB_SSL SSL_CFLAGS SSL_LDFLAGS HAVE_PKGCFG_OPENSSL HAVE_OPENSSL HAVE_LILEND HAVE_BIGEND HAVE_LIB_LUA5_1 HAVE_LIB_TCC USERCC USEROSTYPE WITHPIC WITHNONPIC LIBVERSION"
ENVWORDS="MANDIR INFODIR LIBDIR INCLUDEDIR LOCALSTATEDIR SYSCONFDIR DATADIR LIBEXECDIR SBINDIR BINDIR EPREFIX PREFIX SPREFIX TARGET HOST BUILD INSTALL INSTALL_LIB INSTALL_MAN INSTALL_PROGRAM INSTALL_DIR INSTALL_SCRIPT INSTALL_DATA HOST_OS HOST_CPU BUILD_OS BUILD_CPU TARGET_OS TARGET_CPU PKGNAME VPATH VERSION CONTACT CONTACT_NAME CONTACT_MAIL CC CFLAGS LDFLAGS HAVE_LANG_C DEBUGGER HAVE_LIB_DL DL_LIBS LIL_ENDIAN BIG_ENDIAN BYTEORDER HAVE_LIB_EWF HAVE_EWF HAVE_LIB_GMP HAVE_GMP HAVE_MAGIC HAVE_LIB_SSL SSL_CFLAGS SSL_LDFLAGS HAVE_PKGCFG_OPENSSL HAVE_OPENSSL HAVE_LILEND HAVE_BIGEND HAVE_LIB_LUA5_1 HAVE_LIB_TCC USERCC USEROSTYPE WITHPIC WITHNONPIC LIBVERSION"
create_environ
@ -360,9 +358,6 @@ HAVE_LIB_EWF="0"; fi
check_library HAVE_LIB_GMP gmp 0
if [ "$HAVE_GMP" = "0" ]; then
HAVE_LIB_GMP="0"; fi
check_library HAVE_LIB_MAGIC magic 0
if [ "$HAVE_MAGIC" = "0" ]; then
HAVE_LIB_MAGIC="0"; fi
check_library HAVE_LIB_SSL ssl 0
printf 'checking pkg-config flags for openssl... '
tmp=`pkg-config --cflags openssl 2>/dev/null`
@ -421,7 +416,7 @@ for A in ${ENVWORDS} ; do
SEDFLAGS="${SEDFLAGS}s,@${A}@,${VAR},g;"
done
SEDFLAGS="${SEDFLAGS}'"
for A in ./config-user.mk libr/include/r_userconf.h pkgcfg/r_io.pc pkgcfg/r_asm.pc pkgcfg/r_bin.pc pkgcfg/r_anal.pc pkgcfg/r_hash.pc pkgcfg/r_cons.pc pkgcfg/r_diff.pc pkgcfg/r_core.pc pkgcfg/r_lang.pc pkgcfg/r_socket.pc pkgcfg/r_debug.pc pkgcfg/r_reg.pc pkgcfg/r_cmd.pc pkgcfg/r_config.pc pkgcfg/r_flags.pc pkgcfg/r_line.pc pkgcfg/r_syscall.pc pkgcfg/r_sign.pc pkgcfg/r_util.pc pkgcfg/r_search.pc pkgcfg/r_bp.pc pkgcfg/r_lib.pc pkgcfg/r_parse.pc pkgcfg/r_print.pc pkgcfg/r_fs.pc pkgcfg/r_db.pc ; do # SUBDIRS
for A in ./config-user.mk libr/include/r_userconf.h pkgcfg/r_io.pc pkgcfg/r_asm.pc pkgcfg/r_bin.pc pkgcfg/r_anal.pc pkgcfg/r_hash.pc pkgcfg/r_cons.pc pkgcfg/r_diff.pc pkgcfg/r_core.pc pkgcfg/r_lang.pc pkgcfg/r_socket.pc pkgcfg/r_debug.pc pkgcfg/r_reg.pc pkgcfg/r_cmd.pc pkgcfg/r_config.pc pkgcfg/r_flags.pc pkgcfg/r_line.pc pkgcfg/r_syscall.pc pkgcfg/r_sign.pc pkgcfg/r_util.pc pkgcfg/r_search.pc pkgcfg/r_bp.pc pkgcfg/r_lib.pc pkgcfg/r_parse.pc pkgcfg/r_print.pc pkgcfg/r_fs.pc pkgcfg/r_db.pc pkgcfg/r_magic.pc ; do # SUBDIRS
if [ -f "${VPATH}/${A}.acr" ]; then
SD_TARGET=${A}
else
@ -455,7 +450,7 @@ done
do_remove
echo
echo "Final report:"
for A in PREFIX HAVE_LIB_EWF HAVE_LIB_GMP HAVE_OPENSSL HAVE_LIB_TCC HAVE_LIB_MAGIC DEBUGGER USERCC USEROSTYPE LIL_ENDIAN VERSION LIBVERSION ; do # REPORT
for A in PREFIX HAVE_LIB_EWF HAVE_LIB_GMP HAVE_OPENSSL HAVE_LIB_TCC DEBUGGER USERCC USEROSTYPE LIL_ENDIAN VERSION LIBVERSION ; do # REPORT
eval VAL="\$${A}"
[ -z "${VAL}" ] && VAL="(null)"
echo " - ${A} = ${VAL}"

View File

@ -18,10 +18,6 @@ CHKLIB gmp
ARG_WITHOUT HAVE_GMP gmp disable GMP dependency ;
IFNOT HAVE_GMP { HAVE_LIB_GMP = 0 ; }
CHKLIB magic
ARG_WITHOUT HAVE_MAGIC magic disable magic dependency ;
IFNOT HAVE_MAGIC { HAVE_LIB_MAGIC = 0 ; }
CHKLIB ssl
PKGCFG SSL_CFLAGS SSL_LDFLAGS openssl
ARG_WITHOUT HAVE_OPENSSL ssl disable openssl dependency ;
@ -83,17 +79,17 @@ IFEQ LIBVERSION xxx ; {
}
REPORT PREFIX HAVE_LIB_EWF HAVE_LIB_GMP HAVE_OPENSSL HAVE_LIB_TCC
HAVE_LIB_MAGIC DEBUGGER USERCC USEROSTYPE LIL_ENDIAN VERSION LIBVERSION ;
DEBUGGER USERCC USEROSTYPE LIL_ENDIAN VERSION LIBVERSION ;
(( deprecated:
pkgcfg/r_th.pc
pkgcfg/r_db.pc
))
(( TODO: Add the rest of .pc files here.. add a rule for acr? ))
SUBDIRS ./config-user.mk
libr/include/r_userconf.h
pkgcfg/r_io.pc
pkgcfg/r_db.pc
pkgcfg/r_magic.pc
pkgcfg/r_asm.pc
pkgcfg/r_bin.pc
pkgcfg/r_anal.pc

View File

@ -8,7 +8,7 @@ IFX=${DESTDIR}/${INCLUDEDIR}
PWD=`pwd`
# Libraries
LIBLIST=util socket db cons line lib io lang flags bin hash config syscall
LIBLIST=util magic socket db cons line lib io lang flags bin hash config syscall
LIBLIST+=cmd reg asm diff anal print parse search egg bp sign debug fs core
#DEPRECATED APIS : LIBLIST+=th crypto
# sysproxy ... common wat du?

View File

@ -2,7 +2,7 @@ NAME=r_core
DEPS=r_config r_cons r_line r_io r_cmd r_util r_print r_flags r_asm r_lib
DEPS+=r_debug r_hash r_bin r_lang r_io r_anal r_parse r_print r_bp
DEPS+=r_reg r_search r_syscall r_sign r_diff r_socket r_fs
DEPS+=r_reg r_search r_syscall r_sign r_diff r_socket r_fs r_magic
OBJ=core.o cmd.o file.o config.o visual.o io.o yank.o libs.o
OBJ+=anal.o project.o gdiff.o asm.o rtr.o vmenus.o disasm.o patch.o
@ -16,7 +16,3 @@ LDFLAGS+=-lpthread
endif
include ../rules.mk
ifeq ($(HAVE_LIB_MAGIC),1)
LDFLAGS+=-lmagic
endif

View File

@ -5,9 +5,6 @@
#include <sys/types.h>
#include <ctype.h>
#include <stdarg.h>
#if HAVE_LIB_MAGIC
#include <magic.h>
#endif
static int printzoomcallback(void *user, int mode, ut64 addr, ut8 *bufz, ut64 size) {
RCore *core = (RCore *) user;
@ -1573,10 +1570,9 @@ static int cmd_info(void *data, const char *input) {
}
static void r_core_magic_at(RCore *core, const char *file, ut64 addr, int depth, int v) {
#if HAVE_LIB_MAGIC
char *fmt, *q, *p;
const char *str;
magic_t ck;
r_magic_t ck;
if (depth--<0)
return;
@ -1584,13 +1580,13 @@ static void r_core_magic_at(RCore *core, const char *file, ut64 addr, int depth,
r_core_seek (core, addr, R_TRUE);
if (*file == ' ') file++;
if (!*file) file = NULL;
ck = magic_open (0);
if (magic_load (ck, file) == -1) {
eprintf ("r_core_magic(\"%s\") %s\n", file, magic_error (ck));
ck = r_magic_open (0);
if (r_magic_load (ck, file) == -1) {
eprintf ("r_core_magic(\"%s\") %s\n", file, r_magic_error (ck));
return;
}
if (v) r_cons_printf ("# pm %s @ 0x%"PFMT64x"\n", file?file:"", addr);
str = magic_buffer (ck, core->block, core->blocksize);
str = r_magic_buffer (ck, core->block, core->blocksize);
if (str) {
if (!v && !strcmp (str, "data"))
return;
@ -1620,10 +1616,7 @@ static void r_core_magic_at(RCore *core, const char *file, ut64 addr, int depth,
}
free (p);
}
magic_close (ck);
#else
eprintf ("r_core_magic: Compiled without magic :(\n");
#endif
r_magic_close (ck);
}
static void r_core_magic(RCore *core, const char *file, int v) {
@ -3236,8 +3229,6 @@ static int cmd_search(void *data, const char *input) {
dosearch = R_TRUE;
break;
case 'm':
#if HAVE_LIB_MAGIC
/* XXX: This is pretty sloow */
dosearch = R_FALSE;
if (input[1]==' ') {
const char *file = input+2;
@ -3247,9 +3238,6 @@ static int cmd_search(void *data, const char *input) {
r_core_magic (core, file, R_FALSE);
}
} else eprintf ("Usage: /m [file]\n");
#else
eprintf ("r_core_magic: Compiled without magic :(\n");
#endif
break;
case 'p':
{

View File

@ -389,7 +389,7 @@ R_API int r_core_config_init(RCore *core) {
r_config_set (cfg, "asm.lbytes", "true");
r_config_set (cfg, "asm.middle", "false"); // jump in the middle because of antidisasm tricks
r_config_set (cfg, "asm.comments", "true");
r_config_set (cfg, "asm.case", "false");
r_config_set (cfg, "asm.ucase", "false");
r_config_set (cfg, "asm.stackptr", "true");
r_config_set (cfg, "asm.dwarf", "false");
r_config_set_i (cfg, "asm.nbytes", 8);

View File

@ -42,7 +42,7 @@ R_API int r_core_print_disasm(RPrint *p, RCore *core, ut64 addr, ut8 *buf, int l
// TODO: All those options must be print flags
int show_color = r_config_get_i (core->config, "scr.color");
int acase = r_config_get_i (core->config, "asm.case");
int acase = r_config_get_i (core->config, "asm.ucase");
int decode = r_config_get_i (core->config, "asm.decode");
int pseudo = r_config_get_i (core->config, "asm.pseudo");
int filter = r_config_get_i (core->config, "asm.filter");

View File

@ -3,7 +3,7 @@
#define FSP(x) sfs_##x
#define FSS(x) x##_sfs
#define FSNAME "sfs"
#define FSDESC "SFS filesystem"
#define FSDESC "Amiga Smart FileSystem"
#define FSPRFX sfs
#define FSIPTR grub_sfs_fs

View File

@ -2,6 +2,7 @@
#define _INCLUDE_R_CORE_H_
#include "r_types.h"
#include "r_magic.h"
#include "r_io.h"
#include "r_fs.h"
#include "r_lib.h"

314
libr/include/r_magic.h Normal file
View File

@ -0,0 +1,314 @@
#ifndef R_MAGIC_H
#define R_MAGIC_H
#include <r_types.h>
#ifndef MAGIC
#define MAGIC "/etc/magic"
#endif
#ifndef ENABLE_CONDITIONALS
#define ENABLE_CONDITIONALS 1
#endif
#ifdef __EMX__
#define PATHSEP ';'
#else
#define PATHSEP ':'
#endif
#define public
#ifndef __GNUC_PREREQ__
#ifdef __GNUC__
#define __GNUC_PREREQ__(x, y) \
((__GNUC__ == (x) && __GNUC_MINOR__ >= (y)) || \
(__GNUC__ > (x)))
#else
#define __GNUC_PREREQ__(x, y) 0
#endif
#endif
#ifndef MIN
#define MIN(a,b) (((a) < (b)) ? (a) : (b))
#endif
#ifndef MAX
#define MAX(a,b) (((a) > (b)) ? (a) : (b))
#endif
#ifndef HOWMANY
# define HOWMANY (256 * 1024) /* how much of the file to look at */
#endif
#define MAXMAGIS 8192 /* max entries in any one magic file
or directory */
#define MAXDESC 64 /* max leng of text description/MIME type */
#define MAXstring 32 /* max leng of "string" types */
#define MAGICNO 0xF11E041C
#define VERSIONNO 5
#define FILE_MAGICSIZE (32 * 6)
#define FILE_LOAD 0
#define FILE_CHECK 1
#define FILE_COMPILE 2
/*
 * One magic entry. The "Word N" markers below group the fields into
 * 32-bit words (assuming utN is N bits wide and the compiler inserts no
 * padding -- TODO confirm against r_types.h).
 * NOTE(review): FILE_MAGICSIZE (32 * 6 = 192) presumably is meant to equal
 * sizeof(struct magic); keep the two in sync when changing the layout.
 *
 * The #define lines interleaved with the fields are ordinary file-scope
 * macros; they are only placed next to the field whose values they name.
 */
struct magic {
/* Word 1 */
ut16 cont_level; /* level of ">" */
ut8 flag;
#define INDIR 0x01 /* if '(...)' appears */
#define OFFADD 0x02 /* if '>&' or '>...(&' appears */
#define INDIROFFADD 0x04 /* if '>&(' appears */
#define UNSIGNED 0x08 /* comparison is unsigned */
#define NOSPACE 0x10 /* suppress space character before output */
#define BINTEST 0x20 /* test is for a binary type (set only
for top-level tests) */
#define TEXTTEST 0 /* for passing to file_softmagic */
ut8 dummy1;
/* Word 2 */
ut8 reln; /* relation (0=eq, '>'=gt, etc) */
ut8 vallen; /* length of string value, if any */
ut8 type; /* comparison type (FILE_*) */
ut8 in_type; /* type of indirection */
#define FILE_INVALID 0
#define FILE_BYTE 1
#define FILE_SHORT 2
#define FILE_DEFAULT 3
#define FILE_LONG 4
#define FILE_STRING 5
#define FILE_DATE 6
#define FILE_BESHORT 7
#define FILE_BELONG 8
#define FILE_BEDATE 9
#define FILE_LESHORT 10
#define FILE_LELONG 11
#define FILE_LEDATE 12
#define FILE_PSTRING 13
#define FILE_LDATE 14
#define FILE_BELDATE 15
#define FILE_LELDATE 16
#define FILE_REGEX 17
#define FILE_BESTRING16 18
#define FILE_LESTRING16 19
#define FILE_SEARCH 20
#define FILE_MEDATE 21
#define FILE_MELDATE 22
#define FILE_MELONG 23
#define FILE_QUAD 24
#define FILE_LEQUAD 25
#define FILE_BEQUAD 26
#define FILE_QDATE 27
#define FILE_LEQDATE 28
#define FILE_BEQDATE 29
#define FILE_QLDATE 30
#define FILE_LEQLDATE 31
#define FILE_BEQLDATE 32
#define FILE_FLOAT 33
#define FILE_BEFLOAT 34
#define FILE_LEFLOAT 35
#define FILE_DOUBLE 36
#define FILE_BEDOUBLE 37
#define FILE_LEDOUBLE 38
#define FILE_NAMES_SIZE 39/* size of array to contain all names */
/* True when type t carries its value in value.s rather than a number */
#define IS_STRING(t) \
((t) == FILE_STRING || \
(t) == FILE_PSTRING || \
(t) == FILE_BESTRING16 || \
(t) == FILE_LESTRING16 || \
(t) == FILE_REGEX || \
(t) == FILE_SEARCH || \
(t) == FILE_DEFAULT)
#define FILE_FMT_NONE 0
#define FILE_FMT_NUM 1 /* "cduxXi" */
#define FILE_FMT_STR 2 /* "s" */
#define FILE_FMT_QUAD 3 /* "ll" */
#define FILE_FMT_FLOAT 4 /* "eEfFgG" */
#define FILE_FMT_DOUBLE 5 /* "eEfFgG" */
/* Word 3 */
ut8 in_op; /* operator for indirection */
ut8 mask_op; /* operator for mask */
/* Both branches occupy two bytes, so the layout does not depend on
   ENABLE_CONDITIONALS (which this header defines by default). */
#ifdef ENABLE_CONDITIONALS
ut8 cond; /* conditional type */
ut8 dummy2;
#else
ut8 dummy2;
ut8 dummy3;
#endif
#define FILE_OPS "&|^+-*/%"
#define FILE_OPAND 0
#define FILE_OPOR 1
#define FILE_OPXOR 2
#define FILE_OPADD 3
#define FILE_OPMINUS 4
#define FILE_OPMULTIPLY 5
#define FILE_OPDIVIDE 6
#define FILE_OPMODULO 7
#define FILE_OPS_MASK 0x07 /* mask for above ops */
#define FILE_UNUSED_1 0x08
#define FILE_UNUSED_2 0x10
#define FILE_UNUSED_3 0x20
#define FILE_OPINVERSE 0x40
#define FILE_OPINDIRECT 0x80
#ifdef ENABLE_CONDITIONALS
#define COND_NONE 0
#define COND_IF 1
#define COND_ELIF 2
#define COND_ELSE 3
#endif /* ENABLE_CONDITIONALS */
/* Word 4 */
ut32 offset; /* offset to magic number */
/* Word 5 */
int32_t in_offset; /* offset from indirection */
/* Word 6 */
ut32 lineno; /* line number in magic file */
/* Word 7,8 */
union {
ut64 _mask; /* for use with numeric and date types */
struct {
ut32 _count; /* repeat/line count */
ut32 _flags; /* modifier flags */
} _s; /* for use with string types */
} _u;
/* Shorthand accessors for the union above */
#define num_mask _u._mask
#define str_range _u._s._count
#define str_flags _u._s._flags
/* Words 9-16 */
union VALUETYPE {
ut8 b;
ut16 h;
ut32 l;
ut64 q;
ut8 hs[2]; /* 2 bytes of a fixed-endian "short" */
ut8 hl[4]; /* 4 bytes of a fixed-endian "long" */
ut8 hq[8]; /* 8 bytes of a fixed-endian "quad" */
char s[MAXstring]; /* the search string or regex pattern */
float f;
double d;
} value; /* either number or string */
/* Words 17..32 (MAXDESC = 64 bytes = 16 words) */
char desc[MAXDESC]; /* description */
/* Words 33..48 */
char mimetype[MAXDESC]; /* MIME type */
};
#define BIT(A) (1 << (A))
#define STRING_COMPACT_BLANK BIT(0)
#define STRING_COMPACT_OPTIONAL_BLANK BIT(1)
#define STRING_IGNORE_LOWERCASE BIT(2)
#define STRING_IGNORE_UPPERCASE BIT(3)
#define REGEX_OFFSET_START BIT(4)
#define CHAR_COMPACT_BLANK 'B'
#define CHAR_COMPACT_OPTIONAL_BLANK 'b'
#define CHAR_IGNORE_LOWERCASE 'c'
#define CHAR_IGNORE_UPPERCASE 'C'
#define CHAR_REGEX_OFFSET_START 's'
#define STRING_IGNORE_CASE (STRING_IGNORE_LOWERCASE|STRING_IGNORE_UPPERCASE)
#define STRING_DEFAULT_RANGE 100
/*
 * Node of a doubly linked list (next/prev) of magic entry arrays.
 * Each node refers to one array of `nmagic` struct magic entries;
 * `mapped` records how that array was allocated (see the values below).
 */
struct mlist {
struct magic *magic; /* array of magic entries */
ut32 nmagic; /* number of entries in array */
int mapped; /* allocation type: 0 => apprentice_file
* 1 => apprentice_map + malloc
* 2 => apprentice_map + mmap */
struct mlist *next, *prev;
};
#define R_MAGIC_NONE 0x000000 /* No flags */
#define R_MAGIC_DEBUG 0x000001 /* Turn on debugging */
#define R_MAGIC_SYMLINK 0x000002 /* Follow symlinks */
#define R_MAGIC_COMPRESS 0x000004 /* Check inside compressed files */
#define R_MAGIC_DEVICES 0x000008 /* Look at the contents of devices */
#define R_MAGIC_MIME_TYPE 0x000010 /* Return only the MIME type */
#define R_MAGIC_CONTINUE 0x000020 /* Return all matches */
#define R_MAGIC_CHECK 0x000040 /* Print warnings to stderr */
#define R_MAGIC_PRESERVE_ATIME 0x000080 /* Restore access time on exit */
#define R_MAGIC_RAW 0x000100 /* Don't translate unprint chars */
#define R_MAGIC_ERROR 0x000200 /* Handle ENOENT etc as real errors */
#define R_MAGIC_MIME_ENCODING 0x000400 /* Return only the MIME encoding */
#define R_MAGIC_MIME (R_MAGIC_MIME_TYPE|R_MAGIC_MIME_ENCODING)
#define R_MAGIC_NO_CHECK_COMPRESS 0x001000 /* Don't check for compressed files */
#define R_MAGIC_NO_CHECK_TAR 0x002000 /* Don't check for tar files */
#define R_MAGIC_NO_CHECK_SOFT 0x004000 /* Don't check magic entries */
#define R_MAGIC_NO_CHECK_APPTYPE 0x008000 /* Don't check application type */
#define R_MAGIC_NO_CHECK_ELF 0x010000 /* Don't check for elf details */
#define R_MAGIC_NO_CHECK_ASCII 0x020000 /* Don't check for ascii files */
#define R_MAGIC_NO_CHECK_TOKENS 0x100000 /* Don't check ascii/tokens */
/* Defined for backwards compatibility; do nothing */
#define MAGIC_NO_CHECK_FORTRAN 0x000000 /* Don't check ascii/fortran */
#define MAGIC_NO_CHECK_TROFF 0x000000 /* Don't check ascii/troff */
/*
 * State behind an r_magic_t handle (r_magic_t is a pointer to this struct).
 * Holds the magic entry list, per-continuation-level match state, the
 * output buffers, the R_MAGIC_* flag bits and the error state.
 */
struct r_magic_set {
struct mlist *mlist; /* list of magic entry arrays */
struct cont {
size_t len; /* presumably the number of elements in li -- TODO confirm */
struct level_info {
st32 off;
int got_match;
#ifdef ENABLE_CONDITIONALS
int last_match;
int last_cond; /* used for error checking by parse() */
#endif
} *li;
} c;
struct out {
char *buf; /* Accumulation buffer */
char *pbuf; /* Printable buffer */
} o;
ut32 offset;
int error;
int flags; /* R_MAGIC_* bits; e.g. tested against R_MAGIC_MIME */
int haderr;
const char *file;
size_t line; /* current magic line number */
/* data for searches */
struct {
const char *s; /* start of search in original source */
size_t s_len; /* length of search region */
size_t offset; /* starting offset in source: XXX - should this be off_t? */
size_t rm_len; /* match length */
} search;
/* FIXME: Make the string dynamically allocated so that e.g.
strings matched in files can be longer than MAXstring */
union VALUETYPE ms_value; /* either number or string */
};
#ifdef __cplusplus
extern "C" {
#endif
/* Handle type taken (or, for r_magic_open, returned) by every entry point below. */
typedef struct r_magic_set *r_magic_t;
/* Create a handle; the int argument is presumably a set of R_MAGIC_* flags
   (callers pass 0) -- TODO confirm against magic.c. */
R_API r_magic_t r_magic_open(int);
/* Release a handle obtained from r_magic_open(). */
R_API void r_magic_close(r_magic_t);
R_API const char *r_magic_file(r_magic_t, const char *);
R_API const char *r_magic_descriptor(r_magic_t, int);
/* Describe an in-memory buffer (pointer, length). Callers treat a NULL
   return as "no description". */
R_API const char *r_magic_buffer(r_magic_t, const void *, size_t);
/* Error text for the handle; used by callers after r_magic_load() fails. */
R_API const char *r_magic_error(r_magic_t);
R_API int r_magic_setflags(r_magic_t, int);
/* Load a magic database; callers pass NULL to request the default one and
   treat a return value of -1 as failure. */
R_API int r_magic_load(r_magic_t, const char *);
R_API int r_magic_compile(r_magic_t, const char *);
R_API int r_magic_check(r_magic_t, const char *);
R_API int r_magic_errno(r_magic_t);
#ifdef __cplusplus
}
#endif
#endif /* R_MAGIC_H */

17
libr/magic/Makefile Normal file
View File

@ -0,0 +1,17 @@
# Build description for the r_magic library.
# NAME and OBJ are set here before ../rules.mk is included; presumably
# rules.mk turns them into the library targets -- TODO confirm.
include ../config.mk
NAME=r_magic
CFLAGS+=-I.
CFLAGS+=-DHAVE_CONFIG_H
OBJ=apprentice.o ascmagic.o compress.o fsmagic.o funcs.o is_tar.o magic.o print.o softmagic.o
include ../rules.mk
# NOTE(review): the two rules below use ${SRC}, which this Makefile never
# assigns (it may come from an included file -- verify). They look like
# leftovers for building a standalone static library and file(1) binary.
libfile.a:
${CC} -c ${CFLAGS} ${SRC}
ar q libfile.a *.o
ranlib libfile.a
BIN=file${EXT_EXE}
${BIN}:
${CC} -I../include ${CFLAGS} ${SRC} file.c -o ${BIN}

8
libr/magic/README Normal file
View File

@ -0,0 +1,8 @@
This implementation of file(1) was taken from OpenBSD.
I removed the ELF part, which cuts more than 1 KLOC.
I also cleaned up the code to fix some warnings
and to follow r2 conventions. The magic_ API is now r_magic_.
--pancake
$ cvs -d anoncvs@anoncvs.ca.openbsd.org:/cvs get src/usr.bin/file

2129
libr/magic/apprentice.c Normal file

File diff suppressed because it is too large Load Diff

788
libr/magic/ascmagic.c Normal file
View File

@ -0,0 +1,788 @@
/* $OpenBSD: ascmagic.c,v 1.11 2009/10/27 23:59:37 deraadt Exp $ */
/*
* Copyright (c) Ian F. Darwin 1986-1995.
* Software written by Ian F. Darwin and others;
* maintained 1995-present by Christos Zoulas and others.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice immediately at the beginning of the file, without modification,
* this list of conditions, and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* ASCII magic -- file types that we know based on keywords
* that can appear anywhere in the file.
*
* Extensively modified by Eric Fischer <enf@pobox.com> in July, 2000,
* to handle character codes other than ASCII on a unified basis.
*
* Joerg Wunsch <joerg@freebsd.org> wrote the original support for 8-bit
* international characters, now subsumed into this file.
*/
#include "file.h"
#include <stdio.h>
#include <string.h>
#include <memory.h>
#include <ctype.h>
#include <stdlib.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#include "names.h"
#define MAXLINELEN 300 /* longest sane line length */
#define ISSPC(x) ((x) == ' ' || (x) == '\t' || (x) == '\r' || (x) == '\n' \
|| (x) == 0x85 || (x) == '\f')
static int looks_ascii(const unsigned char *, size_t, unichar *, size_t *);
static int looks_utf8_with_BOM(const unsigned char *, size_t, unichar *,
size_t *);
int file_looks_utf8(const unsigned char *, size_t, unichar *, size_t *);
static int looks_ucs16(const unsigned char *, size_t, unichar *, size_t *);
static int looks_latin1(const unsigned char *, size_t, unichar *, size_t *);
static int looks_extended(const unsigned char *, size_t, unichar *, size_t *);
static void from_ebcdic(const unsigned char *, size_t, unsigned char *);
static int ascmatch(const unsigned char *, const unichar *, size_t);
static unsigned char *encode_utf8(unsigned char *, size_t, unichar *, size_t);
/*
 * Try to classify buf as text in one of the recognised character codes and
 * append a description (or MIME type/charset when R_MAGIC_MIME bits are set
 * in ms->flags) to ms through file_printf().
 *
 * ms     - magic handle; its flags select MIME vs. human-readable output
 * buf    - bytes to inspect
 * nbytes - number of bytes in buf; trailing NULs are ignored (one byte is
 *          always kept)
 *
 * Returns 1 when the data was identified (either by the text soft magic or
 * by the checks below), 0 when it does not look like text or is at most one
 * byte long, and -1 on allocation, encoding or output failure.
 */
int
file_ascmagic(struct r_magic_set *ms, const unsigned char *buf, size_t nbytes)
{
	size_t i;
	unsigned char *nbuf = NULL, *utf8_buf = NULL, *utf8_end;
	unichar *ubuf = NULL;
	size_t ulen, mlen;
	const struct names *p;
	int rv = -1;
	int mime = ms->flags & R_MAGIC_MIME;

	const char *code = NULL;
	const char *code_mime = NULL;
	const char *type = NULL;
	const char *subtype = NULL;
	const char *subtype_mime = NULL;

	int has_escapes = 0;
	int has_backspace = 0;
	int seen_cr = 0;

	int n_crlf = 0;
	int n_lf = 0;
	int n_cr = 0;
	int n_nel = 0;

	/* (size_t)-1 so that "last_line_end + MAXLINELEN" wraps to
	   MAXLINELEN - 1 while no line terminator has been seen yet */
	size_t last_line_end = (size_t)-1;
	int has_long_lines = 0;

	/*
	 * Undo the NUL-termination kindly provided by process()
	 * but leave at least one byte to look at
	 */
	while (nbytes > 1 && buf[nbytes - 1] == '\0')
		nbytes--;

	/* Both scratch buffers are zero-filled and hold nbytes + 1 elements */
	if ((nbuf = calloc(1, (nbytes + 1) * sizeof(nbuf[0]))) == NULL)
		goto done;
	if ((ubuf = calloc(1, (nbytes + 1) * sizeof(ubuf[0]))) == NULL)
		goto done;

	/*
	 * Then try to determine whether it's any character code we can
	 * identify.  Each of these tests, if it succeeds, will leave
	 * the text converted into one-unichar-per-character Unicode in
	 * ubuf, and the number of characters converted in ulen.
	 */
	if (looks_ascii(buf, nbytes, ubuf, &ulen)) {
		code = "ASCII";
		code_mime = "us-ascii";
		type = "text";
	} else if (looks_utf8_with_BOM(buf, nbytes, ubuf, &ulen) > 0) {
		code = "UTF-8 Unicode (with BOM)";
		code_mime = "utf-8";
		type = "text";
	} else if (file_looks_utf8(buf, nbytes, ubuf, &ulen) > 1) {
		code = "UTF-8 Unicode";
		code_mime = "utf-8";
		type = "text";
	} else if ((i = looks_ucs16(buf, nbytes, ubuf, &ulen)) != 0) {
		if (i == 1)
			code = "Little-endian UTF-16 Unicode";
		else
			code = "Big-endian UTF-16 Unicode";

		type = "character data";
		code_mime = "utf-16";    /* is this defined? */
	} else if (looks_latin1(buf, nbytes, ubuf, &ulen)) {
		code = "ISO-8859";
		type = "text";
		code_mime = "iso-8859-1";
	} else if (looks_extended(buf, nbytes, ubuf, &ulen)) {
		code = "Non-ISO extended-ASCII";
		type = "text";
		code_mime = "unknown";
	} else {
		/* Last resort: translate from EBCDIC into nbuf and retry */
		from_ebcdic(buf, nbytes, nbuf);

		if (looks_ascii(nbuf, nbytes, ubuf, &ulen)) {
			code = "EBCDIC";
			type = "character data";
			code_mime = "ebcdic";
		} else if (looks_latin1(nbuf, nbytes, ubuf, &ulen)) {
			code = "International EBCDIC";
			type = "character data";
			code_mime = "ebcdic";
		} else {
			rv = 0;
			goto done;  /* doesn't look like text at all */
		}
	}

	if (nbytes <= 1) {
		rv = 0;
		goto done;
	}

	/* Convert ubuf to UTF-8 and try text soft magic */
	/* If original was ASCII or UTF-8, could use nbuf instead of
	   re-converting. */
	/* malloc size is a conservative overestimate; could be
	   improved, or at least realloced after conversion. */
	mlen = ulen * 6;
	if ((utf8_buf = malloc(mlen)) == NULL) {
		file_oomem(ms, mlen);
		goto done;
	}
	if ((utf8_end = encode_utf8(utf8_buf, mlen, ubuf, ulen)) == NULL)
		goto done;
	if (file_softmagic(ms, utf8_buf, utf8_end - utf8_buf, TEXTTEST) != 0) {
		rv = 1;
		goto done;
	}

	/* look for tokens from names.h - this is expensive! */
	if ((ms->flags & R_MAGIC_NO_CHECK_TOKENS) != 0)
		goto subtype_identified;

	i = 0;
	while (i < ulen) {
		size_t end;

		/* skip past any leading space */
		while (i < ulen && ISSPC(ubuf[i]))
			i++;
		if (i >= ulen)
			break;

		/*
		 * find the next whitespace; the scan is bounded by ulen
		 * (converted characters), not nbytes: for multi-byte
		 * encodings ulen < nbytes and everything past ulen in
		 * ubuf is just zero fill, not text.
		 */
		for (end = i + 1; end < ulen; end++)
			if (ISSPC(ubuf[end]))
				break;

		/* compare the word thus isolated against the token list */
		for (p = names; p < names + NNAMES; p++) {
			if (ascmatch((const unsigned char *)p->name, ubuf + i,
			    end - i)) {
				subtype = types[p->type].human;
				subtype_mime = types[p->type].mime;
				goto subtype_identified;
			}
		}

		i = end;
	}

subtype_identified:

	/* Now try to discover other details about the file. */
	for (i = 0; i < ulen; i++) {
		if (ubuf[i] == '\n') {
			if (seen_cr)
				n_crlf++;
			else
				n_lf++;
			last_line_end = i;
		} else if (seen_cr)
			n_cr++;	/* previous CR was not followed by LF */

		seen_cr = (ubuf[i] == '\r');
		if (seen_cr)
			last_line_end = i;

		if (ubuf[i] == 0x85) { /* X3.64/ECMA-43 "next line" character */
			n_nel++;
			last_line_end = i;
		}

		/* If this line is _longer_ than MAXLINELEN, remember it. */
		if (i > last_line_end + MAXLINELEN)
			has_long_lines = 1;

		if (ubuf[i] == '\033')
			has_escapes = 1;
		if (ubuf[i] == '\b')
			has_backspace = 1;
	}

	/* Beware, if the data has been truncated, the final CR could have
	   been followed by a LF.  If we have HOWMANY bytes, it indicates
	   that the data might have been truncated, probably even before
	   this function was called. */
	if (seen_cr && nbytes < HOWMANY)
		n_cr++;

	if (mime) {
		if (mime & R_MAGIC_MIME_TYPE) {
			if (subtype_mime) {
				if (file_printf(ms, subtype_mime) == -1)
					goto done;
			} else {
				if (file_printf(ms, "text/plain") == -1)
					goto done;
			}
		}

		if ((mime == 0 || mime == R_MAGIC_MIME) && code_mime) {
			if ((mime & R_MAGIC_MIME_TYPE) &&
			    file_printf(ms, " charset=") == -1)
				goto done;
			if (file_printf(ms, code_mime) == -1)
				goto done;
		}

		/* propagate output failures here like everywhere else */
		if (mime == R_MAGIC_MIME_ENCODING)
			if (file_printf(ms, "binary") == -1)
				goto done;
	} else {
		if (file_printf(ms, code) == -1)
			goto done;

		if (subtype) {
			if (file_printf(ms, " ") == -1)
				goto done;
			if (file_printf(ms, subtype) == -1)
				goto done;
		}

		if (file_printf(ms, " ") == -1)
			goto done;
		if (file_printf(ms, type) == -1)
			goto done;

		if (has_long_lines)
			if (file_printf(ms, ", with very long lines") == -1)
				goto done;

		/*
		 * Only report line terminators if we find one other than LF,
		 * or if we find none at all.
		 */
		if ((n_crlf == 0 && n_cr == 0 && n_nel == 0 && n_lf == 0) ||
		    (n_crlf != 0 || n_cr != 0 || n_nel != 0)) {
			if (file_printf(ms, ", with") == -1)
				goto done;

			if (n_crlf == 0 && n_cr == 0 && n_nel == 0 && n_lf == 0) {
				if (file_printf(ms, " no") == -1)
					goto done;
			} else {
				if (n_crlf) {
					if (file_printf(ms, " CRLF") == -1)
						goto done;
					if (n_cr || n_lf || n_nel)
						if (file_printf(ms, ",") == -1)
							goto done;
				}
				if (n_cr) {
					if (file_printf(ms, " CR") == -1)
						goto done;
					if (n_lf || n_nel)
						if (file_printf(ms, ",") == -1)
							goto done;
				}
				if (n_lf) {
					if (file_printf(ms, " LF") == -1)
						goto done;
					if (n_nel)
						if (file_printf(ms, ",") == -1)
							goto done;
				}
				if (n_nel)
					if (file_printf(ms, " NEL") == -1)
						goto done;
			}

			if (file_printf(ms, " line terminators") == -1)
				goto done;
		}

		if (has_escapes)
			if (file_printf(ms, ", with escape sequences") == -1)
				goto done;
		if (has_backspace)
			if (file_printf(ms, ", with overstriking") == -1)
				goto done;
	}
	rv = 1;
done:
	/* free(NULL) is a no-op, so no guards are needed */
	free(nbuf);
	free(ubuf);
	free(utf8_buf);

	return rv;
}
/*
 * Compare the NUL-terminated byte string s against the first ulen
 * characters of us.
 *
 * Returns 1 only when s is exactly ulen bytes long and every byte equals
 * the corresponding character of us; returns 0 otherwise.
 */
static int
ascmatch(const unsigned char *s, const unichar *us, size_t ulen)
{
	size_t i;

	for (i = 0; i < ulen; i++) {
		/*
		 * Stop at the terminator of s: if us[i] were 0 as well, a
		 * plain equality test would carry on and read past the end
		 * of s.
		 */
		if (s[i] == '\0' || s[i] != us[i])
			return 0;
	}

	/* s must end exactly where the compared word ends */
	return s[i] == '\0';
}
/*
* This table reflects a particular philosophy about what constitutes
* "text," and there is room for disagreement about it.
*
* Version 3.31 of the file command considered a file to be ASCII if
* each of its characters was approved by either the isascii() or
* isalpha() function. On most systems, this would mean that any
* file consisting only of characters in the range 0x00 ... 0x7F
* would be called ASCII text, but many systems might reasonably
* consider some characters outside this range to be alphabetic,
* so the file command would call such characters ASCII. It might
* have been more accurate to call this "considered textual on the
* local system" than "ASCII."
*
* It considered a file to be "International language text" if each
* of its characters was either an ASCII printing character (according
* to the real ASCII standard, not the above test), a character in
* the range 0x80 ... 0xFF, or one of the following control characters:
* backspace, tab, line feed, vertical tab, form feed, carriage return,
* escape. No attempt was made to determine the language in which files
* of this type were written.
*
*
* The table below considers a file to be ASCII if all of its characters
* are either ASCII printing characters (again, according to the X3.4
* standard, not isascii()) or any of the following controls: bell,
* backspace, tab, line feed, form feed, carriage return, esc, nextline.
*
* I include bell because some programs (particularly shell scripts)
* use it literally, even though it is rare in normal text. I exclude
* vertical tab because it never seems to be used in real text. I also
* include, with hesitation, the X3.64/ECMA-43 control nextline (0x85),
* because that's what the dd EBCDIC->ASCII table maps the EBCDIC newline
* character to. It might be more appropriate to include it in the 8859
* set instead of the ASCII set, but it's got to be included in *something*
* we recognize or EBCDIC files aren't going to be considered textual.
* Some old Unix source files use SO/SI (^N/^O) to shift between Greek
* and Latin characters, so these should possibly be allowed. But they
* make a real mess on VT100-style displays if they're not paired properly,
* so we are probably better off not calling them text.
*
* A file is considered to be ISO-8859 text if its characters are all
* either ASCII, according to the above definition, or printing characters
* from the ISO-8859 8-bit extension, characters 0xA0 ... 0xFF.
*
* Finally, a file is considered to be international text from some other
* character code if its characters are all either ISO-8859 (according to
* the above definition) or characters in the range 0x80 ... 0x9F, which
* ISO-8859 considers to be control characters but the IBM PC and Macintosh
* consider to be printing characters.
*/
/*
 * Per-byte classification table: text_chars[c] gives the class (F, T, I
 * or X below) of byte value c. Each row covers 16 consecutive byte
 * values; the trailing comment names the row's high nibble.
 */
#define F 0 /* character never appears in text */
#define T 1 /* character appears in plain ASCII text */
#define I 2 /* character appears in ISO-8859 text */
#define X 3 /* character appears in non-ISO extended ASCII (Mac, IBM PC) */
static char text_chars[256] = {
/* BEL BS HT LF FF CR */
F, F, F, F, F, F, F, T, T, T, T, F, T, T, F, F, /* 0x0X */
/* ESC */
F, F, F, F, F, F, F, F, F, F, F, T, F, F, F, F, /* 0x1X */
T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x2X */
T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x3X */
T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x4X */
T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x5X */
T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x6X */
T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, F, /* 0x7X */
/* NEL */
X, X, X, X, X, T, X, X, X, X, X, X, X, X, X, X, /* 0x8X */
X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, /* 0x9X */
I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xaX */
I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xbX */
I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xcX */
I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xdX */
I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xeX */
I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I /* 0xfX */
};
/*
 * Return 1 if every byte of buf[0 ... nbytes-1] is classified T (plain
 * ASCII text) in text_chars[], 0 otherwise. Accepted bytes are copied
 * into ubuf and counted in *ulen; on a 0 return *ulen holds the number
 * of bytes accepted before the offending one.
 */
static int
looks_ascii(const unsigned char *buf, size_t nbytes, unichar *ubuf,
    size_t *ulen)
{
	size_t pos = 0;

	*ulen = 0;
	while (pos < nbytes) {
		const unsigned char ch = buf[pos++];

		if (text_chars[ch] != T)
			return 0;
		ubuf[*ulen] = ch;
		*ulen += 1;
	}
	return 1;
}
/*
 * Return 1 if every byte of buf[0 ... nbytes-1] is classified T (ASCII)
 * or I (ISO-8859) in text_chars[], 0 otherwise. Accepted bytes are
 * copied into ubuf and counted in *ulen, including on a 0 return.
 */
static int
looks_latin1(const unsigned char *buf, size_t nbytes, unichar *ubuf, size_t *ulen)
{
	size_t pos = 0;

	*ulen = 0;
	while (pos < nbytes) {
		const unsigned char ch = buf[pos++];
		const int cls = text_chars[ch];

		if (!(cls == T || cls == I))
			return 0;
		ubuf[*ulen] = ch;
		*ulen += 1;
	}
	return 1;
}
/*
 * Return 1 if every byte of buf[0 ... nbytes-1] is classified T, I or X
 * in text_chars[] (i.e. ASCII, ISO-8859 or non-ISO extended ASCII),
 * 0 otherwise. Accepted bytes are copied into ubuf and counted in
 * *ulen, including on a 0 return.
 */
static int
looks_extended(const unsigned char *buf, size_t nbytes, unichar *ubuf,
    size_t *ulen)
{
	size_t pos = 0;

	*ulen = 0;
	while (pos < nbytes) {
		const unsigned char ch = buf[pos++];

		switch (text_chars[ch]) {
		case T:
		case I:
		case X:
			break;
		default:
			return 0;
		}
		ubuf[*ulen] = ch;
		*ulen += 1;
	}
	return 1;
}
/*
 * Write the ulen characters of ubuf into buf (capacity len bytes) as
 * UTF-8, using the historical 1- to 6-byte forms (values up to
 * 0x7fffffff). Returns a pointer just past the last byte written, or
 * NULL if a character is out of range or the output does not fit.
 */
static unsigned char *
encode_utf8(unsigned char *buf, size_t len, unichar *ubuf, size_t ulen)
{
	unsigned char *limit = buf + len;
	size_t idx;

	for (idx = 0; idx < ulen; idx++) {
		const unichar ch = ubuf[idx];
		unsigned char lead;	/* marker bits of the first byte */
		int trail;		/* number of continuation bytes */

		if (ch <= 0x7f) {
			lead = 0x00;
			trail = 0;
		} else if (ch <= 0x7ff) {
			lead = 0xc0;
			trail = 1;
		} else if (ch <= 0xffff) {
			lead = 0xe0;
			trail = 2;
		} else if (ch <= 0x1fffff) {
			lead = 0xf0;
			trail = 3;
		} else if (ch <= 0x3ffffff) {
			lead = 0xf8;
			trail = 4;
		} else if (ch <= 0x7fffffff) {
			lead = 0xfc;
			trail = 5;
		} else {
			/* Invalid character */
			return NULL;
		}

		/* the whole sequence must fit in what is left of buf */
		if (limit - buf < trail + 1)
			return NULL;

		/* first byte: marker plus the topmost payload bits */
		*buf++ = (unsigned char)((ch >> (6 * trail)) + lead);
		/* remaining bytes: six payload bits each, high to low */
		while (trail-- > 0)
			*buf++ = (unsigned char)(((ch >> (6 * trail)) & 0x3f) + 0x80);
	}
	return buf;
}
/*
 * Classify a buffer as UTF-8. The result is one of:
 *
 *	-1	not valid UTF-8
 *	 0	well formed, but contains 7-bit bytes that text_chars[]
 *		does not classify as plain text
 *	 1	nothing but 7-bit text
 *	 2	text with at least one complete multi-byte sequence
 *
 * When ubuf is non-NULL the decoded characters are stored in ubuf and
 * counted in *ulen; the caller must make ubuf big enough. A multi-byte
 * sequence cut short by the end of the buffer is not treated as an
 * error: scanning simply stops there.
 */
int
file_looks_utf8(const unsigned char *buf, size_t nbytes, unichar *ubuf, size_t *ulen)
{
	size_t pos = 0;
	int saw_multibyte = 0, saw_ctrl = 0;

	if (ubuf)
		*ulen = 0;

	while (pos < nbytes) {
		const unsigned char lead = buf[pos];
		unichar cp;
		int trail, k;

		if (lead < 0x80) {
			/* 0xxxxxxx: plain 7-bit byte; remember odd controls */
			if (text_chars[lead] != T)
				saw_ctrl = 1;
			if (ubuf)
				ubuf[(*ulen)++] = lead;
			pos++;
			continue;
		}
		if (lead < 0xc0)		/* 10xxxxxx cannot start a sequence */
			return -1;

		if (lead < 0xe0) {		/* 110xxxxx */
			cp = lead & 0x1f;
			trail = 1;
		} else if (lead < 0xf0) {	/* 1110xxxx */
			cp = lead & 0x0f;
			trail = 2;
		} else if (lead < 0xf8) {	/* 11110xxx */
			cp = lead & 0x07;
			trail = 3;
		} else if (lead < 0xfc) {	/* 111110xx */
			cp = lead & 0x03;
			trail = 4;
		} else if (lead < 0xfe) {	/* 1111110x */
			cp = lead & 0x01;
			trail = 5;
		} else {			/* 0xfe and 0xff are never valid */
			return -1;
		}

		for (k = 0; k < trail; k++) {
			pos++;
			if (pos >= nbytes)
				goto out;	/* truncated sequence at end of buffer */
			if ((buf[pos] & 0xc0) != 0x80)
				return -1;	/* not a 10xxxxxx continuation byte */
			cp = (cp << 6) + (buf[pos] & 0x3f);
		}
		if (ubuf)
			ubuf[(*ulen)++] = cp;
		saw_multibyte = 1;
		pos++;
	}
out:
	if (saw_ctrl)
		return 0;
	return saw_multibyte ? 2 : 1;
}
/*
 * UTF-8 check for buffers that start with a byte order mark. Returns -1
 * unless buf holds the three BOM bytes EF BB BF followed by at least one
 * more byte; otherwise returns whatever file_looks_utf8() says about
 * the text after the BOM.
 */
static int
looks_utf8_with_BOM(const unsigned char *buf, size_t nbytes, unichar *ubuf,
    size_t *ulen)
{
	const int has_bom = nbytes > 3 &&
	    buf[0] == 0xef && buf[1] == 0xbb && buf[2] == 0xbf;

	return has_bom ? file_looks_utf8(buf + 3, nbytes - 3, ubuf, ulen) : -1;
}
/*
 * Check for 16-bit Unicode text introduced by a byte order mark.
 * Returns 0 if buf does not start with FF FE or FE FF, or if a decoded
 * unit is 0xfffe or a 7-bit value that text_chars[] does not classify
 * as plain text; otherwise returns 1 for little-endian and 2 for
 * big-endian data. Decoded units are stored in ubuf and counted in
 * *ulen (a rejected unit is stored before the function returns 0).
 * A trailing odd byte is ignored.
 */
static int
looks_ucs16(const unsigned char *buf, size_t nbytes, unichar *ubuf,
    size_t *ulen)
{
	int bigend;
	int hi, lo;	/* offsets of the high and low byte within a unit */
	size_t pos;

	if (nbytes < 2)
		return 0;

	if (buf[0] == 0xff && buf[1] == 0xfe)
		bigend = 0;
	else if (buf[0] == 0xfe && buf[1] == 0xff)
		bigend = 1;
	else
		return 0;

	hi = bigend ? 0 : 1;
	lo = 1 - hi;

	*ulen = 0;
	for (pos = 2; pos + 1 < nbytes; pos += 2) {
		/* XXX fix to properly handle chars > 65536 */
		const unichar unit = buf[pos + lo] + 256 * buf[pos + hi];

		ubuf[*ulen] = unit;
		*ulen += 1;

		if (unit == 0xfffe)
			return 0;
		if (unit < 128 && text_chars[(size_t)unit] != T)
			return 0;
	}
	return 1 + bigend;
}
#undef F
#undef T
#undef I
#undef X
/*
* This table maps each EBCDIC character to an (8-bit extended) ASCII
* character, as specified in the rationale for the dd(1) command in
* draft 11.2 (September, 1991) of the POSIX P1003.2 standard.
*
* Unfortunately it does not seem to correspond exactly to any of the
* five variants of EBCDIC documented in IBM's _Enterprise Systems
* Architecture/390: Principles of Operation_, SA22-7201-06, Seventh
* Edition, July, 1999, pp. I-1 - I-4.
*
* Fortunately, though, all versions of EBCDIC, including this one, agree
* on most of the printing characters that also appear in (7-bit) ASCII.
* Of these, only '|', '!', '~', '^', '[', and ']' are in question at all.
*
* Fortunately too, there is general agreement that codes 0x00 through
* 0x3F represent control characters, 0x41 a nonbreaking space, and the
* remainder printing characters.
*
* This is sufficient to allow us to identify EBCDIC text and to distinguish
* between old-style and internationalized examples of text.
*/
/*
 * Translation table indexed by the EBCDIC byte value; from_ebcdic()
 * looks up every input byte here. Sixteen rows of sixteen entries, one
 * row per high nibble of the EBCDIC code.
 */
static unsigned char ebcdic_to_ascii[] = {
0, 1, 2, 3, 156, 9, 134, 127, 151, 141, 142, 11, 12, 13, 14, 15,
16, 17, 18, 19, 157, 133, 8, 135, 24, 25, 146, 143, 28, 29, 30, 31,
128, 129, 130, 131, 132, 10, 23, 27, 136, 137, 138, 139, 140, 5, 6, 7,
144, 145, 22, 147, 148, 149, 150, 4, 152, 153, 154, 155, 20, 21, 158, 26,
' ', 160, 161, 162, 163, 164, 165, 166, 167, 168, 213, '.', '<', '(', '+', '|',
'&', 169, 170, 171, 172, 173, 174, 175, 176, 177, '!', '$', '*', ')', ';', '~',
'-', '/', 178, 179, 180, 181, 182, 183, 184, 185, 203, ',', '%', '_', '>', '?',
186, 187, 188, 189, 190, 191, 192, 193, 194, '`', ':', '#', '@', '\'','=', '"',
195, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 196, 197, 198, 199, 200, 201,
202, 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', '^', 204, 205, 206, 207, 208,
209, 229, 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 210, 211, 212, '[', 214, 215,
216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, ']', 230, 231,
'{', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 232, 233, 234, 235, 236, 237,
'}', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 238, 239, 240, 241, 242, 243,
'\\',159, 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 244, 245, 246, 247, 248, 249,
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 250, 251, 252, 253, 254, 255
};
#ifdef notdef
/*
 * NOTE(review): this whole table is compiled only when `notdef' is
 * defined; presumably it never is, which would make this reference
 * material only -- confirm against the build flags.
 *
 * The following EBCDIC-to-ASCII table may relate more closely to reality,
 * or at least to modern reality. It comes from
 *
 * http://ftp.s390.ibm.com/products/oe/bpxqp9.html
 *
 * and maps the characters of EBCDIC code page 1047 (the code used for
 * Unix-derived software on IBM's 390 systems) to the corresponding
 * characters from ISO 8859-1.
 *
 * If this table is used instead of the above one, some of the special
 * cases for the NEL character can be taken out of the code.
 */
static unsigned char ebcdic_1047_to_8859[] = {
0x00,0x01,0x02,0x03,0x9C,0x09,0x86,0x7F,0x97,0x8D,0x8E,0x0B,0x0C,0x0D,0x0E,0x0F,
0x10,0x11,0x12,0x13,0x9D,0x0A,0x08,0x87,0x18,0x19,0x92,0x8F,0x1C,0x1D,0x1E,0x1F,
0x80,0x81,0x82,0x83,0x84,0x85,0x17,0x1B,0x88,0x89,0x8A,0x8B,0x8C,0x05,0x06,0x07,
0x90,0x91,0x16,0x93,0x94,0x95,0x96,0x04,0x98,0x99,0x9A,0x9B,0x14,0x15,0x9E,0x1A,
0x20,0xA0,0xE2,0xE4,0xE0,0xE1,0xE3,0xE5,0xE7,0xF1,0xA2,0x2E,0x3C,0x28,0x2B,0x7C,
0x26,0xE9,0xEA,0xEB,0xE8,0xED,0xEE,0xEF,0xEC,0xDF,0x21,0x24,0x2A,0x29,0x3B,0x5E,
0x2D,0x2F,0xC2,0xC4,0xC0,0xC1,0xC3,0xC5,0xC7,0xD1,0xA6,0x2C,0x25,0x5F,0x3E,0x3F,
0xF8,0xC9,0xCA,0xCB,0xC8,0xCD,0xCE,0xCF,0xCC,0x60,0x3A,0x23,0x40,0x27,0x3D,0x22,
0xD8,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0xAB,0xBB,0xF0,0xFD,0xFE,0xB1,
0xB0,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,0x70,0x71,0x72,0xAA,0xBA,0xE6,0xB8,0xC6,0xA4,
0xB5,0x7E,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0xA1,0xBF,0xD0,0x5B,0xDE,0xAE,
0xAC,0xA3,0xA5,0xB7,0xA9,0xA7,0xB6,0xBC,0xBD,0xBE,0xDD,0xA8,0xAF,0x5D,0xB4,0xD7,
0x7B,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0xAD,0xF4,0xF6,0xF2,0xF3,0xF5,
0x7D,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0xB9,0xFB,0xFC,0xF9,0xFA,0xFF,
0x5C,0xF7,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0xB2,0xD4,0xD6,0xD2,0xD3,0xD5,
0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0xB3,0xDB,0xDC,0xD9,0xDA,0x9F
};
#endif
/*
 * Translate nbytes bytes of EBCDIC from buf into out, one output byte
 * per input byte, by looking each byte up in ebcdic_to_ascii[].
 */
static void
from_ebcdic(const unsigned char *buf, size_t nbytes, unsigned char *out)
{
	const unsigned char *src = buf;

	for (; nbytes > 0; nbytes--)
		*out++ = ebcdic_to_ascii[*src++];
}

478
libr/magic/compress.c Normal file
View File

@ -0,0 +1,478 @@
/* $OpenBSD: compress.c,v 1.15 2011/01/10 20:59:42 deraadt Exp $ */
/*
* Copyright (c) Ian F. Darwin 1986-1995.
* Software written by Ian F. Darwin and others;
* maintained 1995-present by Christos Zoulas and others.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice immediately at the beginning of the file, without modification,
* this list of conditions, and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
 * compress routines:
 * file_zmagic() - returns 0 if not recognized, uncompresses and prints
 * information if recognized
 * uncompressbuf(ms, fd, method, old, newch, n) - uncompress old into
 * *newch using method, return the size of the uncompressed data
 */
#include "file.h"
#include <stdio.h>
#include <stdlib.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#include <string.h>
#include <errno.h>
#include <sys/types.h>
#include <sys/ioctl.h>
#ifdef HAVE_SYS_WAIT_H
#include <sys/wait.h>
#endif
#if defined(HAVE_SYS_TIME_H)
#include <sys/time.h>
#endif
#if defined(HAVE_ZLIB_H) && defined(HAVE_LIBZ)
#define BUILTIN_DECOMPRESS
#include <zlib.h>
#endif
/*
 * Table of recognised compressed formats and the external command used
 * to expand each one. file_zmagic() walks it in order and tries every
 * entry whose magic matches.
 *
 * NOTE: when BUILTIN_DECOMPRESS is defined, uncompressbuf() picks the
 * built-in gzip code by table index (method == 2), so the position of
 * the "gzipped" entry must not change without updating that test.
 */
static const struct {
const char magic[8]; /* leading bytes that identify the format */
size_t maglen; /* number of bytes of magic[] that are compared */
const char *argv[3]; /* NULL-terminated argv handed to execvp() */
int silent; /* non-zero: the child's stderr is closed (unless DEBUG) */
} compr[] = {
{ "\037\235", 2, { "gzip", "-cdq", NULL }, 1 }, /* compressed */
/* Uncompress can get stuck; so use gzip first if we have it
* Idea from Damien Clark, thanks! */
{ "\037\235", 2, { "uncompress", "-c", NULL }, 1 }, /* compressed */
{ "\037\213", 2, { "gzip", "-cdq", NULL }, 1 }, /* gzipped */
{ "\037\236", 2, { "gzip", "-cdq", NULL }, 1 }, /* frozen */
{ "\037\240", 2, { "gzip", "-cdq", NULL }, 1 }, /* SCO LZH */
/* the standard pack utilities do not accept standard input */
{ "\037\036", 2, { "gzip", "-cdq", NULL }, 0 }, /* packed */
{ "PK\3\4", 4, { "gzip", "-cdq", NULL }, 1 }, /* pkzipped, */
/* ...only first file examined */
{ "BZh", 3, { "bzip2", "-cd", NULL }, 1 }, /* bzip2-ed */
};
/* number of entries in compr[] */
static size_t ncompr = sizeof(compr) / sizeof(compr[0]);
/*
 * Sentinel size (all bits set) returned by uncompressbuf() when it could
 * not even start a decompressor; file_zmagic() skips the table entry
 * when it sees this value.
 */
#define NODATA ((size_t)~0)
/* forward declarations of the file-local helpers defined below */
static ssize_t swrite(int, const void *, size_t);
static size_t uncompressbuf(struct r_magic_set *, int, size_t,
const unsigned char *, unsigned char **, size_t);
#ifdef BUILTIN_DECOMPRESS
/* only built when zlib is available (see the HAVE_ZLIB_H / HAVE_LIBZ test) */
static size_t uncompressgzipped(struct r_magic_set *, const unsigned char *,
unsigned char **, size_t);
#endif
/*
 * Try to identify buf as compressed data and, if so, describe both the
 * uncompressed contents and the compression wrapper.
 *
 * Returns 0 when R_MAGIC_COMPRESS is not set in ms->flags or when no
 * compr[] entry both matches and yields data, 1 when a description was
 * produced, and -1 when file_buffer() or file_printf() failed part way.
 * R_MAGIC_COMPRESS is cleared while the contents are examined and set
 * again before returning.
 */
int
file_zmagic(struct r_magic_set *ms, int fd, const char *name,
    const unsigned char *buf, size_t nbytes)
{
	unsigned char *expanded = NULL;
	size_t idx, explen;
	int result = 0;
	const int mime = ms->flags & R_MAGIC_MIME;

	if ((ms->flags & R_MAGIC_COMPRESS) == 0)
		return 0;

	for (idx = 0; idx < ncompr; idx++) {
		if (nbytes < compr[idx].maglen)
			continue;
		if (memcmp(buf, compr[idx].magic, compr[idx].maglen) != 0)
			continue;
		explen = uncompressbuf(ms, fd, idx, buf, &expanded, nbytes);
		if (explen == NODATA)
			continue;

		/* do not recurse into compression while looking inside */
		ms->flags &= ~R_MAGIC_COMPRESS;
		result = -1;

		/* describe the uncompressed contents first */
		if (file_buffer(ms, -1, name, expanded, explen) == -1)
			break;

		if (mime == R_MAGIC_MIME || mime == 0) {
			if (file_printf(ms, mime ?
			    " compressed-encoding=" : " (") == -1)
				break;
		}

		/* then the compressed wrapper itself */
		if (mime == 0 || mime & R_MAGIC_MIME_ENCODING) {
			if (file_buffer(ms, -1, NULL, buf, nbytes) == -1)
				break;
		}

		if (!mime) {
			if (file_printf(ms, ")") == -1)
				break;
		}

		result = 1;
		break;
	}

	if (expanded)
		free(expanded);
	ms->flags |= R_MAGIC_COMPRESS;
	return result;
}
/*
 * `safe' write for sockets and pipes: keep calling write() until all
 * n bytes of buf have gone out, restarting after EINTR. Returns n on
 * success and -1 on any other write() error. write() is called at
 * least once, even when n is 0.
 */
static ssize_t
swrite(int fd, const void *buf, size_t n)
{
	const char *cursor = buf;
	size_t remaining = n;
	int wrote;

	do {
		wrote = write(fd, cursor, remaining);
		if (wrote == -1) {
			if (errno == EINTR)
				continue;	/* interrupted: retry the same chunk */
			return -1;
		}
		cursor += wrote;
		remaining -= wrote;
	} while (remaining > 0);

	return n;
}
/*
 * `safe' read for sockets and pipes.
 *
 * Reads up to n bytes from fd into buf, restarting read() after EINTR.
 * Returns the number of bytes obtained: the full request, or fewer if
 * read() reports end-of-file first. Returns -1 on any other read()
 * error, and 0 from the select() wait below when nothing became
 * readable.
 *
 * Unless fd is STDIN_FILENO, and only where FIONREAD exists, the
 * request is first trimmed to the number of bytes the descriptor
 * reports as available, so the read loop does not block waiting for
 * more than is currently there. canbepipe controls whether FIONREAD is
 * queried before the select() wait.
 */
ssize_t
sread(int fd, void *buf, size_t n, int canbepipe)
{
int rv, cnt;
#ifdef FIONREAD
int t = 0; /* bytes available according to FIONREAD */
#endif
size_t rn = n; /* size of the request, used to compute the return value */
if (fd == STDIN_FILENO)
goto nocheck;
#ifdef FIONREAD
/*
 * Wait for data when the FIONREAD query failed or when t is still 0
 * (which includes the case where canbepipe is 0 and no query was made).
 */
if ((canbepipe && (ioctl(fd, FIONREAD, &t) == -1)) || (t == 0)) {
#ifdef FD_ZERO
for (cnt = 0;; cnt++) {
fd_set check;
struct timeval tout = {0, 100 * 1000}; /* 100 ms per attempt */
int selrv;
FD_ZERO(&check);
FD_SET(fd, &check);
/*
* Avoid soft deadlock: do not read if there
* is nothing to read from sockets and pipes.
*/
selrv = select(fd + 1, &check, NULL, NULL, &tout);
/*
 * NOTE(review): a timeout (selrv == 0) with cnt < 5 reaches the final
 * `else' and leaves the loop, so the `cnt >= 5' give-up branch is only
 * reached after earlier iterations failed with -1. A select() error
 * other than EINTR/EAGAIN simply goes round again. Confirm that this
 * is the intended retry policy.
 */
if (selrv == -1) {
if (errno == EINTR || errno == EAGAIN)
continue;
} else if (selrv == 0 && cnt >= 5) {
return 0;
} else
break;
}
#endif
/* ask again now that select() says something may be there */
(void)ioctl(fd, FIONREAD, &t);
}
/* never ask read() for more than is known to be available */
if (t > 0 && (size_t)t < n) {
n = t;
rn = n;
}
#endif
nocheck:
do
switch ((rv = read(fd, buf, n))) {
case -1:
if (errno == EINTR)
continue;
return -1;
case 0:
/* end of file: report what was read so far */
return rn - n;
default:
n -= rv;
buf = ((char *)buf) + rv;
break;
}
while (n > 0);
return rn;
}
/*
 * Copy the contents of a pipe into an unlinked temporary file so that
 * the data becomes seekable.
 *
 * The nbytes bytes at startbuf (already consumed from the pipe by the
 * caller) are written first, followed by everything sread() still
 * returns from fd. The temporary file is then dup2()'ed onto fd and
 * rewound to offset 0.
 *
 * Returns fd on success. On failure an error is recorded with
 * file_error() / file_badseek() and -1 is returned; the temporary
 * descriptor is closed on every failure path so that it is not leaked.
 */
int
file_pipe2file(struct r_magic_set *ms, int fd, const void *startbuf,
    size_t nbytes)
{
	char buf[4096];
	int r, tfd;

	(void)strlcpy(buf, "/tmp/file.XXXXXX", sizeof buf);
	tfd = mkstemp(buf);
	/* unlink right away, but keep mkstemp()'s errno for the report */
	r = errno;
	(void)unlink(buf);
	errno = r;
	if (tfd == -1) {
		file_error(ms, errno,
		    "cannot create temporary file for pipe copy");
		return -1;
	}

	if (swrite(tfd, startbuf, nbytes) != (ssize_t)nbytes)
		r = 1;
	else {
		/* buf is reused as the copy buffer from here on */
		while ((r = sread(fd, buf, sizeof(buf), 1)) > 0)
			if (swrite(tfd, buf, (size_t)r) != r)
				break;
	}

	/* r: -1 = read error, 0 = clean end of input, >0 = write error */
	switch (r) {
	case -1:
		file_error(ms, errno, "error copying from pipe to temp file");
		(void)close(tfd);
		return -1;
	case 0:
		break;
	default:
		file_error(ms, errno, "error while writing to temp file");
		(void)close(tfd);
		return -1;
	}

	/*
	 * Put the temporary file in place of the caller's descriptor.
	 * The file has already been unlinked, but open descriptors can
	 * still access the phantom inode.
	 */
	if ((fd = dup2(tfd, fd)) == -1) {
		file_error(ms, errno, "could not dup descriptor for temp file");
		(void)close(tfd);
		return -1;
	}
	(void)close(tfd);
	if (lseek(fd, (off_t)0, SEEK_SET) == (off_t)-1) {
		file_badseek(ms);
		return -1;
	}
	return fd;
}
#ifdef BUILTIN_DECOMPRESS
/* bits of the FLG byte (offset 3) of a gzip member header */
#define FHCRC (1 << 1)
#define FEXTRA (1 << 2)
#define FNAME (1 << 3)
#define FCOMMENT (1 << 4)
/*
 * Inflate the start of a gzip member with zlib instead of running gzip.
 *
 * old/n is the compressed input. On success *newch receives a malloc'ed
 * buffer holding up to HOWMANY bytes of output plus a terminating NUL,
 * and the number of output bytes is returned.
 *
 * Returns 0 when the header is truncated or malformed, when memory
 * cannot be allocated, or when zlib reports an error. In the zlib error
 * cases *newch has already been allocated and is left for the caller to
 * free.
 */
static size_t
uncompressgzipped(struct r_magic_set *ms, const unsigned char *old,
    unsigned char **newch, size_t n)
{
	unsigned char flg;
	size_t data_start = 10;		/* size of the fixed gzip header */
	z_stream z;
	int rc;

	/*
	 * Without at least one byte after the fixed header there is
	 * nothing to inflate; checking here also keeps the read of the
	 * flag byte inside the buffer.
	 */
	if (n <= data_start)
		return 0;
	flg = old[3];

	/* skip the optional header fields, in the order they are stored */
	if (flg & FEXTRA) {
		if (data_start + 1 >= n)
			return 0;
		/* two-byte little-endian length, then that many bytes */
		data_start += 2 + old[data_start] + old[data_start + 1] * 256;
	}
	if (flg & FNAME) {
		/* NUL-terminated original file name */
		while (data_start < n && old[data_start])
			data_start++;
		data_start++;
	}
	if (flg & FCOMMENT) {
		/* NUL-terminated comment */
		while (data_start < n && old[data_start])
			data_start++;
		data_start++;
	}
	if (flg & FHCRC)
		data_start += 2;

	if (data_start >= n)
		return 0;
	if ((*newch = (unsigned char *)malloc(HOWMANY + 1)) == NULL) {
		return 0;
	}

	/*
	 * XXX: const castaway, via strchr. Searching for the first byte
	 * of the string always yields the start of the string, so this
	 * is just old + data_start without the const qualifier.
	 */
	z.next_in = (Bytef *)strchr((const char *)old + data_start,
	    old[data_start]);
	z.avail_in = n - data_start;
	z.next_out = *newch;
	z.avail_out = HOWMANY;
	z.zalloc = Z_NULL;
	z.zfree = Z_NULL;
	z.opaque = Z_NULL;

	/* negative window bits: raw deflate data, header already skipped */
	rc = inflateInit2(&z, -15);
	if (rc != Z_OK) {
		file_error(ms, 0, "zlib: %s", z.msg ? z.msg : "unknown error");
		return 0;
	}

	rc = inflate(&z, Z_SYNC_FLUSH);
	if (rc != Z_OK && rc != Z_STREAM_END) {
		/* z.msg may be NULL; report first, then release zlib state */
		file_error(ms, 0, "zlib: %s", z.msg ? z.msg : "unknown error");
		(void)inflateEnd(&z);
		return 0;
	}

	n = (size_t)z.total_out;
	(void)inflateEnd(&z);

	/* let's keep the nul-terminate tradition */
	(*newch)[n] = '\0';
	return n;
}
#endif
/*
 * Run the external decompressor compr[method].argv and collect the
 * start of its output.
 *
 * If fd is not -1 the decompressor reads from fd itself (rewound to
 * offset 0). Otherwise the n bytes at old are fed to it through a pipe
 * by a second child process, so that the parent cannot block with both
 * pipes full.
 *
 * On success *newch points to a malloc'ed, NUL-terminated buffer with
 * up to HOWMANY bytes of output and the number of bytes is returned.
 * Returns NODATA if a pipe or a child process could not be created.
 * Returns 0 if the buffer could not be allocated or nothing could be
 * read from the decompressor; after a failed read *newch is reset to
 * NULL.
 *
 * NOTE(review): the final waitpid(-1, ...) loop reaps every child of
 * the calling process, not only the ones started here; confirm that is
 * acceptable for library use.
 */
static size_t
uncompressbuf(struct r_magic_set *ms, int fd, size_t method,
    const unsigned char *old, unsigned char **newch, size_t n)
{
	int fdin[2] = { -1, -1 };	/* only a real pipe when fd == -1 */
	int fdout[2];
	int r;

#ifdef BUILTIN_DECOMPRESS
	/* FIXME: This doesn't cope with bzip2 */
	if (method == 2)
		return uncompressgzipped(ms, old, newch, n);
#endif
	(void)fflush(stdout);
	(void)fflush(stderr);

	/*
	 * The input pipe is needed exactly when there is no descriptor
	 * for the decompressor to read from, i.e. when fd == -1.
	 */
	if (fd == -1 && pipe(fdin) == -1) {
		file_error(ms, errno, "cannot create pipe");
		return NODATA;
	}
	if (pipe(fdout) == -1) {
		file_error(ms, errno, "cannot create pipe");
		if (fd == -1) {
			(void) close(fdin[0]);
			(void) close(fdin[1]);
		}
		return NODATA;
	}

	switch (fork()) {
	case 0:	/* child */
		/* stdin: the caller's descriptor, or the read end of fdin */
		(void) close(0);
		if (fd != -1) {
			(void) dup(fd);
			(void) lseek(0, (off_t)0, SEEK_SET);
		} else {
			(void) dup(fdin[0]);
			(void) close(fdin[0]);
			(void) close(fdin[1]);
		}

		/* stdout: the write end of fdout */
		(void) close(1);
		(void) dup(fdout[1]);
		(void) close(fdout[0]);
		(void) close(fdout[1]);
#ifndef DEBUG
		if (compr[method].silent)
			(void)close(2);
#endif

		(void)execvp(compr[method].argv[0],
		    (char *const *)(intptr_t)compr[method].argv);
#ifdef DEBUG
		(void)fprintf(stderr, "exec `%s' failed (%s)\n",
		    compr[method].argv[0], strerror(errno));
#endif
		exit(1);
		/*NOTREACHED*/
	case -1:
		file_error(ms, errno, "could not fork");
		if (fd == -1) {
			(void) close(fdin[0]);
			(void) close(fdin[1]);
		}
		(void) close(fdout[0]);
		(void) close(fdout[1]);
		return NODATA;

	default: /* parent */
		(void) close(fdout[1]);
		if (fd == -1) {
			(void) close(fdin[0]);
			/*
			 * fork again, to avoid blocking because both
			 * pipes filled
			 */
			switch (fork()) {
			case 0: /* child */
				(void)close(fdout[0]);
				if (swrite(fdin[1], old, n) != (ssize_t)n) {
#ifdef DEBUG
					(void)fprintf(stderr,
					    "Write failed (%s)\n",
					    strerror(errno));
#endif
					exit(1);
				}
				exit(0);
				/*NOTREACHED*/

			case -1:
				/*
				 * This is still the calling process, so
				 * report the failure instead of exiting.
				 * The cleanup at err closes fdin[1],
				 * which lets the decompressor see EOF.
				 */
#ifdef DEBUG
				(void)fprintf(stderr, "Fork failed (%s)\n",
				    strerror(errno));
#endif
				file_error(ms, errno, "could not fork");
				n = NODATA;
				goto err;

			default:  /* parent */
				break;
			}
			(void) close(fdin[1]);
			fdin[1] = -1;
		}

		if ((*newch = (unsigned char *) malloc(HOWMANY + 1)) == NULL) {
#ifdef DEBUG
			(void)fprintf(stderr, "Malloc failed (%s)\n",
			    strerror(errno));
#endif
			n = 0;
			goto err;
		}
		if ((r = sread(fdout[0], *newch, HOWMANY, 0)) <= 0) {
#ifdef DEBUG
			(void)fprintf(stderr, "Read failed (%s)\n",
			    strerror(errno));
#endif
			free(*newch);
			*newch = NULL;	/* do not leave a dangling pointer */
			n = 0;
			goto err;
		} else {
			n = r;
		}
		/* NUL terminate, as every buffer is handled here. */
		(*newch)[n] = '\0';
err:
		if (fdin[1] != -1)
			(void) close(fdin[1]);
		(void) close(fdout[0]);
#ifdef WNOHANG
		while (waitpid(-1, NULL, WNOHANG) != -1)
			continue;
#else
		(void)wait(NULL);
#endif
		return n;
	}
}

38
libr/magic/config.h Normal file
View File

@ -0,0 +1,38 @@
/*
* Hand-made config.h file for OpenBSD, so we don't have to run
* the dratted configure script every time we build this puppy,
* but can still carefully import stuff from Christos' version.
*
* This file is in the public domain. Original Author Ian F. Darwin.
* $OpenBSD: config.h,v 1.7 2011/07/25 16:21:22 martynas Exp $
*/
/* header file issues. */
/* HAVE_UNISTD_H and HAVE_SYS_WAIT_H guard the matching #include lines in compress.c */
#define HAVE_UNISTD_H 1
#define HAVE_FCNTL_H 1
#define HAVE_SYS_WAIT_H 1
#define HAVE_LOCALE_H 1
#define HAVE_SYS_STAT_H 1
#define HAVE_INTTYPES_H 1
#define HAVE_GETOPT_H 1
#define HAVE_LIMITS_H 1
/*
 * compress.c defines BUILTIN_DECOMPRESS (the zlib-based gzip path) only
 * when both HAVE_ZLIB_H and HAVE_LIBZ are defined; with the two lines
 * below commented out, the external gzip command is used instead.
 */
/* #define HAVE_ZLIB_H 1 DO NOT ENABLE YET -- chl */
/* #define HAVE_LIBZ 1 DO NOT ENABLE YET -- ian */
#define HAVE_STRTOUL
#define HAVE_STRERROR
#define HAVE_VSNPRINTF
#define HAVE_SNPRINTF
#undef HAVE_STRNDUP
#define HAVE_STRTOF
/* Compiler issues */
#define SIZEOF_LONG_LONG 8
/* Library issues */
#define HAVE_GETOPT_LONG 1 /* in-tree as of 3.2 */
#define HAVE_ST_RDEV 1
/* ELF support */
/* NOTE(review): both are 0, presumably because the ELF reader is not part of this import -- confirm */
#define BUILTIN_ELF 0
#define ELFCORE 0

500
libr/magic/file.1 Normal file
View File

@ -0,0 +1,500 @@
.\" $OpenBSD: file.1,v 1.33 2010/10/28 21:32:54 jmc Exp $
.\" $FreeBSD: src/usr.bin/file/file.1,v 1.16 2000/03/01 12:19:39 sheldonh Exp $
.\"
.\" Copyright (c) Ian F. Darwin 1986-1995.
.\" Software written by Ian F. Darwin and others;
.\" maintained 1995-present by Christos Zoulas and others.
.\"
.\" Redistribution and use in source and binary forms, with or without
.\" modification, are permitted provided that the following conditions
.\" are met:
.\" 1. Redistributions of source code must retain the above copyright
.\" notice immediately at the beginning of the file, without modification,
.\" this list of conditions, and the following disclaimer.
.\" 2. Redistributions in binary form must reproduce the above copyright
.\" notice, this list of conditions and the following disclaimer in the
.\" documentation and/or other materials provided with the distribution.
.\"
.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
.\" ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
.Dd $Mdocdate: October 28 2010 $
.Dt FILE 1
.Os
.Sh NAME
.Nm file
.Nd determine file type
.Sh SYNOPSIS
.Nm
.Bk -words
.Op Fl 0bCcehikLNnprsvz
.Op Fl -help
.Op Fl -mime-encoding
.Op Fl -mime-type
.Op Fl F Ar separator
.Op Fl f Ar namefile
.Op Fl m Ar magicfiles
.Ar file
.Ek
.Sh DESCRIPTION
The
.Nm
utility tests each argument in an attempt to classify it.
There are three sets of tests, performed in this order:
filesystem tests, magic tests, and language tests.
The first test that succeeds causes the file type to be printed.
.Pp
The type printed will usually contain one of the words
.Em text
(the file contains only
printing characters and a few common control
characters and is probably safe to read on an
ASCII terminal),
.Em executable
(the file contains the result of compiling a program
in a form understandable to some
.Ux
kernel or another),
or
.Em data
meaning anything else (data is usually
.Dq binary
or non-printable).
Exceptions are well-known file formats (core files, tar archives)
that are known to contain binary data.
When modifying magic files or the program itself, make sure to
.Em preserve these keywords .
Users depend on knowing that all the readable files in a directory
have the word
.Dq text
printed.
Don't do as Berkeley did and change
.Dq shell commands text
to
.Dq shell script .
.Pp
The filesystem tests are based on examining the return from a
.Xr stat 2
system call.
The program checks to see if the file is empty,
or if it's some sort of special file.
Any known file types,
such as sockets, symbolic links, and named pipes (FIFOs),
are intuited if they are defined in
the system header file
.Aq Pa sys/stat.h .
.Pp
The magic tests are used to check for files with data in
particular fixed formats.
The canonical example of this is a binary executable (compiled program)
a.out file, whose format is defined in
.Aq Pa elf.h ,
.Aq Pa a.out.h ,
and possibly
.Aq Pa exec.h
in the standard include directory.
These files have a
.Dq magic number
stored in a particular place
near the beginning of the file that tells the
.Ux
operating system
that the file is a binary executable, and which of several types thereof.
The concept of a
.Dq magic
has been applied by extension to data files.
Any file with some invariant identifier at a small fixed
offset into the file can usually be described in this way.
The information identifying these files is read from the magic file
.Pa /etc/magic .
In addition, if
.Pa $HOME/.magic.mgc
or
.Pa $HOME/.magic
exists, it will be used in preference to the system magic files.
.Pp
If a file does not match any of the entries in the magic file,
it is examined to see if it seems to be a text file.
ASCII, ISO-8859-x, non-ISO 8-bit extended-ASCII character sets
(such as those used on Macintosh and IBM PC systems),
UTF-8-encoded Unicode, UTF-16-encoded Unicode, and EBCDIC
character sets can be distinguished by the different
ranges and sequences of bytes that constitute printable text
in each set.
If a file passes any of these tests, its character set is reported.
ASCII, ISO-8859-x, UTF-8, and extended-ASCII files are identified
as
.Dq text
because they will be mostly readable on nearly any terminal;
UTF-16 and EBCDIC are only
.Dq character data
because, while
they contain text, it is text that will require translation
before it can be read.
In addition,
.Nm
will attempt to determine other characteristics of text-type files.
If the lines of a file are terminated by CR, CRLF, or NEL, instead
of the Unix-standard LF, this will be reported.
Files that contain embedded escape sequences or overstriking
will also be identified.
.Pp
Once
.Nm
has determined the character set used in a text-type file,
it will
attempt to determine in what language the file is written.
The language tests look for particular strings (cf.\&
.Aq Pa names.h )
that can appear anywhere in the first few blocks of a file.
For example, the keyword
.Em .br
indicates that the file is most likely a
troff input file, just as the keyword
.Em struct
indicates a C program.
These tests are less reliable than the previous
two groups, so they are performed last.
The language test routines also test for some miscellany
(such as
.Xr tar 1
archives).
.Pp
Any file that cannot be identified as having been written
in any of the character sets listed above is simply said to be
.Dq data .
.Sh OPTIONS
.Bl -tag -width indent
.It Fl 0 , -print0
Output a null character
.Sq \e0
after the end of the filename.
This is useful when the output is to be processed with
.Xr cut 1 .
This does not affect the separator which is still printed.
.It Fl b , -brief
Do not prepend filenames to output lines (brief mode).
.It Fl C , -compile
Write a
.Pa magic.mgc
output file that contains a pre-parsed version of the magic file or directory.
.It Fl c , -checking-printout
Cause a checking printout of the parsed form of the magic file.
This is usually used in conjunction with the
.Fl m
flag to debug a new magic file before installing it.
.It Fl e , -exclude Ar testname
Exclude the test named in
.Ar testname
from the list of tests made to determine the file type.
Valid test names are:
.Bl -tag -width compress
.It apptype
Check for
.Dv EMX
application type (only on EMX).
.It ascii
Check for various types of ASCII files.
.It compress
Don't look for, or inside, compressed files.
.It elf
Don't print elf details.
.It fortran
Don't look for fortran sequences inside ASCII files.
.It soft
Don't consult magic files.
.It tar
Don't examine tar files.
.It tokens
Don't look for known tokens inside ASCII files.
.It troff
Don't look for troff sequences inside ASCII files.
.El
.It Fl F , -separator Ar separator
Use the specified string as the separator between the filename and the
file result returned.
Defaults to
.Sq \&: .
.It Fl f , -files-from Ar namefile
Read the names of the files to be examined from
.Ar namefile
(one per line)
before the argument list.
Either
.Ar namefile
or at least one filename argument must be present;
to test the standard input, use
.Sq -
as a filename argument.
.It Fl h , -no-dereference
Causes symlinks not to be followed.
This is the default if the environment variable
.Dv POSIXLY_CORRECT
is not defined.
.It Fl -help
Print a help message and exit.
.It Fl i , -mime
Causes the file command to output mime type strings rather than the more
traditional human readable ones.
Thus it may say
.Dq text/plain charset=us-ascii
rather than
.Dq ASCII text .
In order for this option to work,
.Nm
changes the way it handles files recognized by the command itself
(such as many of the text file types, directories etc.),
and makes use of an alternative
.Dq magic
file.
See also
.Sx FILES ,
below.
.It Fl -mime-encoding , -mime-type
Like
.Fl i ,
but print only the specified element(s).
.It Fl k , -keep-going
Don't stop at the first match, keep going.
Subsequent matches will have the string
.Dq "\[rs]012\- "
prepended.
(If a newline is required, see the
.Fl r
option.)
.It Fl L , -dereference
Causes symlinks to be followed;
analogous to the option of the same name in
.Xr ls 1 .
This is the default if the environment variable
.Dv POSIXLY_CORRECT
is defined.
.It Fl m , -magic-file Ar magicfiles
Specify an alternate list of files and directories containing magic.
This can be a single item, or a colon-separated list.
If a compiled magic file is found alongside a file or directory,
it will be used instead.
.It Fl N , -no-pad
Don't pad filenames so that they align in the output.
.It Fl n , -no-buffer
Force stdout to be flushed after checking each file.
This is only useful if checking a list of files.
It is intended to be used by programs that want filetype output from a pipe.
.It Fl p , -preserve-date
On systems that support
.Xr utime 3
or
.Xr utimes 2 ,
attempt to preserve the access time of files analyzed, to pretend that
.Nm
never read them.
.It Fl r , -raw
Don't translate unprintable characters to \eooo.
Normally
.Nm
translates unprintable characters to their octal representation.
.It Fl s , -special-files
Normally,
.Nm
only attempts to read and determine the type of argument files which
.Xr stat 2
reports are ordinary files.
This prevents problems, because reading special files may have peculiar
consequences.
Specifying the
.Fl s
option causes
.Nm
to also read argument files which are block or character special files.
This is useful for determining the filesystem types of the data in raw
disk partitions, which are block special files.
This option also causes
.Nm
to disregard the file size as reported by
.Xr stat 2
since on some systems it reports a zero size for raw disk partitions.
.It Fl v , -version
Print the version of the program and exit.
.It Fl z , -uncompress
Try to look inside compressed files.
.El
.Sh ENVIRONMENT
The environment variable
.Dv MAGIC
can be used to set the default magic file name.
If that variable is set, then
.Nm
will not attempt to open
.Pa $HOME/.magic .
.Nm
adds
.Dq .mgc
to the value of this variable as appropriate.
The environment variable
.Dv POSIXLY_CORRECT
controls whether
.Nm
will attempt to follow symlinks or not.
If set, then
.Nm
follows symlinks; otherwise it does not.
This is also controlled by the
.Fl L
and
.Fl h
options.
.Sh FILES
.Bl -tag -width /etc/magic -compact
.It Pa /etc/magic
default list of magic numbers
.El
.Sh EXIT STATUS
.Ex -std file
.Sh SEE ALSO
.Xr hexdump 1 ,
.Xr od 1 ,
.Xr strings 1 ,
.Xr magic 5
.Sh STANDARDS CONFORMANCE
This program is believed to exceed the System V Interface Definition
of FILE(CMD), as near as one can determine from the vague language
contained therein.
Its behavior is mostly compatible with the System V program of the same name.
This version knows more magic, however, so it will produce
different (albeit more accurate) output in many cases.
.\" URL: http://www.opengroup.org/onlinepubs/009695399/utilities/file.html
.Pp
The one significant difference
between this version and System V
is that this version treats any whitespace
as a delimiter, so that spaces in pattern strings must be escaped.
For example,
.Bd -literal -offset indent
\*(Gt10 string language impress\ (imPRESS data)
.Ed
.Pp
in an existing magic file would have to be changed to
.Bd -literal -offset indent
\*(Gt10 string language\e impress (imPRESS data)
.Ed
.Pp
In addition, in this version, if a pattern string contains a backslash,
it must be escaped.
For example
.Bd -literal -offset indent
0 string \ebegindata Andrew Toolkit document
.Ed
.Pp
in an existing magic file would have to be changed to
.Bd -literal -offset indent
0 string \e\ebegindata Andrew Toolkit document
.Ed
.Pp
SunOS releases 3.2 and later from Sun Microsystems include a
.Nm
command derived from the System V one, but with some extensions.
This version differs from Sun's only in minor ways.
It includes the extension of the
.Sq &
operator, used as,
for example,
.Bd -literal -offset indent
\*(Gt16 long&0x7fffffff \*(Gt0 not stripped
.Ed
.Sh HISTORY
There has been a
.Nm
command in every
.Ux
since at least Research Version 4
(man page dated November, 1973).
The System V version introduced one significant major change:
the external list of magic types.
This slowed the program down slightly but made it a lot more flexible.
.Pp
This program, based on the System V version,
was written by Ian Darwin
without looking at anybody else's source code.
.Pp
John Gilmore revised the code extensively, making it better than
the first version.
Geoff Collyer found several inadequacies
and provided some magic file entries.
Contribution of the `&' operator by Rob McMahon, 1989.
.Pp
Guy Harris made many changes from 1993 to the present.
.Pp
Primary development and maintenance from 1990 to the present by
Christos Zoulas.
.Pp
Altered by Chris Lowth, 2000:
Handle the
.Fl i
option to output mime type strings, using an alternative
magic file and internal logic.
.Pp
Altered by Eric Fischer, July, 2000,
to identify character codes and attempt to identify the languages
of non-ASCII files.
.Pp
Altered by Reuben Thomas, 2007 to 2008, to improve MIME
support and merge MIME and non-MIME magic, support directories as well
as files of magic, apply many bug fixes and improve the build system.
.Pp
The list of contributors to the
.Dq magic
directory (magic files)
is too long to include here.
You know who you are; thank you.
Many contributors are listed in the source files.
.Sh BUGS
There must be a better way to automate the construction of the Magic
file from all the glop in Magdir.
What is it?
.Pp
.Nm
uses several algorithms that favor speed over accuracy,
thus it can be misled about the contents of
text
files.
.Pp
The support for text files (primarily for programming languages)
is simplistic, inefficient and requires recompilation to update.
.Pp
The list of keywords in
.Pa ascmagic
probably belongs in the Magic file.
This could be done by using some keyword like
.Sq *
for the offset value.
.Pp
Complain about conflicts in the magic file entries.
Make a rule that the magic entries sort based on file offset rather
than position within the magic file?
.Pp
The program should provide a way to give an estimate
of
.Dq how good
a guess is.
We end up removing guesses (e.g.
.Dq "From "
as first 5 chars of file) because
they are not as good as other guesses (e.g.\&
.Dq Newsgroups:
versus
.Dq Return-Path: ) .
Still, if the others don't pan out, it should be possible to use the
first guess.
.Pp
This manual page, and particularly this section, is too long.

478
libr/magic/file.c Normal file
View File

@ -0,0 +1,478 @@
/* $OpenBSD: file.c,v 1.23 2011/04/15 16:05:34 stsp Exp $ */
/*
* Copyright (c) Ian F. Darwin 1986-1995.
* Software written by Ian F. Darwin and others;
* maintained 1995-present by Christos Zoulas and others.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice immediately at the beginning of the file, without modification,
* this list of conditions, and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* file - find type of a file or files - main program.
*/
#include <sys/types.h>
#include <sys/param.h> /* for MAXPATHLEN */
#include <sys/stat.h>
#include <r_magic.h>
#include "file.h"
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#ifdef RESTORE_TIME
# if (__COHERENT__ >= 0x420)
# include <sys/utime.h>
# else
# ifdef USE_UTIMES
# include <sys/time.h>
# else
# include <utime.h>
# endif
# endif
#endif
#ifdef HAVE_UNISTD_H
#include <unistd.h> /* for read() */
#endif
#ifdef HAVE_LOCALE_H
#include <locale.h>
#endif
#ifdef HAVE_WCHAR_H
#include <wchar.h>
#endif
#include <getopt.h>
#ifndef HAVE_GETOPT_LONG
int getopt_long(int argc, char * const *argv, const char *optstring, const struct option *longopts, int *longindex);
#endif
#include <netinet/in.h> /* for byte swapping */
#include "patchlevel.h"
/* The -L and -h option letters are only advertised where S_IFLNK exists. */
#ifdef S_IFLNK
#define SYMLINKFLAG "Lh"
#else
#define SYMLINKFLAG ""
#endif
/* Usage text; takes the program name twice (see usage()). */
# define USAGE "Usage: %s [-bcik" SYMLINKFLAG "nNprsvz0] [-e test] [-f namefile] [-F separator] [-m magicfiles] file...\n" \
" %s -C -m magicfiles\n"
/* Fallback size for the line buffer in unwrap() when the system gives none. */
#ifndef MAXPATHLEN
#define MAXPATHLEN 512
#endif
static int /* Global command-line options */
bflag = 0, /* brief output format */
nopad = 0, /* Don't pad output */
nobuffer = 0, /* Do not buffer stdout */
nulsep = 0; /* Append '\0' to the separator */
static const char *magicfile = 0; /* where the magic is */
static const char *default_magicfile = MAGIC; /* compiled-in default path */
static const char *separator = ":"; /* Default field separator */
extern char *__progname; /* used throughout */
static struct r_magic_set *magic; /* opened lazily by load() or main() */
/* Forward declarations for the functions defined below. */
static void unwrap(char *);
static void usage(void);
static void help(void);
int main(int, char *[]);
static void process(const char *, int);
static void load(const char *, int);
/*
 * main - parse arguments and handle options
 *
 * Magic file selection: the compiled-in default is used unless the MAGIC
 * environment variable is set; when it is not set and $HOME/.magic can be
 * stat'ed, that file is used instead.  A later -m overrides either choice.
 *
 * Return value:
 *   -c / -C : 0 on success, 1 if the magic set cannot be opened,
 *             -1 if checking/compiling the magic file fails.
 *   -v      : always 1 (after printing the version to stderr).
 *   default : 1 if magic->haderr is set once all files are processed,
 *             otherwise 0.
 * usage() is called on option errors and when no file name was supplied
 * either as an argument or through -f.
 */
int
main(int argc, char *argv[])
{
int c;
size_t i;
int action = 0, didsomefiles = 0, errflg = 0;
int flags = 0;
char *home, *usermagic;
struct stat sb;
static const char hmagic[] = "/.magic";
#define OPTSTRING "bcCde:f:F:hikLm:nNprsvz0"
int longindex;
/* Long option table: one entry per OPT/OPT_LONGONLY line of file_opts.h. */
static const struct option long_options[] =
{
#define OPT(shortname, longname, opt, doc) \
{longname, opt, NULL, shortname},
#define OPT_LONGONLY(longname, opt, doc) \
{longname, opt, NULL, 0},
#include "file_opts.h"
#undef OPT
#undef OPT_LONGONLY
{0, 0, NULL, 0}
};
/* Test names accepted by -e and the flag each one ORs into `flags'. */
static const struct {
const char *name;
int value;
} nv[] = {
{ "apptype", R_MAGIC_NO_CHECK_APPTYPE },
{ "ascii", R_MAGIC_NO_CHECK_ASCII },
{ "compress", R_MAGIC_NO_CHECK_COMPRESS },
// { "elf", R_MAGIC_NO_CHECK_ELF },
{ "soft", R_MAGIC_NO_CHECK_SOFT },
{ "tar", R_MAGIC_NO_CHECK_TAR },
{ "tokens", R_MAGIC_NO_CHECK_TOKENS },
};
/* makes islower etc work for other langs */
(void)setlocale(LC_CTYPE, "");
#ifdef __EMX__
/* sh-like wildcard expansion! Shouldn't hurt at least ... */
_wildcard(&argc, &argv);
#endif
magicfile = default_magicfile;
if ((usermagic = getenv("MAGIC")) != NULL)
magicfile = usermagic;
else
if ((home = getenv("HOME")) != NULL) {
/* sizeof(hmagic) already counts the terminating NUL. */
size_t len = strlen(home) + sizeof(hmagic);
if ((usermagic = malloc(len)) != NULL) {
(void)strlcpy(usermagic, home, len);
(void)strlcat(usermagic, hmagic, len);
/* Only adopt $HOME/.magic when it can be stat'ed. */
if (stat(usermagic, &sb)<0)
free(usermagic);
else
magicfile = usermagic;
}
}
#ifdef S_IFLNK
/* POSIXLY_CORRECT in the environment turns symlink following on by default. */
flags |= getenv("POSIXLY_CORRECT") ? R_MAGIC_SYMLINK : 0;
#endif
while ((c = getopt_long(argc, argv, OPTSTRING, long_options,
&longindex)) != -1)
switch (c) {
case 0 :
/*
 * Long-only options: longindex is the position in long_options[],
 * i.e. the position of the entry in file_opts.h.
 */
switch (longindex) {
case 0:
help();
break;
case 10:
flags |= R_MAGIC_MIME_TYPE;
break;
case 11:
flags |= R_MAGIC_MIME_ENCODING;
break;
}
break;
case '0':
nulsep = 1;
break;
case 'b':
bflag++;
break;
case 'c':
action = FILE_CHECK;
break;
case 'C':
action = FILE_COMPILE;
break;
case 'd':
flags |= R_MAGIC_DEBUG|R_MAGIC_CHECK;
break;
case 'e':
/* Linear search of nv[]; an unknown test name counts as a usage error. */
for (i = 0; i < sizeof(nv) / sizeof(nv[0]); i++)
if (strcmp(nv[i].name, optarg) == 0)
break;
if (i == sizeof(nv) / sizeof(nv[0]))
errflg++;
else
flags |= nv[i].value;
break;
case 'f':
/* -f cannot be combined with an earlier -c/-C. */
if(action)
usage();
/*
 * The name list is processed right here, with the magic file and
 * flags collected so far; options that follow -f come too late for it.
 */
load(magicfile, flags);
unwrap(optarg);
++didsomefiles;
break;
case 'F':
separator = optarg;
break;
case 'i':
flags |= R_MAGIC_MIME;
break;
case 'k':
flags |= R_MAGIC_CONTINUE;
break;
case 'm':
magicfile = optarg;
break;
case 'n':
++nobuffer;
break;
case 'N':
++nopad;
break;
#if defined(HAVE_UTIME) || defined(HAVE_UTIMES)
case 'p':
flags |= R_MAGIC_PRESERVE_ATIME;
break;
#endif
case 'r':
flags |= R_MAGIC_RAW;
break;
case 's':
flags |= R_MAGIC_DEVICES;
break;
case 'v':
/* Version goes to stderr and the program exits with status 1. */
(void)fprintf(stderr, "%s-%d.%.2d\n", __progname,
FILE_VERSION_MAJOR, patchlevel);
(void)fprintf(stderr, "magic file from %s\n",
magicfile);
return 1;
case 'z':
flags |= R_MAGIC_COMPRESS;
break;
#ifdef S_IFLNK
case 'L':
flags |= R_MAGIC_SYMLINK;
break;
case 'h':
flags &= ~R_MAGIC_SYMLINK;
break;
#endif
case '?':
default:
errflg++;
break;
}
if (errflg) {
usage();
}
switch(action) {
case FILE_CHECK:
case FILE_COMPILE:
/* -c / -C operate on the magic file itself and never look at file arguments. */
magic = r_magic_open(flags|R_MAGIC_CHECK);
if (magic == NULL) {
(void)fprintf(stderr, "%s: %s\n", __progname,
strerror(errno));
return 1;
}
c = action == FILE_CHECK ? r_magic_check(magic, magicfile) :
r_magic_compile(magic, magicfile);
if (c == -1) {
(void)fprintf(stderr, "%s: %s\n", __progname,
r_magic_error(magic));
/* NOTE(review): returns -1 here while other failures return 1 -- confirm intended. */
return -1;
}
return 0;
default:
/* No-op if -f already loaded the magic set. */
load(magicfile, flags);
break;
}
if (optind == argc) {
/* No file arguments: acceptable only if -f supplied some names. */
if (!didsomefiles) {
usage();
}
} else {
size_t j, wid, nw;
/* Widest argument, used by process() to align the output column. */
for (wid = 0, j = (size_t)optind; j < (size_t)argc; j++) {
nw = file_mbswidth(argv[j]);
if (nw > wid)
wid = nw;
}
/*
* If bflag is only set twice, set it depending on
* number of files [this is undocumented, and subject to change]
*/
if (bflag == 2) {
/* -bb: brief only when exactly one file argument remains. */
bflag = optind >= argc - 1;
}
for (; optind < argc; optind++)
process(argv[optind], wid);
}
c = magic->haderr ? 1 : 0;
r_magic_close(magic);
return c;
}
/*
 * load - open the global magic set and load a magic database into it.
 *
 * m:     magic file (or list) to load; NULL makes the call a no-op.
 * flags: flags handed to r_magic_open().
 *
 * Does nothing when the global `magic' is already open.  Prints a
 * diagnostic to stderr and exits with status 1 if the set cannot be
 * opened or the database cannot be loaded.
 */
static void
load(const char *m, int flags)
{
	if (magic || m == NULL)
		return;
	magic = r_magic_open(flags);
	if (magic == NULL) {
		(void)fprintf(stderr, "%s: %s\n", __progname, strerror(errno));
		exit(1);
	}
	/*
	 * Load the file the caller asked for.  The previous code used the
	 * global `magicfile' here, silently ignoring `m' even though the
	 * guard above tests it.
	 */
	if (r_magic_load(magic, m) == -1) {
		(void)fprintf(stderr, "%s: %s\n",
		    __progname, r_magic_error(magic));
		exit(1);
	}
}
/*
 * unwrap -- identify every file named in a list file, one name per line.
 *
 * fn is the list file, or "-" for standard input.  A real file is read
 * twice: first to find the widest name (for column alignment), then again
 * to process each name.  Standard input is read once with a width of 1.
 * Exits with status 1 if the list file cannot be opened.
 */
static void
unwrap(char *fn)
{
	char line[MAXPATHLEN];
	FILE *list;
	int width = 0;

	if (strcmp("-", fn) != 0) {
		list = fopen(fn, "r");
		if (list == NULL) {
			(void)fprintf(stderr, "%s: Cannot open `%s' (%s).\n",
			    __progname, fn, strerror(errno));
			exit(1);
		}
		/* First pass: measure the widest name in the list. */
		while (fgets(line, sizeof(line), list) != NULL) {
			int cur;

			line[strcspn(line, "\n")] = '\0';
			cur = file_mbswidth(line);
			if (width < cur)
				width = cur;
		}
		rewind(list);
	} else {
		list = stdin;
		width = 1;
	}

	/* Second (or only) pass: identify each named file. */
	for (;;) {
		if (fgets(line, sizeof(line), list) == NULL)
			break;
		line[strcspn(line, "\n")] = '\0';
		process(line, width);
		if (nobuffer)
			(void)fflush(stdout);
	}
	(void)fclose(list);
}
/*
 * Called for each input file on the command line (or in a list of files).
 *
 * inname: file name to identify; "-" means standard input and is shown
 *         as "/dev/stdin".
 * wid:    width of the widest name, used to pad the name column; a value
 *         of 0 or less suppresses the name prefix altogether.
 *
 * Unless brief mode is on, prints the name, a NUL byte when -0 was given,
 * the separator, and padding.  Then prints the type reported by
 * r_magic_file(), or "ERROR: ..." if it returns NULL.
 */
static void
process(const char *inname, int wid)
{
	const char *type;
	int std_in = strcmp(inname, "-") == 0;

	if (wid > 0 && !bflag) {
		(void)printf("%s", std_in ? "/dev/stdin" : inname);
		if (nulsep)
			(void)putc('\0', stdout);
		/*
		 * The separator is printed even with -0: the manual states
		 * that -0 "does not affect the separator which is still
		 * printed".  Previously -0 replaced the separator.
		 */
		(void)printf("%s", separator);
		(void)printf("%*s ",
		    (int) (nopad ? 0 : (wid - file_mbswidth(inname))), "");
	}

	/* A NULL path asks r_magic_file() to examine standard input. */
	type = r_magic_file(magic, std_in ? NULL : inname);
	if (type == NULL)
		(void)printf("ERROR: %s\n", r_magic_error(magic));
	else
		(void)printf("%s\n", type);
}
/*
 * file_mbswidth - display width of a (possibly multibyte) string.
 *
 * With wide-character support compiled in, sums wcwidth() over the
 * decoded characters; a newline counts as one column and characters with
 * a non-positive width add nothing.  If decoding fails, or without
 * wide-character support, the byte length of the string is returned.
 */
size_t
file_mbswidth(const char *s)
{
#if defined(HAVE_WCHAR_H) && defined(HAVE_MBRTOWC) && defined(HAVE_WCWIDTH)
	const size_t total = strlen(s);
	size_t remaining, used, columns = 0;
	mbstate_t st;
	wchar_t wc;

	(void)memset(&st, 0, sizeof(mbstate_t));

	for (remaining = total; remaining > 0; s += used, remaining -= used) {
		used = mbrtowc(&wc, s, remaining, &st);
		if (used == (size_t)(-1) || used == (size_t)(-2)) {
			/* Undecodable input: fall back to the byte length. */
			return total;
		}
		if (*s == '\n') {
			/* Count it as strlen() would, so callers stay consistent. */
			columns++;
		} else {
			int w = wcwidth(wc);

			if (w > 0)
				columns += w;
		}
	}
	return columns;
#else
	return strlen(s);
#endif
}
/*
 * usage - print the synopsis and a pointer to --help on stderr, then
 * exit with status 1.
 */
static void
usage(void)
{
	static const char hint[] = "Try `file --help' for more information.\n";

	(void)fprintf(stderr, USAGE, __progname, __progname);
	(void)fputs(hint, stderr);
	exit(1);
}
/*
 * help - print the full option summary on stderr and exit with status 0.
 *
 * The per-option lines are generated from file_opts.h: each OPT entry
 * prints its short and long name plus its documentation, each
 * OPT_LONGONLY entry prints only the long name and documentation.
 */
static void
help(void)
{
	(void)fputs("Usage: file [OPTION...] [FILE...]\n"
	    "Determine type of FILEs.\n\n", stderr);

#define OPT_LONGONLY(longname, opt, doc) \
	fprintf(stderr, " --" longname doc);
#define OPT(shortname, longname, opt, doc) \
	fprintf(stderr, " -%c, --" longname doc, shortname);
#include "file_opts.h"
#undef OPT_LONGONLY
#undef OPT

	exit(0);
}

119
libr/magic/file.h Normal file
View File

@ -0,0 +1,119 @@
/* $OpenBSD: file.h,v 1.22 2009/10/27 23:59:37 deraadt Exp $ */
/*
* Copyright (c) Ian F. Darwin 1986-1995.
* Software written by Ian F. Darwin and others;
* maintained 1995-present by Christos Zoulas and others.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice immediately at the beginning of the file, without modification,
* this list of conditions, and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* file.h - definitions for file(1) program
* @(#)$Id: file.h,v 1.22 2009/10/27 23:59:37 deraadt Exp $
*/
/* Include guard. */
#ifndef __file_h__
#define __file_h__
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <r_magic.h>
#include <stdio.h> /* Include that here, to make sure __P gets defined */
#include <errno.h>
#include <fcntl.h> /* For open and flags */
#include <inttypes.h> // TODO: use utX
#include <regex.h>
#include <sys/types.h>
/* Do this here and now, because struct stat gets re-defined on solaris */
#include <sys/stat.h>
#include <stdarg.h>
/* Type for Unicode characters */
typedef unsigned long unichar;
struct stat;
/* Prototypes shared between the magic library's source files. */
const char *file_fmttime(unsigned int, int);
int file_buffer(struct r_magic_set *, int, const char *, const void *,
size_t);
int file_fsmagic(struct r_magic_set *, const char *, struct stat *);
int file_pipe2file(struct r_magic_set *, int, const void *, size_t);
int file_printf(struct r_magic_set *, const char *, ...);
int file_reset(struct r_magic_set *);
/* NOTE(review): the import notes say the ELF part was removed -- confirm this still has a definition. */
int file_tryelf(struct r_magic_set *, int, const unsigned char *,
size_t);
int file_zmagic(struct r_magic_set *, int, const char *,
const unsigned char *, size_t);
int file_ascmagic(struct r_magic_set *, const unsigned char *, size_t);
int file_is_tar(struct r_magic_set *, const unsigned char *, size_t);
int file_softmagic(struct r_magic_set *, const unsigned char *, size_t, int);
struct mlist *file_apprentice(struct r_magic_set *, const char *, int);
ut64 file_signextend(struct r_magic_set *, struct magic *,
ut64);
void file_delmagic(struct magic *, int type, size_t entries);
/* Error and warning reporting helpers. */
void file_badread(struct r_magic_set *);
void file_badseek(struct r_magic_set *);
void file_oomem(struct r_magic_set *, size_t);
void file_error(struct r_magic_set *, int, const char *, ...);
void file_magerror(struct r_magic_set *, const char *, ...);
void file_magwarn(struct r_magic_set *, const char *, ...);
void file_mdump(struct magic *);
void file_showstr(FILE *, const char *, size_t);
size_t file_mbswidth(const char *);
const char *file_getbuffer(struct r_magic_set *);
ssize_t sread(int, void *, size_t, int);
int file_check_mem(struct r_magic_set *, unsigned int);
int file_looks_utf8(const unsigned char *, size_t, unichar *, size_t *);
#ifndef COMPILE_ONLY
extern const char *file_names[];
extern const size_t file_nnames;
#endif
/* Fallbacks for platforms whose C library lacks these functions. */
#ifndef HAVE_STRERROR
extern int sys_nerr;
extern char *sys_errlist[];
#define strerror(e) \
(((e) >= 0 && (e) < sys_nerr) ? sys_errlist[(e)] : "Unknown error")
#endif
#ifndef HAVE_STRTOUL
/* Substitutes the signed strtol() when strtoul() is unavailable. */
#define strtoul(a, b, c) strtol(a, b, c)
#endif
#ifndef HAVE_VASPRINTF
int vasprintf(char **ptr, const char *format_string, va_list vargs);
#endif
#ifndef HAVE_ASPRINTF
int asprintf(char **ptr, const char *format_string, ...);
#endif
/* QUICK is defined automatically when mmap() and <sys/mman.h> are both available. */
#if defined(HAVE_MMAP) && defined(HAVE_SYS_MMAN_H) && !defined(QUICK)
#define QUICK
#endif
/* O_BINARY becomes a no-op flag on systems that do not define it. */
#ifndef O_BINARY
#define O_BINARY 0
#endif
#endif /* __file_h__ */

49
libr/magic/file_opts.h Normal file
View File

@ -0,0 +1,49 @@
/* $OpenBSD: file_opts.h,v 1.2 2009/04/26 14:17:45 chl Exp $ */
/*
* Table of command-line options
*
* The first column specifies the short name, if any, or 0 if none.
* The second column specifies the long name.
* The third column specifies whether it takes a parameter.
* The fourth column is the documentation.
*
* N.B. The long options' order must correspond to the code in file.c,
* and OPTSTRING must be kept up-to-date with the short options.
* Pay particular attention to the numbers of long-only options in the
* switch statement!
*/
/* Index 0 in long_options[]: main() dispatches on this number, not a letter. */
OPT_LONGONLY("help", 0, " display this help and exit\n")
OPT('v', "version", 0, " output version information and exit\n")
OPT('m', "magic-file", 1, " LIST use LIST as a colon-separated list of magic\n"
" number files\n")
OPT('z', "uncompress", 0, " try to look inside compressed files\n")
OPT('b', "brief", 0, " do not prepend filenames to output lines\n")
OPT('c', "checking-printout", 0, " print the parsed form of the magic file, use in\n"
" conjunction with -m to debug a new magic file\n"
" before installing it\n")
OPT('e', "exclude", 1, " TEST exclude TEST from the list of test to be\n"
" performed for file. Valid tests are:\n"
" ascii, apptype, compress, elf, soft, tar, tokens, troff\n")
OPT('f', "files-from", 1, " FILE read the filenames to be examined from FILE\n")
OPT('F', "separator", 1, " STRING use string as separator instead of `:'\n")
OPT('i', "mime", 0, " output MIME type strings (--mime-type and\n"
" --mime-encoding)\n")
/* Indices 10 and 11 in long_options[]: keep in step with the switch in main(). */
OPT_LONGONLY("mime-type", 0, " output the MIME type\n")
OPT_LONGONLY("mime-encoding", 0, " output the MIME encoding\n")
OPT('k', "keep-going", 0, " don't stop at the first match\n")
/* Conditional entries come after the long-only ones, so the indices above do not shift. */
#ifdef S_IFLNK
OPT('L', "dereference", 0, " follow symlinks (default)\n")
OPT('h', "no-dereference", 0, " don't follow symlinks\n")
#endif
OPT('n', "no-buffer", 0, " do not buffer output\n")
OPT('N', "no-pad", 0, " do not pad output\n")
OPT('0', "print0", 0, " terminate filenames with ASCII NUL\n")
#if defined(HAVE_UTIME) || defined(HAVE_UTIMES)
OPT('p', "preserve-date", 0, " preserve access times on files\n")
#endif
OPT('r', "raw", 0, " don't translate unprintable chars to \\ooo\n")
OPT('s', "special-files", 0, " treat special (block/char devices) files as\n"
" ordinary ones\n")
OPT('C', "compile", 0, " compile file specified by -m\n")
OPT('d', "debug", 0, " print debugging messages\n")

309
libr/magic/fsmagic.c Normal file
View File

@ -0,0 +1,309 @@
/* $OpenBSD: fsmagic.c,v 1.14 2009/10/27 23:59:37 deraadt Exp $ */
/*
* Copyright (c) Ian F. Darwin 1986-1995.
* Software written by Ian F. Darwin and others;
* maintained 1995-present by Christos Zoulas and others.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice immediately at the beginning of the file, without modification,
* this list of conditions, and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* fsmagic - magic based on filesystem info - directory, special files, etc.
*/
#include <r_magic.h>
#include "file.h"
#include <string.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#include <stdlib.h>
#include <sys/stat.h>
/* Since major is a function on SVR4, we cannot use `ifndef major'. */
#ifdef MAJOR_IN_MKDEV
# include <sys/mkdev.h>
# define HAVE_MAJOR
#endif
#ifdef MAJOR_IN_SYSMACROS
# include <sys/sysmacros.h>
# define HAVE_MAJOR
#endif
#ifdef major /* Might be defined in sys/types.h. */
# define HAVE_MAJOR
#endif
/*
 * Last-resort definitions: treat bits 8-15 of the device number as the
 * major number and bits 0-7 as the minor number.
 */
#ifndef HAVE_MAJOR
# define major(dev) (((dev) >> 8) & 0xff)
# define minor(dev) ((dev) & 0xff)
#endif
/* HAVE_MAJOR was only a helper for the checks above. */
#undef HAVE_MAJOR
/*
 * bad_link - report a symbolic link whose target could not be stat'ed.
 *
 * err is the errno from the failed stat(); ELOOP selects the "loop"
 * message, anything else the "broken link" message naming buf.
 * Returns -1 when R_MAGIC_ERROR is set (the problem is recorded through
 * file_error()) or when file_printf() fails, and 1 otherwise.
 */
static int
bad_link(struct r_magic_set *ms, int err, char *buf)
{
	const char *fmt = (err == ELOOP) ?
	    "symbolic link in a loop" :
	    "broken symbolic link to `%s'";

	if ((ms->flags & R_MAGIC_ERROR) != 0) {
		file_error(ms, err, fmt, buf);
		return -1;
	}
	return (file_printf(ms, fmt, buf) == -1) ? -1 : 1;
}
/*
 * file_fsmagic - classify a path from filesystem metadata alone.
 *
 * Stats fn (with lstat() unless R_MAGIC_SYMLINK is set, with stat()
 * otherwise) and reports, through file_printf(), anything that is not a
 * non-empty regular file: directories, device nodes, FIFOs, doors,
 * sockets, symbolic links and empty files.
 *
 * ms: magic set; ms->flags selects MIME output (R_MAGIC_MIME*), error
 *     reporting (R_MAGIC_ERROR), symlink following (R_MAGIC_SYMLINK) and
 *     reading of special files (R_MAGIC_DEVICES).
 * fn: path to examine; NULL returns 0 immediately.
 * sb: receives the stat result for fn.
 *
 * Returns 1 when the file has been fully described here, 0 when the
 * caller should go on to examine the contents, and -1 on error.
 */
int
file_fsmagic(struct r_magic_set *ms, const char *fn, struct stat *sb)
{
	int ret = 0;
	int mime = ms->flags & R_MAGIC_MIME;
#ifdef S_IFLNK
	char buf[BUFSIZ+4];
	int nch;
	struct stat tstatbuf;
#endif

	if (fn == NULL)
		return 0;

	/*
	 * Fstat is cheaper but fails for files you don't have read perms on.
	 * On 4.2BSD and similar systems, use lstat() to identify symlinks.
	 */
#ifdef S_IFLNK
	if ((ms->flags & R_MAGIC_SYMLINK) == 0)
		ret = lstat(fn, sb);
	else
#endif
	ret = stat(fn, sb);	/* don't merge into if; see "ret =" above */

	if (ret) {
		if (ms->flags & R_MAGIC_ERROR) {
			file_error(ms, errno, "cannot stat `%s'", fn);
			return -1;
		}
		if (file_printf(ms, "cannot open `%s' (%s)",
		    fn, strerror(errno)) == -1)
			return -1;
		return 1;
	}

	if (mime) {
		/* In MIME mode anything but a regular file gets one fixed type. */
		if ((sb->st_mode & S_IFMT) != S_IFREG) {
			if ((mime & R_MAGIC_MIME_TYPE) &&
			    file_printf(ms, "application/x-not-regular-file")
			    == -1)
				return -1;
			return 1;
		}
	}
	else {
		/* Prefix the description with any special permission bits. */
#ifdef S_ISUID
		if (sb->st_mode & S_ISUID)
			if (file_printf(ms, "setuid ") == -1)
				return -1;
#endif
#ifdef S_ISGID
		if (sb->st_mode & S_ISGID)
			if (file_printf(ms, "setgid ") == -1)
				return -1;
#endif
#ifdef S_ISVTX
		if (sb->st_mode & S_ISVTX)
			if (file_printf(ms, "sticky ") == -1)
				return -1;
#endif
	}

	switch (sb->st_mode & S_IFMT) {
	case S_IFDIR:
		if (file_printf(ms, "directory") == -1)
			return -1;
		return 1;
#ifdef S_IFCHR
	case S_IFCHR:
		/*
		 * If -s has been specified, treat character special files
		 * like ordinary files.  Otherwise, just report that they
		 * are character special files and go on to the next file.
		 */
		if ((ms->flags & R_MAGIC_DEVICES) != 0)
			break;
#ifdef HAVE_STAT_ST_RDEV
# ifdef dv_unit
		if (file_printf(ms, "character special (%d/%d/%d)",
		    major(sb->st_rdev), dv_unit(sb->st_rdev),
		    dv_subunit(sb->st_rdev)) == -1)
			return -1;
# else
		if (file_printf(ms, "character special (%ld/%ld)",
		    (long) major(sb->st_rdev), (long) minor(sb->st_rdev)) == -1)
			return -1;
# endif
#else
		if (file_printf(ms, "character special") == -1)
			return -1;
#endif
		return 1;
#endif
#ifdef S_IFBLK
	case S_IFBLK:
		/*
		 * If -s has been specified, treat block special files
		 * like ordinary files.  Otherwise, just report that they
		 * are block special files and go on to the next file.
		 */
		if ((ms->flags & R_MAGIC_DEVICES) != 0)
			break;
#ifdef HAVE_STAT_ST_RDEV
# ifdef dv_unit
		if (file_printf(ms, "block special (%d/%d/%d)",
		    major(sb->st_rdev), dv_unit(sb->st_rdev),
		    dv_subunit(sb->st_rdev)) == -1)
			return -1;
# else
		if (file_printf(ms, "block special (%ld/%ld)",
		    (long)major(sb->st_rdev), (long)minor(sb->st_rdev)) == -1)
			return -1;
# endif
#else
		if (file_printf(ms, "block special") == -1)
			return -1;
#endif
		return 1;
#endif
	/* TODO add code to handle V7 MUX and Blit MUX files */
#ifdef S_IFIFO
	case S_IFIFO:
		if((ms->flags & R_MAGIC_DEVICES) != 0)
			break;
		if (file_printf(ms, "fifo (named pipe)") == -1)
			return -1;
		return 1;
#endif
#ifdef S_IFDOOR
	case S_IFDOOR:
		if (file_printf(ms, "door") == -1)
			return -1;
		return 1;
#endif
#ifdef S_IFLNK
	case S_IFLNK:
		if ((nch = readlink(fn, buf, BUFSIZ-1)) <= 0) {
			if (ms->flags & R_MAGIC_ERROR) {
				file_error(ms, errno, "unreadable symlink `%s'",
				    fn);
				return -1;
			}
			if (file_printf(ms,
			    "unreadable symlink `%s' (%s)", fn,
			    strerror(errno)) == -1)
				return -1;
			return 1;
		}
		buf[nch] = '\0';	/* readlink(2) does not do this */

		/* If broken symlink, say so and quit early. */
		if (*buf == '/') {
			if (stat(buf, &tstatbuf) < 0)
				return bad_link(ms, errno, buf);
		} else {
			/* Relative target: resolve it against fn's directory. */
			char *tmp;
			char buf2[BUFSIZ+BUFSIZ+4];

			if ((tmp = strrchr(fn, '/')) == NULL) {
				tmp = buf; /* in current directory anyway */
			} else {
				if (tmp - fn + 1 > BUFSIZ) {
					if (ms->flags & R_MAGIC_ERROR) {
						file_error(ms, 0,
						    "path too long: `%s'", buf);
						return -1;
					}
					if (file_printf(ms,
					    "path too long: `%s'", fn) == -1)
						return -1;
					return 1;
				}
				(void)strlcpy(buf2, fn, sizeof buf2);	/* take dir part */
				buf2[tmp - fn + 1] = '\0';
				(void)strlcat(buf2, buf, sizeof buf2);	/* plus (rel) link */
				tmp = buf2;
			}
			if (stat(tmp, &tstatbuf) < 0)
				return bad_link(ms, errno, buf);
		}

		/* Otherwise, handle it. */
		if ((ms->flags & R_MAGIC_SYMLINK) != 0) {
			const char *p;

			/*
			 * Clear only the symlink bit for the nested lookup and
			 * set it again afterwards.  Masking with the bit itself
			 * (no `~') threw away every other flag for good.
			 */
			ms->flags &= ~R_MAGIC_SYMLINK;
			p = r_magic_file(ms, buf);
			ms->flags |= R_MAGIC_SYMLINK;
			return p != NULL ? 1 : -1;
		} else { /* just print what it points to */
			if (file_printf(ms, "symbolic link to `%s'",
			    buf) == -1)
				return -1;
		}
		return 1;
#endif
#ifdef S_IFSOCK
#ifndef __COHERENT__
	case S_IFSOCK:
		if (file_printf(ms, "socket") == -1)
			return -1;
		return 1;
#endif
#endif
	case S_IFREG:
		break;
	default:
		file_error(ms, 0, "invalid mode 0%o", sb->st_mode);
		return -1;
		/*NOTREACHED*/
	}

	/*
	 * regular file, check next possibility
	 *
	 * If stat() tells us the file has zero length, report here that
	 * the file is empty, so we can skip all the work of opening and
	 * reading the file.
	 * But if the -s option has been given, we skip this optimization,
	 * since on some systems, stat() reports zero size for raw disk
	 * partitions.  (If the block special device really has zero length,
	 * the fact that it is empty will be detected and reported correctly
	 * when we read the file.)
	 */
	if ((ms->flags & R_MAGIC_DEVICES) == 0 && sb->st_size == 0) {
		if ((!mime || (mime & R_MAGIC_MIME_TYPE)) &&
		    file_printf(ms, mime ? "application/x-empty" :
		    "empty") == -1)
			return -1;
		return 1;
	}
	return 0;
}

312
libr/magic/funcs.c Normal file
View File

@ -0,0 +1,312 @@
/* $OpenBSD: funcs.c,v 1.7 2009/10/27 23:59:37 deraadt Exp $ */
/*
* Copyright (c) Christos Zoulas 2003.
* All Rights Reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice immediately at the beginning of the file, without modification,
* this list of conditions, and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include "file.h"
#include <stdarg.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#if defined(HAVE_WCHAR_H)
#include <wchar.h>
#endif
#if defined(HAVE_WCTYPE_H)
#include <wctype.h>
#endif
/*
 * Like printf, only we append to the output buffer of the magic set.
 *
 * The formatted text is built in a freshly allocated string.  When
 * ms->o.buf already holds text, a new string containing the old text
 * followed by the new one replaces it; otherwise the new string simply
 * becomes ms->o.buf.
 *
 * Returns 0 on success.  Returns -1, after reporting through file_error(),
 * when vasprintf()/asprintf() fails.
 */
int
file_printf(struct r_magic_set *ms, const char *fmt, ...)
{
	va_list ap;
	int len;
	char *buf, *newstr;

	va_start(ap, fmt);
	len = vasprintf(&buf, fmt, ap);
	/* va_start() must be paired with va_end() on every path, even on failure */
	va_end(ap);
	if (len < 0)
		goto out;

	if (ms->o.buf != NULL) {
		len = asprintf(&newstr, "%s%s", ms->o.buf, buf);
		free(buf);
		if (len < 0)
			goto out;
		free(ms->o.buf);
		buf = newstr;
	}
	ms->o.buf = buf;
	return 0;
out:
	file_error(ms, errno, "vasprintf failed");
	return -1;
}
/*
 * error - print best error message possible
 *
 * Common backend of file_error() and file_magerror().  Only the first error
 * reported on a magic set is kept; later calls return immediately.
 *
 *   ms      magic set receiving the message in ms->o.buf
 *   error   errno-style code; when > 0 its strerror() text is appended
 *   f, va   printf-style format and its already started argument list
 *   lineno  when non-zero, any pending output is discarded and the message
 *           is prefixed with "line <lineno>: "
 */
/*VARARGS*/
static void
file_error_core(struct r_magic_set *ms, int error, const char *f, va_list va,
    uint32_t lineno)
{
	char *msg = NULL;

	/* Only the first error is ok */
	if (ms->haderr)
		return;
	/*
	 * Mark the error as taken before printing anything: file_printf()
	 * itself calls file_error() when it cannot allocate, and with the
	 * flag already set that nested call returns at once instead of
	 * recursing without bound.
	 */
	ms->haderr++;
	ms->error = error;

	if (lineno != 0) {
		free(ms->o.buf);
		ms->o.buf = NULL;
		file_printf(ms, "line %u: ", (unsigned int)lineno);
	}
	/*
	 * `va' is a va_list and has to be expanded by a v*printf function;
	 * passing it as an ordinary argument to the variadic file_printf()
	 * is undefined behaviour.
	 */
	if (vasprintf(&msg, f, va) >= 0) {
		file_printf(ms, "%s", msg);
		free(msg);
	} else {
		/* could not expand the arguments: keep at least the raw text */
		file_printf(ms, "%s", f);
	}
	if (error > 0)
		file_printf(ms, " (%s)", strerror(error));
}
/*
 * Record an error message on the magic set.
 *
 * `f' and the arguments after it are a printf-style message; `error' is
 * forwarded unchanged to file_error_core(), with no line number (0).
 */
/*VARARGS*/
void
file_error(struct r_magic_set *ms, int error, const char *f, ...)
{
	va_list args;

	va_start(args, f);
	file_error_core(ms, error, f, args, 0);
	va_end(args);
}
/*
 * Record an error message tagged with the magic-file line number.
 *
 * Same as file_error() except that the error code passed on is 0 and the
 * current value of ms->line is handed to file_error_core() as line number.
 */
/*VARARGS*/
void
file_magerror(struct r_magic_set *ms, const char *f, ...)
{
	va_list args;

	va_start(args, f);
	file_error_core(ms, 0, f, args, ms->line);
	va_end(args);
}
/*
 * Report a failed allocation of `len' bytes, together with the current errno.
 */
void
file_oomem(struct r_magic_set *ms, size_t len)
{
	const int saved_errno = errno;	/* sample errno before doing anything else */

	file_error(ms, saved_errno, "cannot allocate %zu bytes", len);
}
/*
 * Report a seek failure, together with the current errno.
 */
void
file_badseek(struct r_magic_set *ms)
{
	const int saved_errno = errno;	/* sample errno before doing anything else */

	file_error(ms, saved_errno, "error seeking");
}
/*
 * Report a read failure, together with the current errno.
 */
void
file_badread(struct r_magic_set *ms)
{
	const int saved_errno = errno;	/* sample errno before doing anything else */

	file_error(ms, saved_errno, "error reading");
}
#ifndef COMPILE_ONLY
/*
 * Describe the `nb' bytes at `buf' by appending text to the output buffer
 * of `ms'.
 *
 * The detectors run in a fixed order (compression, tar, soft magic, ASCII)
 * and the first one returning non-zero decides the result; each can be
 * switched off with its R_MAGIC_NO_CHECK_* flag.  When none of them claims
 * the data a generic "data" / "application/octet-stream" label is printed.
 *
 * Returns -1 on error, otherwise the non-zero value of the detector that
 * matched, or 1 when one of the built-in fallback labels was used.
 */
int
file_buffer(struct r_magic_set *ms, int fd, const char *inname, const void *buf,
    size_t nb)
{
	const int mime = ms->flags & R_MAGIC_MIME;
	/* fallback labels are printed unless MIME output without the type was requested */
	const int want_label = !mime || (mime & R_MAGIC_MIME_TYPE);
	int m;

	if (nb == 0) {
		if (want_label &&
		    file_printf(ms, mime ? "application/x-empty" :
		    "empty") == -1)
			return -1;
		return 1;
	}
	if (nb == 1) {
		if (want_label &&
		    file_printf(ms, mime ? "application/octet-stream" :
		    "very short file (no magic)") == -1)
			return -1;
		return 1;
	}

#ifdef __EMX__
	if ((ms->flags & R_MAGIC_NO_CHECK_APPTYPE) == 0 && inname) {
		m = file_os2_apptype(ms, inname, buf, nb);
		if (m == -1)
			return -1;
		if (m != 0)
			return 1;
	}
#endif

	/* try compression stuff */
	if ((ms->flags & R_MAGIC_NO_CHECK_COMPRESS) == 0 &&
	    (m = file_zmagic(ms, fd, inname, buf, nb)) != 0)
		return m;

	/* Check if we have a tar file */
	if ((ms->flags & R_MAGIC_NO_CHECK_TAR) == 0 &&
	    (m = file_is_tar(ms, buf, nb)) != 0)
		return m;

	/* try tests in /etc/magic (or surrogate magic file) */
	if ((ms->flags & R_MAGIC_NO_CHECK_SOFT) == 0 &&
	    (m = file_softmagic(ms, buf, nb, BINTEST)) != 0)
		return m;

	/* try known keywords, check whether it is ASCII */
	if ((ms->flags & R_MAGIC_NO_CHECK_ASCII) == 0 &&
	    (m = file_ascmagic(ms, buf, nb)) != 0)
		return m;

	/* abandon hope, all ye who remain here */
	if (want_label &&
	    file_printf(ms, mime ? "application/octet-stream" :
	    "data") == -1)
		return -1;
	return 1;
}
#endif
/*
 * Prepare a magic set for a new identification run.
 *
 * Fails with -1 (and records "no magic files loaded") when no magic list
 * has been loaded.  Otherwise drops the output text of the previous run,
 * clears the error state and returns 0.
 */
int
file_reset(struct r_magic_set *ms)
{
	if (ms->mlist == NULL) {
		file_error(ms, 0, "no magic files loaded");
		return -1;
	}
	/*
	 * The buffer belongs to the magic set (file_printf() allocates it,
	 * r_magic_close() frees it), so release it here instead of merely
	 * forgetting the pointer, which leaked one string per call.
	 */
	free(ms->o.buf);
	ms->o.buf = NULL;
	ms->haderr = 0;
	ms->error = -1;
	return 0;
}
/*
 * OCTALIFY(n, o): write the byte at *o to *n as a four character escape,
 * a backslash followed by three octal digits (bits 7-6, 5-3 and 2-0).
 * Both arguments are modified: n ends up four positions further, o one.
 */
#define OCTALIFY(n, o) \
/*LINTED*/ \
(void)(*(n)++ = '\\', \
*(n)++ = (((uint32_t)*(o) >> 6) & 3) + '0', \
*(n)++ = (((uint32_t)*(o) >> 3) & 7) + '0', \
*(n)++ = (((uint32_t)*(o) >> 0) & 7) + '0', \
(o)++)
/*
 * Return the text produced so far, made safe for printing.
 *
 * Returns NULL when an error has been recorded, when nothing has been
 * printed yet, or when the escape buffer cannot be allocated.  With
 * R_MAGIC_RAW the raw output buffer is returned untouched.  Otherwise the
 * text is copied into ms->o.pbuf with every non-printable character
 * replaced by a \ooo octal escape, and that copy is returned.  The returned
 * storage belongs to the magic set.
 */
const char *
file_getbuffer(struct r_magic_set *ms)
{
	char *pbuf, *op, *np;
	size_t psize, len;

	if (ms->haderr)
		return NULL;

	if (ms->flags & R_MAGIC_RAW)
		return ms->o.buf;

	/* nothing was printed: there is no string to measure or escape */
	if (ms->o.buf == NULL)
		return NULL;

	/* * 4 is for octal representation, + 1 is for NUL */
	len = strlen(ms->o.buf);
	if (len > (SIZE_MAX - 1) / 4) {
		file_oomem(ms, len);
		return NULL;
	}
	psize = len * 4 + 1;
	if ((pbuf = realloc(ms->o.pbuf, psize)) == NULL) {
		file_oomem(ms, psize);
		return NULL;
	}
	ms->o.pbuf = pbuf;

#if defined(HAVE_WCHAR_H) && defined(HAVE_MBRTOWC) && defined(HAVE_WCWIDTH)
	{
		mbstate_t state;
		wchar_t nextchar;
		int mb_conv = 1;
		size_t bytesconsumed;
		char *eop;
		(void)memset(&state, 0, sizeof(mbstate_t));

		np = ms->o.pbuf;
		op = ms->o.buf;
		eop = op + len;

		while (op < eop) {
			bytesconsumed = mbrtowc(&nextchar, op,
			    (size_t)(eop - op), &state);
			if (bytesconsumed == (size_t)(-1) ||
			    bytesconsumed == (size_t)(-2)) {
				/* not valid in this locale: redo it byte by byte below */
				mb_conv = 0;
				break;
			}

			if (iswprint(nextchar)) {
				(void)memcpy(np, op, bytesconsumed);
				op += bytesconsumed;
				np += bytesconsumed;
			} else {
				while (bytesconsumed-- > 0)
					OCTALIFY(np, op);
			}
		}
		*np = '\0';

		/* Parsing succeeded as a multi-byte sequence */
		if (mb_conv != 0)
			return ms->o.pbuf;
	}
#endif

	/* single-byte fallback: copy printable bytes, escape the rest */
	for (np = ms->o.pbuf, op = ms->o.buf; *op; op++) {
		if (isprint((unsigned char)*op)) {
			*np++ = *op;
		} else {
			OCTALIFY(np, op);
		}
	}
	*np = '\0';
	return ms->o.pbuf;
}
/*
 * Make sure the per-level state array ms->c.li has an entry for `level'
 * and reset that entry.
 *
 * The array is grown to level + 20 entries when `level' is not covered yet.
 * Returns 0 on success; on allocation failure reports through file_oomem()
 * and returns -1, leaving the existing array and its length untouched.
 */
int
file_check_mem(struct r_magic_set *ms, unsigned int level)
{
	size_t nlen, nbytes;
	void *grown;

	if (level >= ms->c.len) {
		/* size from `level' so the new array is guaranteed to cover it */
		nlen = (size_t)level + 20;
		nbytes = nlen * sizeof(*ms->c.li);
		/* realloc(NULL, n) behaves like malloc(n), no special case needed */
		grown = realloc(ms->c.li, nbytes);
		if (grown == NULL) {
			file_oomem(ms, nbytes);
			return -1;
		}
		/* commit pointer and length only once the allocation succeeded */
		ms->c.li = grown;
		ms->c.len = nlen;
	}
	ms->c.li[level].got_match = 0;
#ifdef ENABLE_CONDITIONALS
	ms->c.li[level].last_match = 0;
	ms->c.li[level].last_cond = COND_NONE;
#endif /* ENABLE_CONDITIONALS */
	return 0;
}

152
libr/magic/is_tar.c Normal file
View File

@ -0,0 +1,152 @@
/* $OpenBSD: is_tar.c,v 1.10 2009/10/27 23:59:37 deraadt Exp $ */
/*
* Copyright (c) Ian F. Darwin 1986-1995.
* Software written by Ian F. Darwin and others;
* maintained 1995-present by Christos Zoulas and others.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice immediately at the beginning of the file, without modification,
* this list of conditions, and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* is_tar() -- figure out whether file is a tar archive.
*
* Stolen (by the author!) from the public domain tar program:
* Public Domain version written 26 Aug 1985 John Gilmore (ihnp4!hoptoad!gnu).
*
* @(#)list.c 1.18 9/23/86 Public Domain - gnu
*
* Comments changed and some code/comments reformatted
* for file command by Ian Darwin.
*/
#include "file.h"
#include <string.h>
#include <ctype.h>
#include <sys/types.h>
#include "tar.h"
/* True when c is one of the octal digit characters '0'..'7'. */
#define isodigit(c) ( ((c) >= '0') && ((c) <= '7') )
static int is_tar(const unsigned char *, size_t);
static int from_oct(int, const char *); /* Decode octal number */
/*
 * Human readable descriptions, indexed by (is_tar() result - 1):
 * 1 = old tar, 2 = POSIX tar, 3 = GNU tar.
 */
static const char tartype[][32] = {
"tar archive",
"POSIX tar archive",
"POSIX tar archive (GNU)",
};
/*
 * Check whether the buffer starts with a tar header and, if so, print the
 * archive flavour (or "application/x-tar" in MIME mode).
 *
 * Returns 0 when the data is not tar or when only the MIME encoding was
 * requested, 1 when a description was printed, -1 when printing failed.
 */
int
file_is_tar(struct r_magic_set *ms, const unsigned char *buf, size_t nbytes)
{
	/*
	 * Do the tar test first, because if the first file in the tar
	 * archive starts with a dot, we can confuse it with an nroff file.
	 */
	const int kind = is_tar(buf, nbytes);
	const int mime = ms->flags & R_MAGIC_MIME;
	const char *label;

	if (kind < 1 || kind > 3)
		return 0;
	if (mime == R_MAGIC_MIME_ENCODING)
		return 0;

	label = mime ? "application/x-tar" : tartype[kind - 1];
	return file_printf(ms, label) == -1 ? -1 : 1;
}
/*
 * Classify a buffer by its tar header.
 *
 * Return
 *	0 if the buffer is shorter than one record or the checksum is bad
 *	  (i.e., probably not a tar archive),
 *	1 for old UNIX tar file,
 *	2 for Unix Std (POSIX) tar file,
 *	3 for GNU tar file.
 */
static int
is_tar(const unsigned char *buf, size_t nbytes)
{
	const union record *hdr = (const union record *)(const void *)buf;
	const char *bytes;
	int idx, computed, stored;

	if (nbytes < sizeof(union record))
		return 0;

	stored = from_oct(8, hdr->header.chksum);

	/*
	 * Sum every byte of the record.  The 0xFF mask is used instead of
	 * unsigned char because of old compilers, e.g. V7.
	 */
	computed = 0;
	bytes = hdr->charptr;
	for (idx = 0; idx < (int)sizeof(union record); idx++)
		computed += 0xFF & bytes[idx];

	/* Adjust checksum to count the "chksum" field as blanks. */
	for (idx = 0; idx < (int)sizeof(hdr->header.chksum); idx++)
		computed -= 0xFF & hdr->header.chksum[idx];
	computed += ' '* sizeof hdr->header.chksum;

	if (computed != stored)
		return 0;	/* Not a tar archive */

	if (strcmp(hdr->header.magic, GNUTMAGIC) == 0)
		return 3;	/* GNU Unix Standard tar archive */

	/* Unix Standard tar archive, else old fashioned tar archive */
	return strcmp(hdr->header.magic, TMAGIC) == 0 ? 2 : 1;
}
/*
 * Quick and dirty octal conversion.
 *
 * Reads at most `digs' characters from `where': leading white space is
 * skipped, then octal digits are accumulated.
 *
 * Result is -1 if the field consists only of white space, or if the digits
 * are followed, inside the field, by anything other than a NUL or white
 * space.
 */
static int
from_oct(int digs, const char *where)
{
	const char *cur = where;
	int left = digs;
	int result = 0;

	/* Skip spaces */
	while (isspace((unsigned char)*cur)) {
		cur++;
		left--;
		if (left <= 0)
			return -1;		/* All blank field */
	}

	/* Scan til nonoctal, three bits per digit */
	for (; left > 0 && *cur >= '0' && *cur <= '7'; cur++, left--)
		result = (result << 3) | (*cur - '0');

	/* Accept the end of the field, a NUL, or trailing white space */
	if (left <= 0 || *cur == '\0' || isspace((unsigned char)*cur))
		return result;

	return -1;			/* Ended on non-space/nul */
}

395
libr/magic/magic.c Normal file
View File

@ -0,0 +1,395 @@
/* $OpenBSD: magic.c,v 1.8 2009/10/27 23:59:37 deraadt Exp $ */
/*
* Copyright (c) Christos Zoulas 2003.
* All Rights Reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice immediately at the beginning of the file, without modification,
* this list of conditions, and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/types.h>
#include <sys/param.h> /* for MAXPATHLEN */
#include <sys/stat.h>
#include "file.h"
#include <r_magic.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#ifdef QUICK
#include <sys/mman.h>
#endif
#include <limits.h> /* for PIPE_BUF */
#if defined(HAVE_UTIMES)
# include <sys/time.h>
#elif defined(HAVE_UTIME)
# if defined(HAVE_SYS_UTIME_H)
# include <sys/utime.h>
# elif defined(HAVE_UTIME_H)
# include <utime.h>
# endif
#endif
#ifdef HAVE_UNISTD_H
#include <unistd.h> /* for read() */
#endif
#ifdef HAVE_LOCALE_H
#include <locale.h>
#endif
#include <netinet/in.h> /* for byte swapping */
#include "patchlevel.h"
#ifndef PIPE_BUF
/* Get the PIPE_BUF from pathconf */
#ifdef _PC_PIPE_BUF
#define PIPE_BUF pathconf(".", _PC_PIPE_BUF)
#else
#define PIPE_BUF 512
#endif
#endif
#ifdef __EMX__
static char *apptypeName = NULL;
int file_os2_apptype(struct r_magic_set *ms, const char *fn,
const void *buf, size_t nb);
#endif /* __EMX__ */
static void free_mlist(struct mlist *);
static void close_and_restore(const struct r_magic_set *, const char *, int,
const struct stat *);
static int info_from_stat(struct r_magic_set *, mode_t);
#ifndef COMPILE_ONLY
static const char *file_or_fd(struct r_magic_set *, const char *, int);
#endif
#ifndef STDIN_FILENO
#define STDIN_FILENO 0
#endif
public struct r_magic_set *
r_magic_open(int flags)
{
struct r_magic_set *ms;
if ((ms = calloc((size_t)1, sizeof(struct r_magic_set))) == NULL)
return NULL;
if (r_magic_setflags(ms, flags) == -1) {
errno = EINVAL;
goto free;
}
ms->o.buf = ms->o.pbuf = NULL;
ms->c.li = malloc((ms->c.len = 10) * sizeof(*ms->c.li));
if (ms->c.li == NULL)
goto free;
ms->haderr = 0;
ms->error = -1;
ms->mlist = NULL;
ms->file = "unknown";
ms->line = 0;
return ms;
free:
free(ms);
return NULL;
}
/*
 * Release a circular magic list.
 *
 * Every node after the head has its magic entries released with
 * file_delmagic() and is then freed; the walk stops when it comes back to
 * the head, which is freed last.  A NULL list is ignored.
 */
static void
free_mlist(struct mlist *mlist)
{
	struct mlist *node, *following;

	if (mlist == NULL)
		return;

	node = mlist->next;
	while (node != mlist) {
		following = node->next;
		file_delmagic(node->magic, node->mapped, node->nmagic);
		free(node);
		node = following;
	}
	free(mlist);
}
/*
 * Describe a file that could be stat()ed but not read, from its mode bits.
 *
 * Prints "writable, ", "executable, " and "regular file, " as applicable,
 * followed by "no read permission".  Returns 0, or -1 when printing fails.
 */
static int
info_from_stat(struct r_magic_set *ms, mode_t md)
{
	/* We cannot open it, but we were able to stat it. */
	if ((md & 0222) && file_printf(ms, "writable, ") == -1)
		return -1;
	if ((md & 0111) && file_printf(ms, "executable, ") == -1)
		return -1;
	if (S_ISREG(md) && file_printf(ms, "regular file, ") == -1)
		return -1;
	return file_printf(ms, "no read permission") == -1 ? -1 : 0;
}
/*
 * Destroy a magic set: the loaded magic list, both output buffers, the
 * per-level state array and the set itself.  A NULL set is ignored, so the
 * function can be called unconditionally on cleanup paths.
 */
public void
r_magic_close(struct r_magic_set *ms)
{
	if (ms == NULL)
		return;
	free_mlist(ms->mlist);
	free(ms->o.pbuf);
	free(ms->o.buf);
	free(ms->c.li);
	free(ms);
}
/*
 * load a magic file
 *
 * On success the newly loaded list replaces (and frees) the one currently
 * attached to `ms' and 0 is returned.  On failure the current list is kept
 * and -1 is returned.
 */
public int
r_magic_load(struct r_magic_set *ms, const char *magicfile)
{
	struct mlist *loaded = file_apprentice(ms, magicfile, FILE_LOAD);

	if (loaded == NULL)
		return -1;

	free_mlist(ms->mlist);
	ms->mlist = loaded;
	return 0;
}
/*
 * Run file_apprentice() on `magicfile' in FILE_COMPILE mode.  The resulting
 * list is discarded; returns 0 when one was produced, -1 otherwise.
 */
public int
r_magic_compile(struct r_magic_set *ms, const char *magicfile)
{
	struct mlist *compiled = file_apprentice(ms, magicfile, FILE_COMPILE);
	const int status = (compiled != NULL) ? 0 : -1;

	free_mlist(compiled);
	return status;
}
/*
 * Run file_apprentice() on `magicfile' in FILE_CHECK mode.  The resulting
 * list is discarded; returns 0 when one was produced, -1 otherwise.
 */
public int
r_magic_check(struct r_magic_set *ms, const char *magicfile)
{
	struct mlist *checked = file_apprentice(ms, magicfile, FILE_CHECK);
	const int status = (checked != NULL) ? 0 : -1;

	free_mlist(checked);
	return status;
}
/*
 * Close `fd' and, when R_MAGIC_PRESERVE_ATIME is set, put the access and
 * modification times recorded in `sb' back on the file called `name'.
 *
 * STDIN_FILENO is never closed and nothing is restored for it.  Times are
 * only restored when a name is available.  Failures of close() and of the
 * time-setting call are deliberately ignored.
 */
static void
close_and_restore(const struct r_magic_set *ms, const char *name, int fd,
    const struct stat *sb)
{
	if (fd == STDIN_FILENO)
		return;
	(void) close(fd);

	/* without a path there is nothing utimes()/utime() could act on */
	if (name != NULL && (ms->flags & R_MAGIC_PRESERVE_ATIME) != 0) {
		/*
		 * Try to restore access, modification times if read it.
		 * This is really *bad* because it will modify the status
		 * time of the file... And of course this will affect
		 * backup programs
		 */
#ifdef HAVE_UTIMES
		struct timeval utsbuf[2];
		(void)memset(utsbuf, 0, sizeof(utsbuf));
		utsbuf[0].tv_sec = sb->st_atime;
		utsbuf[1].tv_sec = sb->st_mtime;

		(void) utimes(name, utsbuf); /* don't care if loses */
#elif defined(HAVE_UTIME_H) || defined(HAVE_SYS_UTIME_H)
		struct utimbuf utbuf;

		/* utbuf is a struct, not an array: memset needs its address */
		(void)memset(&utbuf, 0, sizeof(utbuf));
		utbuf.actime = sb->st_atime;
		utbuf.modtime = sb->st_mtime;
		(void) utime(name, &utbuf); /* don't care if loses */
#endif
	}
}
#ifndef COMPILE_ONLY
/*
* find type of descriptor
*
* Identifies the data readable from `fd' by calling file_or_fd() without a
* file name.  Returns whatever file_or_fd() returns: the description text,
* or NULL on failure.
*/
public const char *
r_magic_descriptor(struct r_magic_set *ms, int fd)
{
return file_or_fd(ms, NULL, fd);
}
/*
* find type of named file
*
* Identifies the file called `inname' by calling file_or_fd() with
* STDIN_FILENO as the descriptor argument.  Returns whatever file_or_fd()
* returns: the description text, or NULL on failure.
*/
public const char *
r_magic_file(struct r_magic_set *ms, const char *inname)
{
return file_or_fd(ms, inname, STDIN_FILENO);
}
/*
 * Identify a file given by name (`inname' != NULL) or by descriptor
 * (`inname' == NULL, data is read from `fd').
 *
 * Steps: reset the magic set, let file_fsmagic() try to classify the file
 * without reading it, otherwise read up to HOWMANY bytes and hand them to
 * file_buffer().
 *
 * Returns the description from file_getbuffer(), or NULL on error.
 *
 * The `done' path always goes through close_and_restore(), which closes any
 * descriptor other than STDIN_FILENO, so a descriptor handed in through
 * r_magic_descriptor() is closed here as well.
 * NOTE(review): confirm callers of r_magic_descriptor() expect that.
 */
static const char *
file_or_fd(struct r_magic_set *ms, const char *inname, int fd)
{
	int rv = -1;
	unsigned char *buf;
	struct stat sb;
	ssize_t nbytes = 0;	/* number of bytes read from a datafile */
	int ispipe = 0;

	/*
	 * sb is read on paths where no stat()/fstat() call in this function
	 * has filled it (info_from_stat(), close_and_restore()); start from
	 * zeroes so those reads are at least well defined.
	 */
	(void)memset(&sb, 0, sizeof(sb));

	/*
	 * one extra for terminating '\0', and
	 * some overlapping space for matches near EOF
	 */
#define SLOP (1 + sizeof(union VALUETYPE))
	if ((buf = malloc(HOWMANY + SLOP)) == NULL)
		return NULL;

	if (file_reset(ms) == -1)
		goto done;

	switch (file_fsmagic(ms, inname, &sb)) {
	case -1:		/* error */
		goto done;
	case 0:			/* nothing found */
		break;
	default:		/* matched it and printed type */
		rv = 0;
		goto done;
	}

	if (inname == NULL) {
		if (fstat(fd, &sb) == 0 && S_ISFIFO(sb.st_mode))
			ispipe = 1;
	} else {
		int flags = O_RDONLY|O_BINARY;

		if (stat(inname, &sb) == 0 && S_ISFIFO(sb.st_mode)) {
			/* open FIFOs non-blocking; guarded like the fcntl() below */
#ifdef O_NONBLOCK
			flags |= O_NONBLOCK;
#endif
			ispipe = 1;
		}

		errno = 0;
		if ((fd = open(inname, flags)) < 0) {
#ifdef __CYGWIN__
			/* FIXME: Do this with EXEEXT from autotools */
			char *tmp = alloca(strlen(inname) + 5);
			(void)strcat(strcpy(tmp, inname), ".exe");
			if ((fd = open(tmp, flags)) < 0) {
#endif
				/* NOTE(review): library code writing to stderr; intended? */
				fprintf(stderr, "couldn't open file\n");
				if (info_from_stat(ms, sb.st_mode) == -1)
					goto done;
				rv = 0;
				goto done;
#ifdef __CYGWIN__
			}
#endif
		}
#ifdef O_NONBLOCK
		/* back to blocking mode for the reads that follow */
		if ((flags = fcntl(fd, F_GETFL)) != -1) {
			flags &= ~O_NONBLOCK;
			(void)fcntl(fd, F_SETFL, flags);
		}
#endif
	}

	/*
	 * try looking at the first HOWMANY bytes
	 */
	if (ispipe) {
		ssize_t r = 0;

		while ((r = sread(fd, (void *)&buf[nbytes],
		    (size_t)(HOWMANY - nbytes), 1)) > 0) {
			nbytes += r;
			if (r < PIPE_BUF) break;
		}

		if (nbytes == 0) {
			/* We can not read it, but we were able to stat it. */
			if (info_from_stat(ms, sb.st_mode) == -1)
				goto done;
			rv = 0;
			goto done;
		}
	} else {
		if ((nbytes = read(fd, (char *)buf, HOWMANY)) == -1) {
			file_error(ms, errno, "cannot read `%s'", inname);
			goto done;
		}
	}

	(void)memset(buf + nbytes, 0, SLOP); /* NUL terminate */
	if (file_buffer(ms, fd, inname, buf, (size_t)nbytes) == -1)
		goto done;
	rv = 0;
done:
	free(buf);
	close_and_restore(ms, inname, fd, &sb);
	return rv == 0 ? file_getbuffer(ms) : NULL;
}
/*
 * Identify `nb' bytes of in-memory data.
 *
 * Resets the magic set, then runs file_buffer() with no descriptor (-1) and
 * no file name.  Returns the description from file_getbuffer(), or NULL
 * when the reset or the identification fails.
 */
public const char *
r_magic_buffer(struct r_magic_set *ms, const void *buf, size_t nb)
{
	/*
	 * The main work is done in file_buffer(); here there is only the
	 * data buffer to be identified.
	 */
	if (file_reset(ms) == -1 || file_buffer(ms, -1, NULL, buf, nb) == -1)
		return NULL;
	return file_getbuffer(ms);
}
#endif
/*
 * Return the recorded error message, or NULL when no error has been
 * recorded on `ms'.
 */
public const char *
r_magic_error(struct r_magic_set *ms)
{
	if (!ms->haderr)
		return NULL;
	return ms->o.buf;
}
public int
r_magic_errno(struct r_magic_set *ms)
{
return ms->haderr ? ms->error : 0;
}
/*
 * Store `flags' in the magic set.
 *
 * When the build has neither HAVE_UTIME nor HAVE_UTIMES, a request for
 * R_MAGIC_PRESERVE_ATIME is refused with -1 and the stored flags are left
 * unchanged.  Returns 0 otherwise.
 */
public int
r_magic_setflags(struct r_magic_set *ms, int flags)
{
	int refused = 0;

#if !defined(HAVE_UTIME) && !defined(HAVE_UTIMES)
	refused = (flags & R_MAGIC_PRESERVE_ATIME) != 0;
#endif
	if (refused)
		return -1;

	ms->flags = flags;
	return 0;
}

174
libr/magic/names.h Normal file
View File

@ -0,0 +1,174 @@
/* $OpenBSD: names.h,v 1.8 2009/04/24 18:54:34 chl Exp $ */
/*
* Copyright (c) Ian F. Darwin 1986-1995.
* Software written by Ian F. Darwin and others;
* maintained 1995-present by Christos Zoulas and others.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice immediately at the beginning of the file, without modification,
* this list of conditions, and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* Names.h - names and types used by ascmagic in file(1).
* These tokens are here because they can appear anywhere in
* the first HOWMANY bytes, while tokens in MAGIC must
* appear at fixed offsets into the file. Don't make HOWMANY
* too high unless you have a very fast CPU.
*
* $Id: names.h,v 1.8 2009/04/24 18:54:34 chl Exp $
*/
/*
modified by Chris Lowth - 9 April 2000
to add mime type strings to the types table.
*/
/* these types are used to index the table 'types': keep em in sync! */
#define L_C 0 /* first and foremost on UNIX */
#define L_CC 1 /* Bjarne's postincrement */
#define L_MAKE 2 /* Makefiles */
#define L_PLI 3 /* PL/1 */
#define L_MACH 4 /* some kinda assembler */
#define L_ENG 5 /* English */
#define L_PAS 6 /* Pascal */
#define L_MAIL 7 /* Electronic mail */
#define L_NEWS 8 /* Usenet Netnews */
#define L_JAVA 9 /* Java code */
#define L_HTML 10 /* HTML */
#define L_BCPL 11 /* BCPL */
#define L_M4 12 /* M4 */
#define L_PO 13 /* PO */
/*
 * Description table indexed by the L_* constants defined above: one entry
 * per language with its human readable name and its MIME type.  The entry
 * after the last L_* index is the "cannot happen" error text.
 * Strings must fit the fixed-size arrays, terminating NUL included.
 */
static const struct {
char human[48];
char mime[16];
} types[] = {
{ "C program", "text/x-c", },
{ "C++ program", "text/x-c++" },
{ "make commands", "text/x-makefile" },
{ "PL/1 program", "text/x-pl1" },
{ "assembler program", "text/x-asm" },
{ "English", "text/plain" },
{ "Pascal program", "text/x-pascal" },
{ "mail", "text/x-mail" },
{ "news", "text/x-news" },
{ "Java program", "text/x-java" },
{ "HTML document", "text/html", },
{ "BCPL program", "text/x-bcpl" },
{ "M4 macro language pre-processor", "text/x-m4" },
{ "PO (gettext message catalogue)", "text/x-po" },
{ "cannot happen error on names.h/types", "error/x-error" }
};
/*
* XXX - how should we distinguish Java from C++?
* The trick used in a Debian snapshot, of having "extends" or "implements"
* as tags for Java, doesn't work very well, given that those keywords
* are often preceded by "class", which flags it as C++.
*
* Perhaps we need to be able to say
*
* If "class" then
*
* if "extends" or "implements" then
* Java
* else
* C++
* endif
*
* Or should we use other keywords, such as "package" or "import"?
* Unfortunately, Ada95 uses "package", and Modula-3 uses "import",
* although I infer from the language spec at
*
* http://www.research.digital.com/SRC/m3defn/html/m3.html
*
* that Modula-3 uses "IMPORT" rather than "import", i.e. it must be
* in all caps.
*
* So, for now, we go with "import". We must put it before the C++
* stuff, so that we don't misidentify Java as C++. Not using "package"
* means we won't identify stuff that defines a package but imports
* nothing; hopefully, very little Java code imports nothing (one of the
* reasons for doing OO programming is to import as much as possible
* and write only what you need to, right?).
*
* Unfortunately, "import" may cause us to misidentify English text
* as Java, as it comes after "the" and "The". Perhaps we need a fancier
* heuristic to identify Java?
*/
/*
 * Keyword table: each entry pairs a token with the L_* language index it
 * suggests.  Order matters (see the notes inside the table and the Java
 * discussion above).  Tokens must fit in name[14], terminating NUL
 * included.  NNAMES is the number of entries.
 */
static const struct names {
char name[14];
short type;
} names[] = {
/* These must be sorted by eye for optimal hit rate */
/* Add to this list only after substantial meditation */
{"msgid", L_PO},
{"dnl", L_M4},
{"import", L_JAVA},
{"\"libhdr\"", L_BCPL},
{"\"LIBHDR\"", L_BCPL},
{"//", L_CC},
{"template", L_CC},
{"virtual", L_CC},
{"class", L_CC},
{"public:", L_CC},
{"private:", L_CC},
{"/*", L_C}, /* must precede "The", "the", etc. */
{"#include", L_C},
{"char", L_C},
{"The", L_ENG},
{"the", L_ENG},
{"double", L_C},
{"extern", L_C},
{"float", L_C},
{"struct", L_C},
{"union", L_C},
{"CFLAGS", L_MAKE},
{"LDFLAGS", L_MAKE},
{"all:", L_MAKE},
{".PRECIOUS", L_MAKE},
{".ascii", L_MACH},
{".asciiz", L_MACH},
{".byte", L_MACH},
{".even", L_MACH},
{".globl", L_MACH},
{".text", L_MACH},
{"clr", L_MACH},
{"(input,", L_PAS},
{"program", L_PAS},
{"record", L_PAS},
{"dcl", L_PLI},
{"Received:", L_MAIL},
{">From", L_MAIL},
{"Return-Path:",L_MAIL},
{"Cc:", L_MAIL},
{"Newsgroups:", L_NEWS},
{"Path:", L_NEWS},
{"Organization:",L_NEWS},
{"href=", L_HTML},
{"HREF=", L_HTML},
{"<body", L_HTML},
{"<BODY", L_HTML},
{"<html", L_HTML},
{"<HTML", L_HTML},
{"<!--", L_HTML},
};
#define NNAMES (sizeof(names)/sizeof(struct names))

348
libr/magic/patchlevel.h Normal file
View File

@ -0,0 +1,348 @@
/* $OpenBSD: patchlevel.h,v 1.9 2009/04/24 18:54:34 chl Exp $ */
/*
 * Version of the imported file(1) sources: major number and patch level.
 * Together they read 4.24, matching the "file update to 4.24" entry in
 * the revision log below.
 */
#define FILE_VERSION_MAJOR 4
#define patchlevel 24
/*
* Patchlevel file for Ian Darwin's MAGIC command.
* $File: patchlevel.h,v 1.68 2008/03/22 21:39:43 christos Exp $
*
* $Log: patchlevel.h,v $
* Revision 1.9 2009/04/24 18:54:34 chl
* file update to 4.24
*
* The '-i' switch is now enabled so file(1) can output mime type strings.
*
* ok ian@
* bulk ports build test on amd64 by jasper@
*
* ok ray@ gilles@ on an almost identical diff
* bulk ports build test on sparc64 on this almost identical diff by ajacoutot@
* also tested by landry@
*
* Revision 1.68 2008/03/22 21:39:43 christos
* file 4.24
*
* Revision 1.67 2007/12/28 20:08:40 christos
* welcome to 4.23.
*
* Revision 1.66 2007/12/27 16:38:24 christos
* welcome to 4.22
*
* Revision 1.65 2007/05/24 17:22:27 christos
* Welcome to 4.21
*
* Revision 1.64 2007/03/01 22:14:55 christos
* welcome to 4.20
*
* Revision 1.63 2007/01/12 17:38:28 christos
* Use File id.
*
* Revision 1.62 2006/12/11 21:49:58 christos
* time for 4.19
*
* Revision 1.61 2006/10/31 21:18:09 christos
* bump
*
* Revision 1.60 2006/03/02 22:15:12 christos
* welcome to 4.17
*
* Revision 1.59 2005/10/17 17:15:21 christos
* welcome to 4.16
*
* Revision 1.58 2005/08/18 15:52:56 christos
* welcome to 4.15
*
* Revision 1.57 2005/06/25 15:52:14 christos
* Welcome to 4.14
*
* Revision 1.56 2005/02/09 19:25:13 christos
* Welcome to 4.13
*
* Revision 1.55 2004/11/24 18:57:47 christos
* Re-do the autoconf stuff once more; passes make dist now.
*
* Revision 1.54 2004/11/21 05:52:05 christos
* ready for 4.11
*
* Revision 1.53 2004/07/24 20:40:46 christos
* welcome to 4.10
*
* Revision 1.52 2004/04/07 00:32:25 christos
* welcome to 4.09
*
* Revision 1.51 2004/03/22 21:17:11 christos
* welcome to 4.08.
*
* Revision 1.50 2003/12/23 17:34:04 christos
* 4.07
*
* Revision 1.49 2003/10/15 02:08:27 christos
* welcome to 4.06
*
* Revision 1.48 2003/09/12 19:41:14 christos
* this is 4.04
*
* Revision 1.47 2003/05/23 21:38:21 christos
* welcome to 4.03
*
* Revision 1.46 2003/04/02 18:57:43 christos
* prepare for 4.02
*
* Revision 1.45 2003/03/26 15:37:25 christos
* - Pass lint
* - make NULL in magic_file mean stdin
* - Fix "-" argument to file to pass NULL to magic_file
* - avoid pointer casts by using memcpy
* - rename magic_buf -> magic_buffer
* - keep only the first error
* - manual page: new sentence, new line
* - fix typo in api function (magic_buf -> magic_buffer)
*
* Revision 1.44 2003/03/23 22:23:31 christos
* finish librarification.
*
* Revision 1.43 2003/03/23 21:16:26 christos
* update copyrights.
*
* Revision 1.42 2003/03/23 04:06:05 christos
* Library re-organization
*
* Revision 1.41 2003/02/27 20:53:45 christos
* - fix memory allocation problem (Jeff Johnson)
* - fix stack overflow corruption (David Endler)
* - fixes from NetBSD source (Antti Kantee)
* - magic fixes
*
* Revision 1.40 2003/02/08 18:33:53 christos
* - detect inttypes.h too (Dave Love <d.love@dl.ac.uk>)
* - eliminate unsigned char warnings (Petter Reinholdtsen <pere@hungry.com>)
* - better elf PT_NOTE handling (Nalin Dahyabhai <nalin@redhat.com>)
* - add options to format the output differently
* - much more magic.
*
* Revision 1.39 2002/07/03 18:57:52 christos
* - ansify/c99ize
* - more magic
* - better COMPILE_ONLY support.
* - new magic files.
* - fix solaris compilation problems.
*
* Revision 1.38 2002/05/16 18:45:56 christos
* - pt_note elf additions from NetBSD
* - EMX os specific changes (Alexander Mai)
* - stdint.h detection, acconfig.h fixes (Maciej W. Rozycki, Franz Korntner)
* - regex file additions (Kim Cromie)
* - getopt_long support and misc cleanups (Michael Piefel)
* - many magic fixes and additions
*
* Revision 1.37 2001/09/03 14:44:22 christos
* daylight/tm_isdst detection
* magic fixes
* don't eat the whole file if it has only nulls
*
* Revision 1.36 2001/07/22 21:04:15 christos
* - magic fixes
* - add new operators, pascal strings, UTC date printing, $HOME/.magic
* [from "Tom N Harris" <telliamed@mac.com>]
*
* Revision 1.35 2001/04/24 14:40:25 christos
* - rename magic file sgi to mips and fix it
* - add support for building magic.mgc
* - portability fixes for mmap()
* - try gzip before uncompress, because uncompress sometimes hangs
* - be more conservative about pipe reads and writes
* - many magic fixes
*
* Revision 1.34 2001/03/12 05:05:57 christos
* - new compiled magic format
* - lots of magic additions
*
* Revision 1.33 2000/11/13 00:30:50 christos
* - wordperfect magic fix: freebsd pr 9388
* - more msdos fixes from freebsd pr's 20131 and 20812
* - sas and spss magic [Bruce Foster]
* - mkinstalldirs [John Fremlin]
* - sgi opengl fixes [Michael Pruett]
* - netbsd magic fixes [Ignatios Souvatzis]
* - audio additions [Michael Pruett]
* - fix problem with non ansi RCSID [Andreas Ley]
* - oggs magic [Felix von Leitner]
* - gmon magic [Eugen Dedu]
* - TNEF magic [Joomy]
* - netpbm magic and misc other image stuff [Bryan Henderson]
*
* Revision 1.32 2000/08/05 18:24:18 christos
* Correct endianness detection in elf (Charles Hannum)
* FreeBSD elf core support (Guy Harris)
* Use gzip in systems that don't have uncompress (Anthon van der Neut)
* Internationalization/EBCDIC support (Eric Fisher)
* Many many magic changes
*
* Revision 1.31 2000/05/14 17:58:36 christos
* - new magic for claris files
* - new magic for mathematica and maple files
* - new magic for msvc files
* - new -k flag to keep going matching all possible entries
* - add the word executable on #! magic files, and fix the usage of
* the word script
* - lots of other magic fixes
* - fix typo test -> text
*
* Revision 1.30 2000/04/11 02:41:17 christos
* - add support for mime output (-i)
* - make sure we free memory in case realloc fails
* - magic fixes
*
* Revision 1.29 1999/11/28 20:02:29 christos
* new string/[Bcb] magic from anthon, and adjustments to the magic files to
* use it.
*
* Revision 1.28 1999/10/31 22:11:48 christos
* - add "char" type for compatibility with HP/UX
* - recognize HP/UX syntax &=n etc.
* - include errno.h for CYGWIN
* - conditionalize the S_IS* macros
* - revert the SHT_DYNSYM test that broke the linux stripped binaries test
* - lots of Magdir changes
*
* Revision 1.27 1999/02/14 17:21:41 christos
* Automake support and misc cleanups from Rainer Orth
* Enable reading character and block special files from Dale R. Worley
*
* Revision 1.26 1998/09/12 13:19:39 christos
* - add support for bi-endian indirect offsets (Richard Verhoeven)
* - add recognition for bcpl (Joseph Myers)
* - remove non magic files from Magdir to avoid difficulties building
* on os2 where files are case independent
* - magic fixes.
*
* Revision 1.25 1998/06/27 14:04:04 christos
* OLF patch Guy Harris
* Recognize java/html (debian linux)
* Const poisoning (debian linux)
* More magic!
*
* Revision 1.24 1998/02/15 23:20:38 christos
* Autoconf patch: Felix von Leitner <leitner@math.fu-berlin.de>
* More magic fixes
* Elf64 fixes
*
* Revision 1.23 1997/11/05 16:03:37 christos
* - correct elf prps offset for SunOS-2.5.1 [guy@netapp.com]
* - handle 64 bit time_t's correctly [ewt@redhat.com]
* - new mime style magic [clarosse@netvista.net]
* - new TI calculator magic [rmcguire@freenet.columbus.oh.us]
* - new figlet fonts [obrien@freebsd.org]
* - new cisco magic, and elf fixes [jhawk@bbnplanet.com]
* - -b flag addition, and x86 filesystem magic [vax@linkhead.paranoia.com]
* - s/Mpeg/MPEG, header and elf typo fixes [guy@netapp.com]
* - Windows/NT registry files, audio code [guy@netapp.com]
* - libGrx graphics lib fonts [guy@netapp.com]
* - PNG fixes [guy@netapp.com]
* - more m$ document magic [guy@netapp.com]
* - PPD files [guy@netapp.com]
* - archive magic cleanup [guy@netapp.com]
* - linux kernel magic cleanup [guy@netapp.com]
* - lecter magic [guy@netapp.com]
* - vgetty magic [guy@netapp.com]
* - sniffer additions [guy@netapp.com]
*
* Revision 1.22 1997/01/15 17:23:24 christos
* - add support for elf core files: find the program name under SVR4 [Ken Pizzini]
* - print strings only up to the first carriage return [various]
* - freebsd international ascii support [J Wunsch]
* - magic fixes and additions [Guy Harris]
* - 64 bit fixes [Larry Schwimmer]
* - support for both utime and utimes, but don't restore file access times
* by default [various]
* - \xXX only takes 2 hex digits, not 3.
* - re-implement support for core files [Guy Harris]
*
* Revision 1.21 1996/10/05 18:15:29 christos
* Segregate elf stuff and conditionally enable it with -DBUILTIN_ELF
* More magic fixes
*
* Revision 1.20 1996/06/22 22:15:52 christos
* - support relative offsets of the form >&
* - fix bug with truncating magic strings that contain \n
* - file -f - did not read from stdin as documented
* - support elf file parsing using our own elf support.
* - as always magdir fixes and additions.
*
* Revision 1.19 1995/10/27 23:14:46 christos
* Ability to parse colon separated list of magic files
* New LEGAL.NOTICE
* Various magic file changes
*
* Revision 1.18 1995/05/20 22:09:21 christos
* Passed incorrect argument to eatsize().
* Use %ld and %lx where appropriate.
* Remove unused variables
* ELF support for both big and little endian
* Fixes for small files again.
*
* Revision 1.17 1995/04/28 17:29:13 christos
* - Incorrect nroff detection fix from der Mouse
* - Lost and incorrect magic entries.
* - Added ELF stripped binary detection [in C; ugh]
* - Look for $MAGIC to find the magic file.
* - Eat trailing size specifications from numbers i.e. ignore 10L
* - More fixes for very short files
*
* Revision 1.16 1995/03/25 22:06:45 christos
* - use strtoul() where it exists.
* - fix sign-extend bug
* - try to detect tar archives before nroff files, otherwise
* tar files where the first file starts with a . will not work
*
* Revision 1.15 1995/01/21 21:03:35 christos
* Added CSECTION for the file man page
* Added version flag -v
* Fixed bug with -f input flag (from iorio@violet.berkeley.edu)
* Lots of magic fixes and reorganization...
*
* Revision 1.14 1994/05/03 17:58:23 christos
* changes from mycroft@gnu.ai.mit.edu (Charles Hannum) for unsigned
*
* Revision 1.13 1994/01/21 01:27:01 christos
* Fixed null termination bug from Don Seeley at BSDI in ascmagic.c
*
* Revision 1.12 1993/10/27 20:59:05 christos
* Changed -z flag to understand gzip format too.
* Moved builtin compression detection to a table, and move
* the compress magic entry out of the source.
* Made printing of numbers unsigned, and added the mask to it.
* Changed the buffer size to 8k, because gzip will refuse to
* unzip just a few bytes.
*
* Revision 1.11 1993/09/24 18:49:06 christos
* Fixed small bug in softmagic.c introduced by
* copying the data to be examined out of the input
* buffer. Changed the Makefile to use sed to create
* the correct man pages.
*
* Revision 1.10 1993/09/23 21:56:23 christos
* Passed purify. Fixed indirections. Fixed byte order printing.
* Fixed segmentation faults caused by referencing past the end
* of the magic buffer. Fixed bus errors caused by referencing
* unaligned shorts or longs.
*
* Revision 1.9 1993/03/24 14:23:40 ian
* Batch of minor changes from several contributors.
*
* Revision 1.8 93/02/19 15:01:26 ian
* Numerous changes from Guy Harris too numerous to mention but including
* byte-order independence, fixing "old-style masking", etc. etc. A bugfix
* for broken symlinks from martin@@d255s004.zfe.siemens.de.
*
* Revision 1.7 93/01/05 14:57:27 ian
* Couple of nits picked by Christos (again, thanks).
*
* Revision 1.6 93/01/05 13:51:09 ian
* Lotsa work on the Magic directory.
*
* Revision 1.5 92/09/14 14:54:51 ian
* Fix a tiny null-pointer bug in previous fix for tar archive + uncompress.
*
*/

233
libr/magic/print.c Normal file
View File

@ -0,0 +1,233 @@
/* $OpenBSD: print.c,v 1.16 2009/10/27 23:59:37 deraadt Exp $ */
/*
* Copyright (c) Ian F. Darwin 1986-1995.
* Software written by Ian F. Darwin and others;
* maintained 1995-present by Christos Zoulas and others.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice immediately at the beginning of the file, without modification,
* this list of conditions, and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* print.c - debugging printout routines
*/
#include "file.h"
#include <stdio.h>
#include <errno.h>
#include <string.h>
#include <stdarg.h>
#include <stdlib.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#include <time.h>
#define SZOF(a) (sizeof(a) / sizeof(a[0]))
#ifndef COMPILE_ONLY
/*
 * file_mdump - write a one-line debugging description of a magic entry
 * to stderr.
 *
 * The line has the shape
 *	[<lineno><'>' per level> <offset>(<indirection>), <type><modifiers>,<reln><value>,"<desc>"]
 * where the indirection part is only present when the INDIR flag is set and
 * the value part is omitted when the relation is 'x'.
 *
 * m: the magic entry to describe; it is only read, never modified.
 */
void
file_mdump(struct magic *m)
{
	static const char optyp[] = { FILE_OPS };

	(void) fprintf(stderr, "[%u", m->lineno);
	/*
	 * Emit (cont_level & 7) '>' characters, a blank, then the offset.
	 * A literal format with a precision-limited "%.*s" is used instead of
	 * passing a computed pointer into a string literal as the format, so
	 * the compiler can check the arguments; the output is the same.
	 */
	(void) fprintf(stderr, "%.*s %u", (int)(m->cont_level & 7),
	    ">>>>>>>", m->offset);

	if (m->flag & INDIR) {
		(void) fprintf(stderr, "(%s,",
		    /* Note: type is unsigned */
		    (m->in_type < file_nnames) ?
		    file_names[m->in_type] : "*bad*");
		if (m->in_op & FILE_OPINVERSE)
			(void) fputc('~', stderr);
		/* Unknown operator indexes are shown as '?' rather than read out of bounds. */
		(void) fprintf(stderr, "%c%u),",
		    ((m->in_op & FILE_OPS_MASK) < SZOF(optyp)) ?
		    optyp[m->in_op & FILE_OPS_MASK] : '?',
		    m->in_offset);
	}

	(void) fprintf(stderr, " %s%s", (m->flag & UNSIGNED) ? "u" : "",
	    /* Note: type is unsigned */
	    (m->type < file_nnames) ? file_names[m->type] : "*bad*");
	if (m->mask_op & FILE_OPINVERSE)
		(void) fputc('~', stderr);

	if (IS_STRING(m->type)) {
		/* String types: "/<flag characters>" followed by "/<range>". */
		if (m->str_flags) {
			(void) fputc('/', stderr);
			if (m->str_flags & STRING_COMPACT_BLANK)
				(void) fputc(CHAR_COMPACT_BLANK, stderr);
			if (m->str_flags & STRING_COMPACT_OPTIONAL_BLANK)
				(void) fputc(CHAR_COMPACT_OPTIONAL_BLANK,
				    stderr);
			if (m->str_flags & STRING_IGNORE_LOWERCASE)
				(void) fputc(CHAR_IGNORE_LOWERCASE, stderr);
			if (m->str_flags & STRING_IGNORE_UPPERCASE)
				(void) fputc(CHAR_IGNORE_UPPERCASE, stderr);
			if (m->str_flags & REGEX_OFFSET_START)
				(void) fputc(CHAR_REGEX_OFFSET_START, stderr);
		}
		if (m->str_range)
			(void) fprintf(stderr, "/%u", m->str_range);
	}
	else {
		/* Numeric types: mask operator character, then the mask in hex. */
		if ((m->mask_op & FILE_OPS_MASK) < SZOF(optyp))
			(void) fputc(optyp[m->mask_op & FILE_OPS_MASK], stderr);
		else
			(void) fputc('?', stderr);

		if (m->num_mask) {
			(void) fprintf(stderr, "%.8llx",
			    (unsigned long long)m->num_mask);
		}
	}

	(void) fprintf(stderr, ",%c", m->reln);

	/* Relation 'x' carries no comparison value, so nothing is printed for it. */
	if (m->reln != 'x') {
		switch (m->type) {
		case FILE_BYTE:
		case FILE_SHORT:
		case FILE_LONG:
		case FILE_LESHORT:
		case FILE_LELONG:
		case FILE_MELONG:
		case FILE_BESHORT:
		case FILE_BELONG:
			(void) fprintf(stderr, "%d", m->value.l);
			break;
		case FILE_BEQUAD:
		case FILE_LEQUAD:
		case FILE_QUAD:
			/*
			 * "%lld" takes a signed long long; cast to that type
			 * so the argument matches the conversion.
			 */
			(void) fprintf(stderr, "%lld",
			    (long long)m->value.q);
			break;
		case FILE_PSTRING:
		case FILE_STRING:
		case FILE_REGEX:
		case FILE_BESTRING16:
		case FILE_LESTRING16:
		case FILE_SEARCH:
			file_showstr(stderr, m->value.s, (size_t)m->vallen);
			break;
		case FILE_DATE:
		case FILE_LEDATE:
		case FILE_BEDATE:
		case FILE_MEDATE:
			(void)fprintf(stderr, "%s,",
			    file_fmttime(m->value.l, 1));
			break;
		case FILE_LDATE:
		case FILE_LELDATE:
		case FILE_BELDATE:
		case FILE_MELDATE:
			(void)fprintf(stderr, "%s,",
			    file_fmttime(m->value.l, 0));
			break;
		case FILE_QDATE:
		case FILE_LEQDATE:
		case FILE_BEQDATE:
			/* Only the low 32 bits of the 64-bit value are formatted. */
			(void)fprintf(stderr, "%s,",
			    file_fmttime((uint32_t)m->value.q, 1));
			break;
		case FILE_QLDATE:
		case FILE_LEQLDATE:
		case FILE_BEQLDATE:
			(void)fprintf(stderr, "%s,",
			    file_fmttime((uint32_t)m->value.q, 0));
			break;
		case FILE_FLOAT:
		case FILE_BEFLOAT:
		case FILE_LEFLOAT:
			(void) fprintf(stderr, "%G", m->value.f);
			break;
		case FILE_DOUBLE:
		case FILE_BEDOUBLE:
		case FILE_LEDOUBLE:
			(void) fprintf(stderr, "%G", m->value.d);
			break;
		case FILE_DEFAULT:
			/* XXX - do anything here? */
			break;
		default:
			(void) fputs("*bad*", stderr);
			break;
		}
	}
	(void) fprintf(stderr, ",\"%s\"]\n", m->desc);
}
#endif
/*VARARGS*/
/*
 * file_magwarn - print a printf-style warning on stderr.
 *
 * Output is "<file>, <line>: Warning: <message>\n"; the "<file>, <line>: "
 * prefix is only written when ms->file is set.
 *
 * ms: magic set supplying the current file name and line number.
 * f:  printf-style format string, followed by its arguments.
 */
void
file_magwarn(struct r_magic_set *ms, const char *f, ...)
{
	va_list ap;

	/* Regular output goes to stdout; flush it before writing to stderr. */
	(void) fflush(stdout);

	if (ms->file != NULL) {
		unsigned long lineno = (unsigned long)ms->line;

		(void) fprintf(stderr, "%s, %lu: ", ms->file, lineno);
	}
	(void) fputs("Warning: ", stderr);

	va_start(ap, f);
	(void) vfprintf(stderr, f, ap);
	va_end(ap);

	(void) fputc('\n', stderr);
}
/*
 * file_fmttime - format a 32-bit timestamp as text without a trailing newline.
 *
 * v:     seconds value, converted to time_t before formatting.
 * local: nonzero formats with ctime(); zero formats with gmtime() and
 *        asctime(), after adding 3600 seconds when `daylight' is nonzero.
 *
 * Returns the buffer handed back by ctime()/asctime() with the newline cut
 * off, so the text is overwritten by later calls to those functions.
 * Returns the literal "*Invalid time*" when the time cannot be converted.
 */
const char *
file_fmttime(uint32_t v, int local)
{
	char *pp;
	time_t t = (time_t)v;
	struct tm *tm;

	if (local) {
		pp = ctime(&t);
	} else {
#ifndef HAVE_DAYLIGHT
		/* No usable global `daylight': keep a private flag instead. */
		static int daylight = 0;
#ifdef HAVE_TM_ISDST
		/* Filled in once, on the first call, from the current local time. */
		static time_t now = (time_t)0;

		if (now == (time_t)0) {
			struct tm *tm1;

			(void)time(&now);
			tm1 = localtime(&now);
			if (tm1 == NULL)
				return "*Invalid time*";
			daylight = tm1->tm_isdst;
		}
#endif /* HAVE_TM_ISDST */
#endif /* HAVE_DAYLIGHT */
		if (daylight)
			t += 3600;
		tm = gmtime(&t);
		if (tm == NULL)
			return "*Invalid time*";
		pp = asctime(tm);
	}

	/* ctime() and asctime() may return NULL; do not dereference that. */
	if (pp == NULL)
		return "*Invalid time*";

	pp[strcspn(pp, "\n")] = '\0';
	return pp;
}

1820
libr/magic/softmagic.c Normal file

File diff suppressed because it is too large Load Diff

74
libr/magic/tar.h Normal file
View File

@ -0,0 +1,74 @@
/* $OpenBSD: tar.h,v 1.7 2009/04/24 18:54:34 chl Exp $ */
/*
* Copyright (c) Ian F. Darwin 1986-1995.
* Software written by Ian F. Darwin and others;
* maintained 1995-present by Christos Zoulas and others.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice immediately at the beginning of the file, without modification,
* this list of conditions, and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* Header file for public domain tar (tape archive) program.
*
* @(#)tar.h 1.20 86/10/29 Public Domain.
*
* Created 25 August 1985 by John Gilmore, ihnp4!hoptoad!gnu.
*
* $Id: tar.h,v 1.7 2009/04/24 18:54:34 chl Exp $ # checkin only
*/
/*
* Header block on tape.
*
* I'm going to use traditional DP naming conventions here.
* A "block" is a big chunk of stuff that we do I/O on.
* A "record" is a piece of info that we care about.
* Typically many "record"s fit into a "block".
*/
/* Size in bytes of one record (the full extent of union record). */
#define RECORDSIZE 512
/* Length of the name and linkname fields. */
#define NAMSIZ 100
/* Length of the uname field. */
#define TUNMLEN 32
/* Length of the gname field. */
#define TGNMLEN 32
/*
 * One RECORDSIZE-byte record, viewable either as raw bytes or as a header.
 * The header fields below add up to 345 bytes, so the header view covers
 * only the leading part of the record.
 */
union record {
/* Raw byte view of the whole record. */
char charptr[RECORDSIZE];
/* Header view: fixed-width character fields laid out back to back. */
struct header {
char name[NAMSIZ];
char mode[8];
char uid[8];
char gid[8];
char size[12];
char mtime[12];
char chksum[8];
char linkflag;
char linkname[NAMSIZ];
/* Compared against TMAGIC / GNUTMAGIC (8 bytes, including the NUL). */
char magic[8];
char uname[TUNMLEN];
char gname[TGNMLEN];
char devmajor[8];
char devminor[8];
} header;
};
/*
 * The magic field is filled with this if uname and gname are valid.
 * TMAGIC is "ustar" plus its NUL (6 bytes).  GNUTMAGIC is "ustar" followed
 * by two blanks plus its NUL, which fills the 8-byte magic field exactly.
 */
#define TMAGIC "ustar" /* 5 chars and a null */
#define GNUTMAGIC "ustar  " /* 7 chars and a null */

11
pkgcfg/r_magic.pc.acr Normal file
View File

@ -0,0 +1,11 @@
prefix=@PREFIX@
exec_prefix=${prefix}
libdir=@LIBDIR@
includedir=${prefix}/include
Name: r_magic
Description: radare foundation libraries
Version: @VERSION@
Requires:
Libs: -L${libdir} -lr_magic
Cflags: -I${includedir}/libr