mirror of
https://git.hardenedbsd.org/hardenedbsd/HardenedBSD.git
synced 2024-11-17 16:10:46 +01:00
Add another matching algorithm to do heuristics for international
language text files. Should finally close PR # bin/1925: file does not consider cyrillic text..., though I never got any response from the originator about my suggestion. While I was at it, I also moved the `magic' file to /usr/share/misc; there's nothing so magic about this file as to justify its life under /etc.
This commit is contained in:
parent
09ab8202c0
commit
97857d5a23
Notes:
svn2git
2020-12-20 02:59:44 +00:00
svn path=/head/; revision=20323
@ -1,6 +1,6 @@
|
||||
# Makefile for file(1) cmd.
|
||||
# Copyright (c) Ian F. Darwin 86/09/01 - see LEGAL.NOTICE.
|
||||
# @(#)$Id: Makefile,v 1.4 1995/07/25 00:36:03 bde Exp $
|
||||
# @(#)$Id: Makefile,v 1.5 1996/08/17 22:27:08 wosch Exp $
|
||||
#
|
||||
# This software is not subject to any license of the American Telephone
|
||||
# and Telegraph Company or of the Regents of the University of California.
|
||||
@ -23,7 +23,7 @@
|
||||
# 4. This notice may not be removed or altered.
|
||||
#
|
||||
# Hacked and dismembered for bmake (Geoff Rehmet).
|
||||
MAGIC= /etc/magic
|
||||
MAGIC= /usr/share/misc/magic
|
||||
MAGICOWN= bin
|
||||
MAGICGRP= bin
|
||||
MAGICMODE= 444
|
||||
@ -33,7 +33,7 @@ CFLAGS+= -DMAGIC='"$(MAGIC)"'
|
||||
|
||||
PROG= file
|
||||
SRCS= file.c apprentice.c fsmagic.c softmagic.c ascmagic.c \
|
||||
compress.c is_tar.c print.c
|
||||
compress.c is_tar.c print.c international.c
|
||||
|
||||
MAN1= file.1
|
||||
MAN5= magic.5
|
||||
@ -51,7 +51,7 @@ magic: $(MAGFILES)
|
||||
cat $(MAGFILES) > $(.TARGET)
|
||||
|
||||
# called from /usr/src/etc/Makefile
|
||||
etc-magic:
|
||||
beforeinstall:
|
||||
${INSTALL} -c -o $(MAGICOWN) -g $(MAGICGRP) -m $(MAGICMODE) magic \
|
||||
$(DESTDIR)$(MAGIC)
|
||||
|
||||
|
@ -26,7 +26,7 @@
|
||||
*/
|
||||
#ifndef lint
|
||||
static char *moduleid =
|
||||
"@(#)$Id: file.c,v 1.2 1995/05/30 06:30:01 rgrimes Exp $";
|
||||
"@(#)$Id: file.c,v 1.3 1996/01/23 12:40:11 mpp Exp $";
|
||||
#endif /* lint */
|
||||
|
||||
#include <stdio.h>
|
||||
@ -343,6 +343,10 @@ int nb, zflag;
|
||||
if (ascmagic(buf, nb))
|
||||
return 'a';
|
||||
|
||||
/* see if it's international language text */
|
||||
if (internatmagic(buf, nb))
|
||||
return 'i';
|
||||
|
||||
/* abandon hope, all ye who remain here */
|
||||
ckfputs("data", stdout);
|
||||
return '\0';
|
||||
|
@ -1,6 +1,6 @@
|
||||
/*
|
||||
* file.h - definitions for file(1) program
|
||||
* @(#)$Id: file.h,v 1.2 1995/05/30 06:30:02 rgrimes Exp $
|
||||
* @(#)$Id: file.h,v 1.3 1996/01/23 12:40:13 mpp Exp $
|
||||
*
|
||||
* Copyright (c) Ian F. Darwin, 1987.
|
||||
* Written by Ian F. Darwin.
|
||||
@ -87,6 +87,7 @@ extern void error __P((const char *, ...));
|
||||
extern void ckfputs __P((const char *, FILE *));
|
||||
struct stat;
|
||||
extern int fsmagic __P((const char *, struct stat *));
|
||||
extern int internatmagic __P((unsigned char *, int));
|
||||
extern int is_compress __P((const unsigned char *, int *));
|
||||
extern int is_tar __P((unsigned char *, int));
|
||||
extern void magwarn __P((const char *, ...));
|
||||
|
72
usr.bin/file/international.c
Normal file
72
usr.bin/file/international.c
Normal file
@ -0,0 +1,72 @@
|
||||
#include "file.h"
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#define F 0	/* byte value is not acceptable in a text file */
#define T 1	/* byte value is acceptable in a text file */

/*
 * List of characters that look "reasonable" in international
 * language texts.  That's almost all characters :), except a
 * few in the control range of ASCII (all the known international
 * character sets share the bottom half with ASCII).
 *
 * The table is indexed by byte value.  The only bytes below 0x20
 * that are accepted are 0x08-0x0d (BS, HT, LF, VT, FF, CR) and
 * 0x1b (ESC); 0x7f (DEL) is rejected as well.  Everything from
 * 0x80 upwards is accepted.
 *
 * The table is only ever read, hence it is declared const.
 */
static const char maybe_internat[256] = {
	F, F, F, F, F, F, F, F, T, T, T, T, T, T, F, F,  /* 0x0X */
	F, F, F, F, F, F, F, F, F, F, F, T, F, F, F, F,  /* 0x1X */
	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x2X */
	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x3X */
	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x4X */
	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x5X */
	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x6X */
	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, F,  /* 0x7X */
	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x8X */
	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x9X */
	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0xaX */
	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0xbX */
	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0xcX */
	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0xdX */
	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0xeX */
	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T   /* 0xfX */
};
|
||||
|
||||
/* Maximal length of a line we consider "reasonable". */
|
||||
#define MAXLINELEN 300
|
||||
|
||||
int
|
||||
internatmagic(buf, nbytes)
|
||||
unsigned char *buf;
|
||||
int nbytes;
|
||||
{
|
||||
int i;
|
||||
unsigned char *cp;
|
||||
|
||||
nbytes--;
|
||||
|
||||
/* First, look whether there are "unreasonable" characters. */
|
||||
for (i = 0, cp = buf; i < nbytes; i++, cp++)
|
||||
if (!maybe_internat[*cp])
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* Now, look whether the file consists of lines of
|
||||
* "reasonable" length.
|
||||
*/
|
||||
|
||||
for (i = 0; i < nbytes;) {
|
||||
cp = memchr(buf, '\n', nbytes - i);
|
||||
if (cp == NULL) {
|
||||
/* Don't fail if we hit the end of buffer. */
|
||||
if (i + MAXLINELEN >= nbytes)
|
||||
break;
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
if (cp - buf > MAXLINELEN)
|
||||
return 0;
|
||||
i += (cp - buf + 1);
|
||||
buf = cp + 1;
|
||||
}
|
||||
ckfputs("International language text", stdout);
|
||||
return 1;
|
||||
}
|
Loading…
Reference in New Issue
Block a user