mirror of
https://git.hardenedbsd.org/hardenedbsd/HardenedBSD.git
synced 2024-12-26 21:13:11 +01:00
bsdgrep: add a primitive literal matcher
fgrep/grep -F will error out at runtime if compiled with a regex(3) that does not define REG_NOSPEC or REG_LITERAL. glibc is one such regex(3) implementation, and as it turns out it doesn't support literal matching at all. Provide a primitive literal matcher for use with glibc and other implementations that don't support literal matching so that we don't completely lose fgrep/grep -F if compiled against libgnuregex on stable/10, stable/11, or other systems that we don't necessarily support. This is a wholly unoptimized implementation with no plans to optimize it as of now. This is both because its use case will, in the not-too-distant future, be primarily on unsupported systems, and because it reinvents a wheel that we already have available as a feature of regex(3). Reviewed by: cem, emaste, ngie Approved by: emaste (mentor) MFC after: 2 weeks Differential Revision: https://reviews.freebsd.org/D12056
This commit is contained in:
parent
c552f48b6f
commit
05ad821531
Notes:
svn2git
2020-12-20 02:59:44 +00:00
svn path=/head/; revision=322826
@ -720,12 +720,19 @@ main(int argc, char *argv[])
|
||||
case GREP_BASIC:
|
||||
break;
|
||||
case GREP_FIXED:
|
||||
/*
|
||||
* regex(3) implementations that support fixed-string searches generally
|
||||
* define either REG_NOSPEC or REG_LITERAL. Set the appropriate flag
|
||||
* here. If neither are defined, GREP_FIXED later implies that the
|
||||
* internal literal matcher should be used. Other cflags that have
|
||||
* the same interpretation as REG_NOSPEC and REG_LITERAL should be
|
||||
* similarly added here, and grep.h should be amended to take this into
|
||||
* consideration when defining WITH_INTERNAL_NOSPEC.
|
||||
*/
|
||||
#if defined(REG_NOSPEC)
|
||||
cflags |= REG_NOSPEC;
|
||||
#elif defined(REG_LITERAL)
|
||||
cflags |= REG_LITERAL;
|
||||
#else
|
||||
errx(2, "literal expressions not supported at compile time");
|
||||
#endif
|
||||
break;
|
||||
case GREP_EXTENDED:
|
||||
@ -742,7 +749,11 @@ main(int argc, char *argv[])
|
||||
r_pattern = grep_calloc(patterns, sizeof(*r_pattern));
|
||||
|
||||
/* Don't process any patterns if we have a blank one */
|
||||
#ifdef WITH_INTERNAL_NOSPEC
|
||||
if (!matchall && grepbehave != GREP_FIXED) {
|
||||
#else
|
||||
if (!matchall) {
|
||||
#endif
|
||||
/* Check if cheating is allowed (always is for fgrep). */
|
||||
for (i = 0; i < patterns; ++i) {
|
||||
#ifndef WITHOUT_FASTMATCH
|
||||
|
@ -57,6 +57,10 @@ extern const char *errstr[];
|
||||
#define GREP_BASIC 1
|
||||
#define GREP_EXTENDED 2
|
||||
|
||||
/*
 * The regex(3) in use offers neither REG_NOSPEC nor REG_LITERAL, so enable
 * the internal literal matcher (litexec) that serves GREP_FIXED instead.
 */
#if !defined(REG_NOSPEC) && !defined(REG_LITERAL)
|
||||
#define WITH_INTERNAL_NOSPEC
|
||||
#endif
|
||||
|
||||
#define BINFILE_BIN 0
|
||||
#define BINFILE_SKIP 1
|
||||
#define BINFILE_TEXT 2
|
||||
|
@ -70,7 +70,10 @@ struct parsec {
|
||||
bool binary; /* Binary file? */
|
||||
};
|
||||
|
||||
|
||||
#ifdef WITH_INTERNAL_NOSPEC
|
||||
static int litexec(const struct pat *pat, const char *string,
|
||||
size_t nmatch, regmatch_t pmatch[]);
|
||||
#endif
|
||||
static int procline(struct parsec *pc);
|
||||
static void printline(struct parsec *pc, int sep);
|
||||
static void printline_metadata(struct str *line, int sep);
|
||||
@ -350,6 +353,67 @@ procfile(const char *fn)
|
||||
return (c);
|
||||
}
|
||||
|
||||
#ifdef WITH_INTERNAL_NOSPEC
|
||||
/*
|
||||
* Internal implementation of literal string search within a string, modeled
|
||||
* after regexec(3), for use when the regex(3) implementation doesn't offer
|
||||
* either REG_NOSPEC or REG_LITERAL. This does not apply in the default FreeBSD
|
||||
* config, but in other scenarios such as building against libgnuregex or on
|
||||
* some non-FreeBSD OSes.
|
||||
*/
|
||||
static int
|
||||
litexec(const struct pat *pat, const char *string, size_t nmatch,
|
||||
regmatch_t pmatch[])
|
||||
{
|
||||
char *(*strstr_fn)(const char *, const char *);
|
||||
char *sub, *subject;
|
||||
const char *search;
|
||||
size_t idx, n, ofs, stringlen;
|
||||
|
||||
if (cflags & REG_ICASE)
|
||||
strstr_fn = strcasestr;
|
||||
else
|
||||
strstr_fn = strstr;
|
||||
idx = 0;
|
||||
ofs = pmatch[0].rm_so;
|
||||
stringlen = pmatch[0].rm_eo;
|
||||
if (ofs >= stringlen)
|
||||
return (REG_NOMATCH);
|
||||
subject = strndup(string, stringlen);
|
||||
if (subject == NULL)
|
||||
return (REG_ESPACE);
|
||||
for (n = 0; ofs < stringlen;) {
|
||||
search = (subject + ofs);
|
||||
if ((unsigned long)pat->len > strlen(search))
|
||||
break;
|
||||
sub = strstr_fn(search, pat->pat);
|
||||
/*
|
||||
* Ignoring the empty string possibility due to context: grep optimizes
|
||||
* for empty patterns and will never reach this point.
|
||||
*/
|
||||
if (sub == NULL)
|
||||
break;
|
||||
++n;
|
||||
/* Fill in pmatch if necessary */
|
||||
if (nmatch > 0) {
|
||||
pmatch[idx].rm_so = ofs + (sub - search);
|
||||
pmatch[idx].rm_eo = pmatch[idx].rm_so + pat->len;
|
||||
if (++idx == nmatch)
|
||||
break;
|
||||
ofs = pmatch[idx].rm_so + 1;
|
||||
} else
|
||||
/* We only needed to know if we match or not */
|
||||
break;
|
||||
}
|
||||
free(subject);
|
||||
if (n > 0 && nmatch > 0)
|
||||
for (n = idx; n < nmatch; ++n)
|
||||
pmatch[n].rm_so = pmatch[n].rm_eo = -1;
|
||||
|
||||
return (n > 0 ? 0 : REG_NOMATCH);
|
||||
}
|
||||
#endif /* WITH_INTERNAL_NOSPEC */
|
||||
|
||||
#define iswword(x) (iswalnum((x)) || (x) == L'_')
|
||||
|
||||
/*
|
||||
@ -400,6 +464,11 @@ procline(struct parsec *pc)
|
||||
for (i = 0; i < patterns; i++) {
|
||||
pmatch.rm_so = st;
|
||||
pmatch.rm_eo = pc->ln.len;
|
||||
#ifdef WITH_INTERNAL_NOSPEC
|
||||
if (grepbehave == GREP_FIXED)
|
||||
r = litexec(&pattern[i], pc->ln.dat, 1, &pmatch);
|
||||
else
|
||||
#endif
|
||||
#ifndef WITHOUT_FASTMATCH
|
||||
if (fg_pattern[i].pattern)
|
||||
r = fastexec(&fg_pattern[i],
|
||||
|
Loading…
Reference in New Issue
Block a user