awk: Merge in bsd-feature branch of OTA from 20240422 (31bb33a32f71)

In the last 2nd edition import, I mistakenly grabbed from the 'main'
branch of upstream rather than the bsd-feature branch. This means that
we have a regression in awk from that point forward: all the
BSD-specific bit functions (and a few others) were dropped. This
restores it at the same level.

MFC After:		1 day
Sponsored by:		Netflix
This commit is contained in:
Warner Losh 2024-05-14 12:15:43 -06:00
commit eb690a0576
9 changed files with 377 additions and 3 deletions

View File

@ -47,6 +47,30 @@
* test/T.lilly: Remove gawk warnings from output, improves
portability.
2019-10-17 Arnold D. Robbins <arnold@skeeve.com>
Pull in systime() and strftime() from the NetBSD awk.
* awk.1: Document the functions.
* run.c (bltin): Implement the functions.
* awk.h: Add defines for systime and strftime.
* lex.c: Add support for systime and strftime.
2019-10-07 Arnold D. Robbins <arnold@skeeve.com>
Integrate features from different *BSD versions of awk.
Gensub support from NetBSD. Bitwise functions from OpenBSD.
* awk.h: Add defines for and, or, xor, compl, lshift and rshift.
* awkgram.y: Add support for gensub.
* maketab.c: Ditto.
* lex.c: Add support for gensub and bitwise functions.
* parse.c (node5, op5): New functions.
* proto.h (node5, op5): New declarations.
* run.c (bltin): Implement the bitwise functions.
(gensub): New function.
* awk.1: Document additional functions.
2019-10-07 Arnold D. Robbins <arnold@skeeve.com>
* b.c (fnematch): Change type of pbuf from unsigned char to char.

View File

@ -305,6 +305,25 @@ and
.B gsub
return the number of replacements.
.TP
\fBgensub(\fIpat\fB, \fIrepl\fB, \fIhow\fR [\fB, \fItarget\fR]\fB)\fR
replaces instances of
.I pat
in
.I target
with
.IR repl .
If
.I how
is \fB"g"\fR or \fB"G"\fR, do so globally. Otherwise,
.I how
is a number indicating which occurrence to replace. If no
.IR target ,
use
.BR $0 .
Return the resulting string;
.I target
is not modified.
.TP
.BI sprintf( fmt , " expr" , " ...\fB)
the string resulting from formatting
.I expr ...
@ -313,6 +332,28 @@ according to the
format
.IR fmt .
.TP
.B systime()
returns the current date and time as a standard
``seconds since the epoch'' value.
.TP
.BI strftime( fmt ", " timestamp\^ )
formats
.I timestamp
(a value in seconds since the epoch)
according to
.IR fmt ,
which is a format string as supported by
.IR strftime (3).
Both
.I timestamp
and
.I fmt
may be omitted; if no
.IR timestamp ,
the current time of day is used, and if no
.IR fmt ,
a default format of \fB"%a %b %d %H:%M:%S %Z %Y"\fR is used.
.TP
.BI system( cmd )
executes
.I cmd
@ -372,6 +413,17 @@ In all cases,
returns 1 for a successful input,
0 for end of file, and \-1 for an error.
.PP
The functions
.BR compl ,
.BR and ,
.BR or ,
.BR xor ,
.BR lshift ,
and
.B rshift
perform the corresponding bitwise operations on their
operands, which are first truncated to integer.
.PP
Patterns are arbitrary Boolean combinations
(with
.BR "! || &&" )

View File

@ -154,6 +154,14 @@ extern Cell *symtabloc; /* SYMTAB */
#define FTOUPPER 12
#define FTOLOWER 13
#define FFLUSH 14
/* Builtin ids for the bitwise and time functions; dispatched on in bltin(). */
#define FAND 15 /* and(x, y): bitwise AND */
#define FFOR 16 /* or(x, y): bitwise OR (presumably spelled FFOR so it cannot collide with the FOR token -- confirm) */
#define FXOR 17 /* xor(x, y): bitwise exclusive OR */
#define FCOMPL 18 /* compl(x): bitwise complement */
#define FLSHIFT 19 /* lshift(x, n): left shift */
#define FRSHIFT 20 /* rshift(x, n): right shift */
#define FSYSTIME 21 /* systime(): current time from time() */
#define FSTRFTIME 22 /* strftime([format [, timestamp]]) */
/* Node: parse tree is made of nodes, with Cell's at bottom */

View File

@ -53,7 +53,7 @@ Node *arglist = 0; /* list of args for current function */
%token <i> FINAL DOT ALL CCL NCCL CHAR OR STAR QUEST PLUS EMPTYRE ZERO
%token <i> AND BOR APPEND EQ GE GT LE LT NE IN
%token <i> ARG BLTIN BREAK CLOSE CONTINUE DELETE DO EXIT FOR FUNC
%token <i> SUB GSUB IF INDEX LSUBSTR MATCHFCN NEXT NEXTFILE
%token <i> GENSUB SUB GSUB IF INDEX LSUBSTR MATCHFCN NEXT NEXTFILE
%token <i> ADD MINUS MULT DIVIDE MOD
%token <i> ASSIGN ASGNOP ADDEQ SUBEQ MULTEQ DIVEQ MODEQ POWEQ
%token <i> PRINT PRINTF SPRINTF
@ -377,6 +377,24 @@ term:
| INCR var { $$ = op1(PREINCR, $2); }
| var DECR { $$ = op1(POSTDECR, $1); }
| var INCR { $$ = op1(POSTINCR, $1); }
| GENSUB '(' reg_expr comma pattern comma pattern ')'
{ $$ = op5(GENSUB, NIL, (Node*)makedfa($3, 1), $5, $7, rectonode()); }
| GENSUB '(' pattern comma pattern comma pattern ')'
{ if (constnode($3)) {
$$ = op5(GENSUB, NIL, (Node *)makedfa(strnode($3), 1), $5, $7, rectonode());
free($3);
} else
$$ = op5(GENSUB, (Node *)1, $3, $5, $7, rectonode());
}
| GENSUB '(' reg_expr comma pattern comma pattern comma pattern ')'
{ $$ = op5(GENSUB, NIL, (Node*)makedfa($3, 1), $5, $7, $9); }
| GENSUB '(' pattern comma pattern comma pattern comma pattern ')'
{ if (constnode($3)) {
$$ = op5(GENSUB, NIL, (Node *)makedfa(strnode($3),1), $5,$7,$9);
free($3);
} else
$$ = op5(GENSUB, (Node *)1, $3, $5, $7, $9);
}
| GETLINE var LT term { $$ = op3(GETLINE, $2, itonp($3), $4); }
| GETLINE LT term { $$ = op3(GETLINE, NIL, itonp($2), $3); }
| GETLINE var { $$ = op3(GETLINE, $2, NIL, NIL); }

View File

@ -47,9 +47,11 @@ const Keyword keywords[] = { /* keep sorted: binary searched */
{ "BEGIN", XBEGIN, XBEGIN },
{ "END", XEND, XEND },
{ "NF", VARNF, VARNF },
{ "and", FAND, BLTIN },
{ "atan2", FATAN, BLTIN },
{ "break", BREAK, BREAK },
{ "close", CLOSE, CLOSE },
{ "compl", FCOMPL, BLTIN },
{ "continue", CONTINUE, CONTINUE },
{ "cos", FCOS, BLTIN },
{ "delete", DELETE, DELETE },
@ -61,6 +63,7 @@ const Keyword keywords[] = { /* keep sorted: binary searched */
{ "for", FOR, FOR },
{ "func", FUNC, FUNC },
{ "function", FUNC, FUNC },
{ "gensub", GENSUB, GENSUB },
{ "getline", GETLINE, GETLINE },
{ "gsub", GSUB, GSUB },
{ "if", IF, IF },
@ -69,24 +72,30 @@ const Keyword keywords[] = { /* keep sorted: binary searched */
{ "int", FINT, BLTIN },
{ "length", FLENGTH, BLTIN },
{ "log", FLOG, BLTIN },
{ "lshift", FLSHIFT, BLTIN },
{ "match", MATCHFCN, MATCHFCN },
{ "next", NEXT, NEXT },
{ "nextfile", NEXTFILE, NEXTFILE },
{ "or", FFOR, BLTIN },
{ "print", PRINT, PRINT },
{ "printf", PRINTF, PRINTF },
{ "rand", FRAND, BLTIN },
{ "return", RETURN, RETURN },
{ "rshift", FRSHIFT, BLTIN },
{ "sin", FSIN, BLTIN },
{ "split", SPLIT, SPLIT },
{ "sprintf", SPRINTF, SPRINTF },
{ "sqrt", FSQRT, BLTIN },
{ "srand", FSRAND, BLTIN },
{ "strftime", FSTRFTIME, BLTIN },
{ "sub", SUB, SUB },
{ "substr", SUBSTR, SUBSTR },
{ "system", FSYSTEM, BLTIN },
{ "systime", FSYSTIME, BLTIN },
{ "tolower", FTOLOWER, BLTIN },
{ "toupper", FTOUPPER, BLTIN },
{ "while", WHILE, WHILE },
{ "xor", FXOR, BLTIN },
};
#define RET(x) { if(dbg)printf("lex %s\n", tokname(x)); return(x); }

View File

@ -104,6 +104,7 @@ struct xx
{ ARG, "arg", "arg" },
{ VARNF, "getnf", "NF" },
{ GETLINE, "awkgetline", "getline" },
{ GENSUB, "gensub", "gensub" },
{ 0, "", "" },
};

View File

@ -93,6 +93,20 @@ Node *node4(int a, Node *b, Node *c, Node *d, Node *e)
return(x);
}
/*
 * node5: obtain a node from nodealloc(5), tag it with object code `a`
 * and attach b..f, in that order, as narg[0]..narg[4].
 */
Node *node5(int a, Node *b, Node *c, Node *d, Node *e, Node *f)
{
	Node *kids[5] = { b, c, d, e, f };
	Node *np = nodealloc(5);
	int i;

	np->nobj = a;
	for (i = 0; i < 5; i++)
		np->narg[i] = kids[i];
	return np;
}
Node *stat1(int a, Node *b)
{
Node *x;
@ -165,6 +179,15 @@ Node *op4(int a, Node *b, Node *c, Node *d, Node *e)
return(x);
}
/*
 * op5: same as node5(), but the resulting node is additionally marked
 * as an expression (ntype = NEXPR).
 */
Node *op5(int a, Node *b, Node *c, Node *d, Node *e, Node *f)
{
	Node *expr = node5(a, b, c, d, e, f);

	expr->ntype = NEXPR;
	return expr;
}
Node *celltonode(Cell *a, int b)
{
Node *x;

View File

@ -73,12 +73,14 @@ extern Node *node1(int, Node *);
extern Node *node2(int, Node *, Node *);
extern Node *node3(int, Node *, Node *, Node *);
extern Node *node4(int, Node *, Node *, Node *, Node *);
extern Node *node5(int, Node *, Node *, Node *, Node *, Node *);
extern Node *stat3(int, Node *, Node *, Node *);
extern Node *op2(int, Node *, Node *);
extern Node *op1(int, Node *);
extern Node *stat1(int, Node *);
extern Node *op3(int, Node *, Node *, Node *);
extern Node *op4(int, Node *, Node *, Node *, Node *);
extern Node *op5(int, Node *, Node *, Node *, Node *, Node *);
extern Node *stat2(int, Node *, Node *);
extern Node *stat4(int, Node *, Node *, Node *, Node *);
extern Node *celltonode(Cell *, int);
@ -197,6 +199,7 @@ extern const char *filename(FILE *);
extern Cell *closefile(Node **, int);
extern void closeall(void);
extern Cell *dosub(Node **, int);
extern Cell *gensub(Node **, int);
extern FILE *popen(const char *, const char *);
extern int pclose(FILE *);

View File

@ -2062,12 +2062,14 @@ Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg lis
{
Cell *x, *y;
Awkfloat u;
int t;
int t, sz;
Awkfloat tmp;
char *buf;
char *buf, *fmt;
Node *nextarg;
FILE *fp;
int status = 0;
time_t tv;
struct tm *tm;
int estatus = 0;
t = ptoi(a[0]);
@ -2109,6 +2111,64 @@ Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg lis
nextarg = nextarg->nnext;
}
break;
case FCOMPL:
u = ~((int)getfval(x));
break;
case FAND:
if (nextarg == 0) {
WARNING("and requires two arguments; returning 0");
u = 0;
break;
}
y = execute(a[1]->nnext);
u = ((int)getfval(x)) & ((int)getfval(y));
tempfree(y);
nextarg = nextarg->nnext;
break;
case FFOR:
if (nextarg == 0) {
WARNING("or requires two arguments; returning 0");
u = 0;
break;
}
y = execute(a[1]->nnext);
u = ((int)getfval(x)) | ((int)getfval(y));
tempfree(y);
nextarg = nextarg->nnext;
break;
case FXOR:
if (nextarg == 0) {
WARNING("xor requires two arguments; returning 0");
u = 0;
break;
}
y = execute(a[1]->nnext);
u = ((int)getfval(x)) ^ ((int)getfval(y));
tempfree(y);
nextarg = nextarg->nnext;
break;
case FLSHIFT:
if (nextarg == 0) {
WARNING("lshift requires two arguments; returning 0");
u = 0;
break;
}
y = execute(a[1]->nnext);
u = ((int)getfval(x)) << ((int)getfval(y));
tempfree(y);
nextarg = nextarg->nnext;
break;
case FRSHIFT:
if (nextarg == 0) {
WARNING("rshift requires two arguments; returning 0");
u = 0;
break;
}
y = execute(a[1]->nnext);
u = ((int)getfval(x)) >> ((int)getfval(y));
tempfree(y);
nextarg = nextarg->nnext;
break;
case FSYSTEM:
fflush(stdout); /* in case something is buffered already */
estatus = status = system(getsval(x));
@ -2163,6 +2223,41 @@ Cell *bltin(Node **a, int n) /* builtin functions. a[0] is type, a[1] is arg lis
else
u = fflush(fp);
break;
case FSYSTIME:
u = time((time_t *) 0);
break;
case FSTRFTIME:
/* strftime([format [,timestamp]]) */
if (nextarg) {
y = execute(nextarg);
nextarg = nextarg->nnext;
tv = (time_t) getfval(y);
tempfree(y);
} else
tv = time((time_t *) 0);
tm = localtime(&tv);
if (tm == NULL)
FATAL("bad time %ld", (long)tv);
if (isrec(x)) {
/* format argument not provided, use default */
fmt = tostring("%a %b %d %H:%M:%S %Z %Y");
} else
fmt = tostring(getsval(x));
sz = 32;
buf = NULL;
do {
if ((buf = realloc(buf, (sz *= 2))) == NULL)
FATAL("out of memory in strftime");
} while (strftime(buf, sz, fmt, tm) == 0 && fmt[0] != '\0');
y = gettemp();
setsval(y, buf);
free(fmt);
free(buf);
return y;
default: /* can't happen */
FATAL("illegal function type %d", t);
break;
@ -2542,6 +2637,147 @@ next_search:
return x;
}
/*
 * gensub: build a new string from the target in which matches of a
 * regular expression are replaced; the target itself is not assigned to.
 *
 *   a[0]  0 if a[1] is an already-compiled regexp (fa *); otherwise a[1]
 *         is an expression that is executed to obtain the pattern text
 *   a[1]  the regular expression (see a[0])
 *   a[2]  replacement string; '&' expands to the matched text and
 *         backslash sequences are handed to backsub()
 *   a[3]  "how": a string starting with 'g' or 'G' replaces every match,
 *         anything else is read as the 1-based number of the single
 *         match to replace (values below 1 are treated as 1)
 *   a[4]  target string
 *
 * Returns a copy of the target cell, marked CTEMP, whose string value is
 * the substituted text (or the unchanged target when nothing matches).
 * A replacement containing \0 ... \9 is rejected with FATAL because
 * backreferences are not implemented.
 */
Cell *gensub(Node **a, int nnn)	/* global selective substitute */
	/* XXX incomplete - doesn't support backreferences \0 ... \9 */
{
	Cell *x, *y, *res, *h;
	char *rptr;
	const char *sptr;
	char *buf, *pb;
	const char *t, *q;
	fa *pfa;
	int mflag, tempstat, num, whichm;
	int bufsz = recsize;

	if ((buf = malloc(bufsz)) == NULL)
		FATAL("out of memory in gensub");
	mflag = 0;	/* if mflag == 0, can replace empty string */
	num = 0;	/* number of matches seen so far */
	x = execute(a[4]);	/* source string */
	t = getsval(x);
	res = copycell(x);	/* target string - initially copy of source */
	res->csub = CTEMP;	/* result values are temporary */
	if (a[0] == 0)		/* 0 => a[1] is already-compiled regexpr */
		pfa = (fa *) a[1];	/* regular expression */
	else {
		y = execute(a[1]);
		pfa = makedfa(getsval(y), 1);
		tempfree(y);
	}
	y = execute(a[2]);	/* replacement string */
	h = execute(a[3]);	/* which matches should be replaced */
	sptr = getsval(h);
	if (sptr[0] == 'g' || sptr[0] == 'G')
		whichm = -1;	/* negative => replace every match */
	else {
		/*
		 * The specified number is index of replacement, starting
		 * from 1. GNU awk treats index lower than 0 same as
		 * 1, we do same for compatibility.
		 */
		whichm = (int) getfval(h) - 1;
		if (whichm < 0)
			whichm = 0;
	}
	tempfree(h);

	if (pmatch(pfa, t)) {
		char *sl;

		tempstat = pfa->initstat;
		pfa->initstat = 2;
		pb = buf;
		rptr = getsval(y);
		/*
		 * XXX if there are any backreferences in subst string,
		 * complain now.
		 */
		for (sl = rptr; (sl = strchr(sl, '\\')) && sl[1]; sl++) {
			if (strchr("0123456789", sl[1])) {
				FATAL("gensub doesn't support backreferences (subst \"%s\")", rptr);
			}
		}

		do {
			if (whichm >= 0 && whichm != num) {
				/* not the selected match: pass it through unchanged */
				num++;
				adjbuf(&buf, &bufsz, (pb - buf) + (patbeg - t) + patlen, recsize, &pb, "gensub");

				/* copy the part of string up to and including
				 * match to output buffer */
				while (t < patbeg + patlen)
					*pb++ = *t++;
				continue;
			}

			if (patlen == 0 && *patbeg != 0) {	/* matched empty string */
				if (mflag == 0) {	/* can replace empty */
					num++;
					sptr = rptr;
					while (*sptr != 0) {
						adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub");
						if (*sptr == '\\') {
							backsub(&pb, &sptr);
						} else if (*sptr == '&') {
							sptr++;
							adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub");
							for (q = patbeg; q < patbeg+patlen; )
								*pb++ = *q++;
						} else
							*pb++ = *sptr++;
					}
				}
				if (*t == 0)	/* at end */
					goto done;
				adjbuf(&buf, &bufsz, 2+pb-buf, recsize, &pb, "gensub");
				*pb++ = *t++;
				if (pb > buf + bufsz)	/* BUG: not sure of this test */
					FATAL("gensub result0 %.30s too big; can't happen", buf);
				mflag = 0;
			}
			else {	/* matched nonempty string */
				num++;
				sptr = t;
				adjbuf(&buf, &bufsz, 1+(patbeg-sptr)+pb-buf, recsize, &pb, "gensub");
				while (sptr < patbeg)
					*pb++ = *sptr++;
				sptr = rptr;
				while (*sptr != 0) {
					adjbuf(&buf, &bufsz, 5+pb-buf, recsize, &pb, "gensub");
					if (*sptr == '\\') {
						backsub(&pb, &sptr);
					} else if (*sptr == '&') {
						sptr++;
						adjbuf(&buf, &bufsz, 1+patlen+pb-buf, recsize, &pb, "gensub");
						for (q = patbeg; q < patbeg+patlen; )
							*pb++ = *q++;
					} else
						*pb++ = *sptr++;
				}
				t = patbeg + patlen;
				if (patlen == 0 || *t == 0 || *(t-1) == 0)
					goto done;
				if (pb > buf + bufsz)
					FATAL("gensub result1 %.30s too big; can't happen", buf);
				mflag = 1;
			}
		} while (pmatch(pfa,t));

		/* append whatever follows the last match, including its NUL */
		sptr = t;
		adjbuf(&buf, &bufsz, 1+strlen(sptr)+pb-buf, 0, &pb, "gensub");
		while ((*pb++ = *sptr++) != 0)
			;
	done:	if (pb > buf + bufsz)
			FATAL("gensub result2 %.30s too big; can't happen", buf);
		/*
		 * pb may sit exactly at buf + bufsz here: the tail copy above
		 * sizes the buffer for the text plus its NUL and leaves pb one
		 * past that NUL.  Make room before storing the terminator so
		 * the write cannot land one byte past the allocation.
		 */
		adjbuf(&buf, &bufsz, 1+pb-buf, recsize, &pb, "gensub");
		*pb = '\0';
		setsval(res, buf);
		pfa->initstat = tempstat;
	}
	tempfree(x);
	tempfree(y);
	free(buf);
	return(res);
}
void backsub(char **pb_ptr, const char **sptr_ptr) /* handle \\& variations */
{ /* sptr[0] == '\\' */
char *pb = *pb_ptr;