HardenedBSD/sys/ddb/db_lex.c
Conrad Meyer 25b3370cfd ddb(4): Add some support for lexing IPv6 addresses
Allow commands to specify that (hex) numbers may start with A-F, by adding
the DRT_HEX flag for db_read_token_flags().  As before, numbers containing
invalid digits for the current radix are rejected.

Also, lex ':' and '::' tokens as tCOLON and tCOLONCOLON respectively.

There is a mild conflict here with lexed "identifiers" (tIDENT): ddb
identifiers may contain arbitrary colons, and the ddb lexer is greedy.  So
the identifier lex will swallow any colons it finds inside identifiers, and
consumers are still unable to expect the token sequence 'tIDENT tCOLON'.
That limitation does not matter for IPv6 addresses, because the lexer always
attempts to lex numbers before identifiers.

Reviewed by:	markj
Differential Revision:	https://reviews.freebsd.org/D21509
2019-09-09 16:32:23 +00:00

382 lines
7.2 KiB
C

/*-
* SPDX-License-Identifier: MIT-CMU
*
* Mach Operating System
* Copyright (c) 1991,1990 Carnegie Mellon University
* All Rights Reserved.
*
* Permission to use, copy, modify and distribute this software and its
* documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
* ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
/*
* Author: David B. Golub, Carnegie Mellon University
* Date: 7/90
*/
/*
* Lexical analyzer.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/libkern.h>
#include <sys/lock.h>
#include <ddb/ddb.h>
#include <ddb/db_lex.h>
/* Buffer holding the current line of debugger input. */
static char db_line[DB_MAXLINE];
/*
 * Scan window into db_line: db_lp is the next character to hand out and
 * db_endlp is one past the last valid character.
 */
static char * db_lp, *db_endlp;
/* Forward declarations of the file-local helpers defined further down. */
static int db_lex(int);
static void db_flush_line(void);
static int db_read_char(void);
static void db_unread_char(int);
/*
 * Read one line of input into the line buffer and point the scan window
 * at it.
 *
 * Returns whatever db_readline() reported.  A result of 0 means end of
 * input; in that case the scan window is left as it was.
 */
int
db_read_line(void)
{
	int nread;

	nread = db_readline(db_line, sizeof(db_line));
	if (nread != 0) {
		db_lp = db_line;
		db_endlp = db_line + nread;
	}
	return (nread);
}
/*
 * Simulate a line of input into DDB.
 *
 * The command is copied into the line buffer (truncated if it does not fit)
 * and the scan window is set to cover exactly the characters that were
 * stored.
 */
void
db_inject_line(const char *command)
{
	size_t len;

	/*
	 * strlcpy() returns strlen(command), which may be larger than what
	 * actually fit.  Clamp it so db_endlp never points past db_line.
	 */
	len = strlcpy(db_line, command, sizeof(db_line));
	if (len >= sizeof(db_line))
		len = sizeof(db_line) - 1;
	db_lp = db_line;
	db_endlp = db_lp + len;
}
/*
 * Occasionally a caller wants the rest of the input line verbatim instead
 * of as tokens, for example when assigning the literal values associated
 * with scripts.  This hands back a pointer to the current read position
 * inside the lexer's static line buffer.  The caller must keep in mind that
 * the contents are not stable once any other lex/input call is made.
 */
char *
db_get_line(void)
{
	char *cursor;

	cursor = db_lp;
	return (cursor);
}
/*
 * Throw away any unread input by collapsing the scan window to an empty
 * range at the start of the line buffer.
 */
static void
db_flush_line(void)
{
	db_lp = db_line;
	db_endlp = db_line;
}
/* One-character pushback slot; 0 means the slot is empty. */
static int db_look_char = 0;

/*
 * Return the next input character: the pushed-back one if the slot is
 * occupied, otherwise the next character of the line buffer, or -1 once
 * the buffer has been used up.
 */
static int
db_read_char(void)
{
	int ch;

	if (db_look_char != 0) {
		ch = db_look_char;
		db_look_char = 0;
		return (ch);
	}
	if (db_lp >= db_endlp)
		return (-1);
	ch = *db_lp++;
	return (ch);
}
/*
 * Push one character back so that the next db_read_char() returns it.
 *
 * Only a single character is held: a second call overwrites the first, and
 * a value of 0 cannot be told apart from an empty slot.
 */
static void
db_unread_char(int c)
{
	db_look_char = c;
}
/* One-token pushback slot; 0 means no token is pending. */
static int db_look_token = 0;

/*
 * Push token t back.  Only one token is held, so a later call replaces
 * whatever was stored by an earlier one.
 */
void
db_unread_token(int t)
{

	db_look_token = t;
}
/*
 * Return the next token.  A token that was pushed back with
 * db_unread_token() is returned (and the slot cleared) in preference to
 * lexing new input; otherwise the input is lexed with the given flags.
 *
 * flags must contain only bits within DRT_VALID_FLAGS_MASK.
 */
int
db_read_token_flags(int flags)
{
	int tok;

	MPASS((flags & ~(DRT_VALID_FLAGS_MASK)) == 0);

	if (db_look_token == 0)
		return (db_lex(flags));

	tok = db_look_token;
	db_look_token = 0;
	return (tok);
}
/* Value accumulated by db_lex() for the most recently lexed number (tNUMBER). */
db_expr_t db_tok_number;
/* NUL-terminated text of the most recently lexed identifier (tIDENT). */
char db_tok_string[TOK_STRING_SIZE];
/*
 * Radix db_lex() uses for a number when neither the caller's flags nor a
 * 0o/0t/0x prefix selects one.
 */
db_expr_t db_radix = 16;
/*
 * Reset the lexer: forget any pushed-back token and character and discard
 * whatever is left of the current input line.
 */
void
db_flush_lex(void)
{
	db_look_token = 0;
	db_look_char = 0;
	db_flush_line();
}
/*
 * Lex the next token from the current input line.
 *
 * flags:
 *   - The DRT_RADIX_MASK bits select the radix for numbers: DRT_OCTAL,
 *     DRT_DECIMAL or DRT_HEXADECIMAL force it; DRT_DEFAULT_RADIX uses
 *     db_radix unless the number carries a 0o/0t/0x prefix.
 *   - DRT_WSPACE reports a run of skipped characters in front of a token
 *     as tWSPACE instead of silently dropping it.
 *   - DRT_HEX allows a number to begin with a-f or A-F.
 *
 * Returns a token code.  For tNUMBER the value is left in db_tok_number;
 * for tIDENT the NUL-terminated name is left in db_tok_string.  tEOL is
 * returned when a newline or the end of the buffer is met while skipping
 * leading characters.  tEOF is returned after a lexical error has been
 * reported and the lexer state flushed.
 */
static int
db_lex(int flags)
{
	int c, n, radix_mode;
	bool lex_wspace, lex_hex_numbers;

	switch (flags & DRT_RADIX_MASK) {
	case DRT_OCTAL:
		radix_mode = 8;
		break;
	case DRT_DECIMAL:
		radix_mode = 10;
		break;
	case DRT_HEXADECIMAL:
		radix_mode = 16;
		break;
	case DRT_DEFAULT_RADIX:
	default:
		/*
		 * Any unexpected selector is treated like the default so
		 * that radix_mode is never read uninitialized.
		 */
		radix_mode = -1;
		break;
	}

	lex_wspace = ((flags & DRT_WSPACE) != 0);
	lex_hex_numbers = ((flags & DRT_HEX) != 0);

	/*
	 * Skip everything outside the printable range '!'..'~' (blanks,
	 * control characters, and -1 for end of buffer), counting how many
	 * characters were skipped.
	 */
	c = db_read_char();
	for (n = 0; c <= ' ' || c > '~'; n++) {
		if (c == '\n' || c == -1)
			return (tEOL);
		c = db_read_char();
	}
	if (lex_wspace && n != 0) {
		/* Keep the first token character for the next call. */
		db_unread_char(c);
		return (tWSPACE);
	}

	/* Numbers are tried before identifiers. */
	if ((c >= '0' && c <= '9') ||
	    (lex_hex_numbers &&
	    ((c >= 'a' && c <= 'f') ||
	    (c >= 'A' && c <= 'F')))) {
		/* number */
		int r, digit = 0;

		if (radix_mode != -1)
			r = radix_mode;
		else if (c != '0')
			r = db_radix;
		else {
			/* Leading '0': look for a 0o / 0t / 0x radix prefix. */
			c = db_read_char();
			if (c == 'O' || c == 'o')
				r = 8;
			else if (c == 'T' || c == 't')
				r = 10;
			else if (c == 'X' || c == 'x')
				r = 16;
			else {
				r = db_radix;
				db_unread_char(c);
			}
			c = db_read_char();
		}

		/* NOTE(review): the accumulation below has no overflow check. */
		db_tok_number = 0;
		for (;;) {
			if (c >= '0' && c <= ((r == 8) ? '7' : '9'))
				digit = c - '0';
			else if (r == 16 && ((c >= 'A' && c <= 'F') ||
			    (c >= 'a' && c <= 'f'))) {
				if (c >= 'a')
					digit = c - 'a' + 10;
				else if (c >= 'A')
					digit = c - 'A' + 10;
			}
			else
				break;
			db_tok_number = db_tok_number * r + digit;
			c = db_read_char();
		}

		/*
		 * A number must not run straight into an identifier
		 * character, e.g. a digit that is invalid for the radix.
		 */
		if ((c >= '0' && c <= '9') ||
		    (c >= 'A' && c <= 'Z') ||
		    (c >= 'a' && c <= 'z') ||
		    (c == '_'))
		{
			db_error("Bad character in number\n");
			db_flush_lex();
			return (tEOF);
		}
		db_unread_char(c);
		return (tNUMBER);
	}

	if ((c >= 'A' && c <= 'Z') ||
	    (c >= 'a' && c <= 'z') ||
	    c == '_' || c == '\\')
	{
		/* string */
		char *cp;

		cp = db_tok_string;
		if (c == '\\') {
			/* A backslash takes the following character literally. */
			c = db_read_char();
			if (c == '\n' || c == -1)
				db_error("Bad escape\n");
		}
		*cp++ = c;
		while (1) {
			c = db_read_char();
			/* Identifiers greedily absorb ':' and '.' as well. */
			if ((c >= 'A' && c <= 'Z') ||
			    (c >= 'a' && c <= 'z') ||
			    (c >= '0' && c <= '9') ||
			    c == '_' || c == '\\' || c == ':' || c == '.')
			{
				if (c == '\\') {
					c = db_read_char();
					if (c == '\n' || c == -1)
						db_error("Bad escape\n");
				}
				*cp++ = c;
				/* Stop while there is still room for the NUL. */
				if (cp == db_tok_string+sizeof(db_tok_string)) {
					db_error("String too long\n");
					db_flush_lex();
					return (tEOF);
				}
				continue;
			}
			else {
				*cp = '\0';
				break;
			}
		}
		db_unread_char(c);
		return (tIDENT);
	}

	/* Operators and punctuation; some take one character of lookahead. */
	switch (c) {
	case '+':
		return (tPLUS);
	case '-':
		return (tMINUS);
	case '.':
		c = db_read_char();
		if (c == '.')
			return (tDOTDOT);
		db_unread_char(c);
		return (tDOT);
	case '*':
		return (tSTAR);
	case '/':
		return (tSLASH);
	case '=':
		c = db_read_char();
		if (c == '=')
			return (tLOG_EQ);
		db_unread_char(c);
		return (tEQ);
	case '%':
		return (tPCT);
	case '#':
		return (tHASH);
	case '(':
		return (tLPAREN);
	case ')':
		return (tRPAREN);
	case ',':
		return (tCOMMA);
	case '"':
		return (tDITTO);
	case '$':
		return (tDOLLAR);
	case '!':
		c = db_read_char();
		if (c == '=')
			return (tLOG_NOT_EQ);
		db_unread_char(c);
		return (tEXCL);
	case ':':
		c = db_read_char();
		if (c == ':')
			return (tCOLONCOLON);
		db_unread_char(c);
		return (tCOLON);
	case ';':
		return (tSEMI);
	case '&':
		c = db_read_char();
		if (c == '&')
			return (tLOG_AND);
		db_unread_char(c);
		return (tBIT_AND);
	case '|':
		c = db_read_char();
		if (c == '|')
			return (tLOG_OR);
		db_unread_char(c);
		return (tBIT_OR);
	case '<':
		c = db_read_char();
		if (c == '<')
			return (tSHIFT_L);
		if (c == '=')
			return (tLESS_EQ);
		db_unread_char(c);
		return (tLESS);
	case '>':
		c = db_read_char();
		if (c == '>')
			return (tSHIFT_R);
		if (c == '=')
			return (tGREATER_EQ);
		db_unread_char(c);
		return (tGREATER);
	case '?':
		return (tQUESTION);
	case '~':
		return (tBIT_NOT);
	case -1:
		/*
		 * Kept for safety; the skip loop above already returns tEOL
		 * when it sees -1.
		 */
		return (tEOF);
	}
	db_printf("Bad character\n");
	db_flush_lex();
	return (tEOF);
}