Treat filenames as multibyte character strings (according to the current

LC_CTYPE setting) when determining which characters are printable.
This is an often-requested feature.

Use wcwidth() to determine the number of column positions a character
takes up, although there are still a few places left where we assume
1 byte = 1 column position, e.g. line-wrapping when handling the -m option.

The error handling here is somewhat more complicated than usual: we do
our best to show what we can of a filename in the presence of conversion
errors, instead of simply aborting.
This commit is contained in:
Tim J. Robbins 2004-05-02 11:25:37 +00:00
parent 30d3088041
commit 107409f46e
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=128823
4 changed files with 132 additions and 65 deletions

View File

@ -45,6 +45,7 @@ int printname(const char *);
void printscol(const DISPLAY *);
void printstream(const DISPLAY *);
void usage(void);
int prn_normal(const char *);
size_t len_octal(const char *, int);
int prn_octal(const char *);
int prn_printable(const char *);

View File

@ -31,7 +31,7 @@
.\" @(#)ls.1 8.7 (Berkeley) 7/29/94
.\" $FreeBSD$
.\"
.Dd March 21, 2004
.Dd May 2, 2004
.Dt LS 1
.Os
.Sh NAME
@ -673,7 +673,3 @@ command appeared in
.Sh BUGS
To maintain backward compatibility, the relationships between the many
options are quite complex.
.Pp
The
.Nm
utility does not recognize multibyte characters in filenames.

View File

@ -147,7 +147,7 @@ printname(const char *name)
else if (f_nonprint)
return prn_printable(name);
else
return printf("%s", name);
return prn_normal(name);
}
void
@ -239,6 +239,7 @@ printstream(const DISPLAY *dp)
for (p = dp->list, chcnt = 0; p; p = p->fts_link) {
if (p->fts_number == NO_PRINT)
continue;
/* XXX strlen does not take octal escapes into account. */
if (strlen(p->fts_name) + chcnt +
(p->fts_link ? 2 : 0) >= (unsigned)termwidth) {
putchar('\n');

View File

@ -44,25 +44,81 @@ __FBSDID("$FreeBSD$");
#include <ctype.h>
#include <err.h>
#include <fts.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <wchar.h>
#include <wctype.h>
#include "ls.h"
#include "extern.h"
int
prn_normal(const char *s)
{
mbstate_t mbs;
wchar_t wc;
int i, n;
size_t clen;
memset(&mbs, 0, sizeof(mbs));
n = 0;
while ((clen = mbrtowc(&wc, s, MB_LEN_MAX, &mbs)) != 0) {
if (clen == (size_t)-2) {
n += printf("%s", s);
break;
}
if (clen == (size_t)-1) {
memset(&mbs, 0, sizeof(mbs));
putchar((unsigned char)*s);
s++;
n++;
continue;
}
for (i = 0; i < (int)clen; i++)
putchar((unsigned char)s[i]);
s += clen;
n += wcwidth(wc);
}
return (n);
}
int
prn_printable(const char *s)
{
char c;
int n;
mbstate_t mbs;
wchar_t wc;
int i, n;
size_t clen;
for (n = 0; (c = *s) != '\0'; ++s, ++n)
if (isprint((unsigned char)c))
putchar(c);
else
memset(&mbs, 0, sizeof(mbs));
n = 0;
while ((clen = mbrtowc(&wc, s, MB_LEN_MAX, &mbs)) != 0) {
if (clen == (size_t)-1) {
putchar('?');
return n;
s++;
n++;
memset(&mbs, 0, sizeof(mbs));
continue;
}
if (clen == (size_t)-2) {
putchar('?');
n++;
break;
}
if (!iswprint(wc)) {
putchar('?');
s += clen;
n++;
continue;
}
for (i = 0; i < (int)clen; i++)
putchar((unsigned char)s[i]);
s += clen;
n += wcwidth(wc);
}
return (n);
}
/*
@ -81,70 +137,83 @@ prn_printable(const char *s)
size_t
len_octal(const char *s, int len)
{
size_t r = 0;
mbstate_t mbs;
wchar_t wc;
size_t clen, r;
while (len--)
if (isprint((unsigned const char)*s++)) r++; else r += 4;
return r;
memset(&mbs, 0, sizeof(mbs));
r = 0;
while (len != 0 && (clen = mbrtowc(&wc, s, len, &mbs)) != 0) {
if (clen == (size_t)-1) {
r += 4;
s++;
len--;
memset(&mbs, 0, sizeof(mbs));
continue;
}
if (clen == (size_t)-2) {
r += 4 * len;
break;
}
if (iswprint(wc))
r++;
else
r += 4 * clen;
s += clen;
}
return (r);
}
int
prn_octal(const char *s)
{
unsigned char ch;
int len = 0;
static const char esc[] = "\\\\\"\"\aa\bb\ff\nn\rr\tt\vv";
const char *p;
mbstate_t mbs;
wchar_t wc;
size_t clen;
unsigned char ch;
int goodchar, i, len, prtlen;
while ((ch = (unsigned char)*s++)) {
if (isprint(ch) && (ch != '\"') && (ch != '\\'))
putchar(ch), len++;
else if (f_octal_escape) {
putchar('\\');
switch (ch) {
case '\\':
putchar('\\');
break;
case '\"':
putchar('"');
break;
case '\a':
putchar('a');
break;
case '\b':
putchar('b');
break;
case '\f':
putchar('f');
break;
case '\n':
putchar('n');
break;
case '\r':
putchar('r');
break;
case '\t':
putchar('t');
break;
case '\v':
putchar('v');
break;
default:
memset(&mbs, 0, sizeof(mbs));
len = 0;
while ((clen = mbrtowc(&wc, s, MB_LEN_MAX, &mbs)) != 0) {
goodchar = clen != (size_t)-1 && clen != (size_t)-2;
if (goodchar && iswprint(wc) && wc != L'\"' && wc != L'\\') {
for (i = 0; i < (int)clen; i++)
putchar((unsigned char)s[i]);
len += wcwidth(wc);
} else if (goodchar && f_octal_escape && wc >= 0 &&
wc <= (wchar_t)UCHAR_MAX &&
(p = strchr(esc, (char)wc)) != NULL) {
putchar('\\');
putchar(p[1]);
len += 2;
} else {
if (goodchar)
prtlen = clen;
else if (clen == (size_t)-1)
prtlen = 1;
else
prtlen = strlen(s);
for (i = 0; i < prtlen; i++) {
ch = (unsigned char)s[i];
putchar('\\');
putchar('0' + (ch >> 6));
putchar('0' + ((ch >> 3) & 7));
putchar('0' + (ch & 7));
len += 2;
break;
}
len += 2;
}
else {
putchar('\\');
putchar('0' + (ch >> 6));
putchar('0' + ((ch >> 3) & 7));
putchar('0' + (ch & 7));
len += 4;
len += 4;
}
}
if (clen == (size_t)-2)
break;
if (clen == (size_t)-1) {
memset(&mbs, 0, sizeof(mbs));
s++;
} else
s += clen;
}
return len;
return (len);
}
void