From 64793e748cfa3a3d753631a13b6ef212cd645358 Mon Sep 17 00:00:00 2001 From: Baptiste Daroussin Date: Fri, 7 Feb 2020 10:17:13 +0000 Subject: [PATCH] diff: implement -y (--side-by-side) along with -W and --suppress-common-lines PR: 219933 Submitted by: fehmi noyan isi MFC after: 3 weeks --- usr.bin/diff/diff.1 | 41 ++++++- usr.bin/diff/diff.c | 33 +++++- usr.bin/diff/diff.h | 6 +- usr.bin/diff/diffreg.c | 191 ++++++++++++++++++++++++++------ usr.bin/diff/tests/diff_test.sh | 2 - 5 files changed, 229 insertions(+), 44 deletions(-) diff --git a/usr.bin/diff/diff.1 b/usr.bin/diff/diff.1 index 2ceb273ead1d..2ccf18094ff1 100644 --- a/usr.bin/diff/diff.1 +++ b/usr.bin/diff/diff.1 @@ -30,7 +30,7 @@ .\" @(#)diff.1 8.1 (Berkeley) 6/30/93 .\" $FreeBSD$ .\" -.Dd August 18, 2018 +.Dd February 07, 2020 .Dt DIFF 1 .Os .Sh NAME @@ -41,7 +41,7 @@ .Op Fl aBbdipTtw .Oo .Fl c | e | f | -.Fl n | q | u +.Fl n | q | u | y .Oc .Op Fl -brief .Op Fl -changed-group-format Ar GFMT @@ -182,6 +182,21 @@ .Op Fl x Ar pattern | Fl -exclude Ar pattern .Ek .Ar dir1 dir2 +.Nm diff +.Op Fl aBbditwW +.Op --expand-tabs +.Op --ignore-all-space +.Op --ignore-blank-lines +.Op --ignore-case +.Op --minimal +.Op --no-ignore-file-name-case +.Op --strip-trailing-cr +.Op --suppress-common-lines +.Op --tabsize +.Op --text +.Op --width +.Fl y | Fl -side-by-side +.Ar file1 file2 .Sh DESCRIPTION The .Nm @@ -284,6 +299,20 @@ However, unlike with .Fl c , all lines to be changed (added and/or removed) are present in a single section. +.It Fl y Fl -side-by-side +Output in two columns with a marker between them. The marker can be one +of the following: +.Pp +.Bl -tag -width Ds -offset indent -compact +.It space +Corresponding lines are identical. +.It '|' +Corresponding lines are different. +.It '<' +Files differ and only the first file contains the line. +.It '>' +Files differ and only the second file contains the line.
+.El .El .Pp Comparison options: @@ -362,6 +391,10 @@ E.g., .Dq if (\ \&a == b \&) will compare equal to .Dq if(a==b) . +.It Fl W Ar number Fl -width Ar number +Output at most +.Ar number +columns when using side by side format. The default value is 130. .It Fl -changed-group-format Ar GFMT Format input groups in the provided .Pp @@ -382,7 +415,9 @@ default diff output stub option for compatibility with GNU diff .It Fl -strip-trailing-cr strip carriage return on input files -.It Fl tabsize Ar number +.It Fl -suppress-common-lines +Do not output common lines when using the side by side format +.It Fl -tabsize Ar number Number of spaces representing a tab (default 8) .El .Pp diff --git a/usr.bin/diff/diff.c b/usr.bin/diff/diff.c index f135040dab98..e2e65e520cb5 100644 --- a/usr.bin/diff/diff.c +++ b/usr.bin/diff/diff.c @@ -37,16 +37,16 @@ __FBSDID("$FreeBSD$"); #include "diff.h" #include "xmalloc.h" -int lflag, Nflag, Pflag, rflag, sflag, Tflag, cflag; -int diff_format, diff_context, status, ignore_file_case; -int tabsize = 8; +int lflag, Nflag, Pflag, rflag, sflag, Tflag, cflag, Wflag; +int diff_format, diff_context, status, ignore_file_case, suppress_common; +int tabsize = 8, width = 130; char *start, *ifdefname, *diffargs, *label[2], *ignore_pats; char *group_format = NULL; struct stat stb1, stb2; struct excludes *excludes_list; regex_t ignore_re; -#define OPTIONS "0123456789aBbC:cdD:efHhI:iL:lnNPpqrS:sTtU:uwX:x:" +#define OPTIONS "0123456789aBbC:cdD:efHhI:iL:lnNPpqrS:sTtU:uwW:X:x:y" enum { OPT_TSIZE = CHAR_MAX + 1, OPT_STRIPCR, @@ -55,6 +55,7 @@ enum { OPT_NORMAL, OPT_HORIZON_LINES, OPT_CHANGED_GROUP_FORMAT, + OPT_SUPPRESS_COMMON, }; static struct option longopts[] = { @@ -83,8 +84,10 @@ static struct option longopts[] = { { "initial-tab", no_argument, 0, 'T' }, { "unified", optional_argument, 0, 'U' }, { "ignore-all-space", no_argument, 0, 'w' }, + { "width", required_argument, 0, 'W' }, { "exclude", required_argument, 0, 'x' }, { "exclude-from", 
required_argument, 0, 'X' }, + { "side-by-side", no_argument, NULL, 'y' }, { "ignore-file-name-case", no_argument, NULL, OPT_IGN_FN_CASE }, { "horizon-lines", required_argument, NULL, OPT_HORIZON_LINES }, { "no-ignore-file-name-case", no_argument, NULL, OPT_NO_IGN_FN_CASE }, @@ -92,6 +95,7 @@ static struct option longopts[] = { { "strip-trailing-cr", no_argument, NULL, OPT_STRIPCR }, { "tabsize", optional_argument, NULL, OPT_TSIZE }, { "changed-group-format", required_argument, NULL, OPT_CHANGED_GROUP_FORMAT}, + { "suppress-common-lines", no_argument, NULL, OPT_SUPPRESS_COMMON }, { NULL, 0, 0, '\0'} }; @@ -230,12 +234,23 @@ main(int argc, char **argv) case 'w': dflags |= D_IGNOREBLANKS; break; + case 'W': + Wflag = 1; + width = (int) strtonum(optarg, 1, INT_MAX, &errstr); + if (errstr) { + warnx("Invalid argument for width"); + usage(); + } + break; case 'X': read_excludes_file(optarg); break; case 'x': push_excludes(optarg); break; + case 'y': + diff_format = D_SIDEBYSIDE; + break; case OPT_CHANGED_GROUP_FORMAT: diff_format = D_GFORMAT; group_format = optarg; @@ -261,6 +276,9 @@ main(int argc, char **argv) case OPT_STRIPCR: dflags |= D_STRIPCR; break; + case OPT_SUPPRESS_COMMON: + suppress_common = 1; + break; default: usage(); break; @@ -464,7 +482,12 @@ usage(void) " -U number file1 file2\n" " diff [-aBbdilNPprsTtw] [-c | -e | -f | -n | -q | -u] [--ignore-case]\n" " [--no-ignore-case] [--normal] [--tabsize] [-I pattern] [-L label]\n" - " [-S name] [-X file] [-x pattern] dir1 dir2\n"); + " [-S name] [-X file] [-x pattern] dir1 dir2\n" + " diff [-aBbditwW] [--expand-tabs] [--ignore-all-blanks]\n" + " [--ignore-blank-lines] [--ignore-case] [--minimal]\n" + " [--no-ignore-file-name-case] [--strip-trailing-cr]\n" + " [--suppress-common-lines] [--tabsize] [--text] [--width]\n" + " -y | --side-by-side file1 file2\n"); exit(2); } diff --git a/usr.bin/diff/diff.h b/usr.bin/diff/diff.h index 1801587f9e68..6a2c4cbea721 100644 --- a/usr.bin/diff/diff.h +++ 
b/usr.bin/diff/diff.h @@ -48,6 +48,7 @@ lines and no trailing . */ #define D_BRIEF 6 /* Say if the files differ */ #define D_GFORMAT 7 /* Diff with defined changed group format */ +#define D_SIDEBYSIDE 8 /* Side by side */ /* * Output flags @@ -85,9 +86,10 @@ struct excludes { struct excludes *next; }; -extern int lflag, Nflag, Pflag, rflag, sflag, Tflag, cflag; +extern int lflag, Nflag, Pflag, rflag, sflag, Tflag, cflag, Wflag; extern int diff_format, diff_context, status, ignore_file_case; -extern int tabsize; +extern int suppress_common; +extern int tabsize, width; extern char *start, *ifdefname, *diffargs, *label[2], *ignore_pats; extern char *group_format; extern struct stat stb1, stb2; diff --git a/usr.bin/diff/diffreg.c b/usr.bin/diff/diffreg.c index fc63912b5ef3..1ee4be59fbc5 100644 --- a/usr.bin/diff/diffreg.c +++ b/usr.bin/diff/diffreg.c @@ -181,6 +181,7 @@ struct context_vec { }; #define diff_output printf +#define MIN_PAD 1 static FILE *opentemp(const char *); static void output(char *, FILE *, char *, FILE *, int); static void check(FILE *, FILE *, int); @@ -196,6 +197,7 @@ static void unsort(struct line *, int, int *); static void change(char *, FILE *, char *, FILE *, int, int, int, int, int *); static void sort(struct line *, int); static void print_header(const char *, const char *); +static void print_space(int, int, int); static bool ignoreline_pattern(char *); static bool ignoreline(char *, bool); static int asciifile(FILE *); @@ -220,6 +222,8 @@ static int len[2]; static int pref, suff; /* length of prefix and suffix */ static int slen[2]; static int anychange; +static int hw, padding; /* half width and padding */ +static int edoffset; static long *ixnew; /* will be overlaid on file[1] */ static long *ixold; /* will be overlaid on klist */ static struct cand *clist; /* merely a free storage pot for candidates */ @@ -263,6 +267,22 @@ diffreg(char *file1, char *file2, int flags, int capsicum) lastline = 0; lastmatchline = 0; context_vec_ptr = 
context_vec_start - 1; + + /* + * hw excludes padding; when -t is not used, make sure + * the second column always starts at the closest tab stop + */ + if (diff_format == D_SIDEBYSIDE) { + hw = width >> 1; + padding = tabsize - (hw % tabsize); + if ((flags & D_EXPANDTABS) != 0 || (padding % tabsize == 0)) + padding = MIN_PAD; + + hw = (width >> 1) - + ((padding == MIN_PAD) ? (padding << 1) : padding) - 1; + } + + + if (flags & D_IGNORECASE) chrtran = cup2low; else @@ -865,7 +885,7 @@ skipline(FILE *f) static void output(char *file1, FILE *f1, char *file2, FILE *f2, int flags) { - int m, i0, i1, j0, j1; + int i, j, m, i0, i1, j0, j1, nc; rewind(f1); rewind(f2); @@ -874,15 +894,55 @@ output(char *file1, FILE *f1, char *file2, FILE *f2, int flags) J[m + 1] = len[1] + 1; if (diff_format != D_EDIT) { for (i0 = 1; i0 <= m; i0 = i1 + 1) { - while (i0 <= m && J[i0] == J[i0 - 1] + 1) + while (i0 <= m && J[i0] == J[i0 - 1] + 1){ + if (diff_format == D_SIDEBYSIDE && + suppress_common != 1) { + nc = fetch(ixold, i0, i0, f1, '\0', + 1, flags); + print_space(nc, + (hw - nc) + (padding << 1) + 1, + flags); + fetch(ixnew, J[i0], J[i0], f2, '\0', + 0, flags); + diff_output("\n"); + } i0++; + } j0 = J[i0 - 1] + 1; i1 = i0 - 1; while (i1 < m && J[i1 + 1] == 0) i1++; j1 = J[i1 + 1] - 1; J[i1] = j1; - change(file1, f1, file2, f2, i0, i1, j0, j1, &flags); + + /* + * When using side-by-side, lines from both of the + * files are printed. The algorithm used by diff(1) + * identifies the ranges in which two files differ. + * See the change() function below. + * The for loop below consumes the shorter range, + * whereas one of the while loops deals with the + * longer one.
+ */ + if (diff_format == D_SIDEBYSIDE) { + for (i=i0, j=j0; i<=i1 && j<=j1; i++, j++) + change(file1, f1, file2, f2, i, i, + j, j, &flags); + + while (i <= i1) { + change(file1, f1, file2, f2, + i, i, j+1, j, &flags); + i++; + } + + while (j <= j1) { + change(file1, f1, file2, f2, + i+1, i, j, j, &flags); + j++; + } + } else + change(file1, f1, file2, f2, i0, i1, j0, + j1, &flags); } } else { for (i0 = m; i0 >= 1; i0 = i1 - 1) { @@ -987,7 +1047,7 @@ change(char *file1, FILE *f1, char *file2, FILE *f2, int a, int b, int c, int d, { static size_t max_context = 64; long curpos; - int i, nc, f; + int i, nc; const char *walk; bool skip_blanks; @@ -1116,27 +1176,38 @@ proceed: diff_output("%c", *walk); } } + if (diff_format == D_SIDEBYSIDE) { + if (a > b) { + print_space(0, hw + padding , *pflags); + } else { + nc = fetch(ixold, a, b, f1, '\0', 1, *pflags); + print_space(nc, hw - nc + padding, *pflags); + } + diff_output("%c", (a>b)? '>' : ((c>d)? '<' : '|')); + print_space(hw + padding + 1 , padding, *pflags); + fetch(ixnew, c, d, f2, '\0', 0, *pflags); + diff_output("\n"); + } if (diff_format == D_NORMAL || diff_format == D_IFDEF) { fetch(ixold, a, b, f1, '<', 1, *pflags); if (a <= b && c <= d && diff_format == D_NORMAL) diff_output("---\n"); } - f = 0; - if (diff_format != D_GFORMAT) - f = fetch(ixnew, c, d, f2, diff_format == D_NORMAL ? '>' : '\0', 0, *pflags); - if (f != 0 && diff_format == D_EDIT) { + if (diff_format != D_GFORMAT && diff_format != D_SIDEBYSIDE) + fetch(ixnew, c, d, f2, diff_format == D_NORMAL ? '>' : '\0', 0, *pflags); + if (edoffset != 0 && diff_format == D_EDIT) { /* - * A non-zero return value for D_EDIT indicates that the + * A non-zero edoffset value for D_EDIT indicates that the * last line printed was a bare dot (".") that has been * escaped as ".." to prevent ed(1) from misinterpreting * it. We have to add a substitute command to change this * back and restart where we left off. 
*/ diff_output(".\n"); - diff_output("%ds/.//\n", a + f - 1); - b = a + f - 1; + diff_output("%ds/.//\n", a + edoffset - 1); + b = a + edoffset - 1; a = b + 1; - c += f; + c += edoffset; goto restart; } if ((diff_format == D_EDIT || diff_format == D_REVERSE) && c <= d) @@ -1150,9 +1221,10 @@ proceed: static int fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags) { - int i, j, c, lastc, col, nc; - int newcol; + int i, j, c, lastc, col, nc, newcol; + edoffset = 0; + nc = 0; /* * When doing #ifdef's, copy down to current line * if this is the first file, so that stuff makes it to output. @@ -1180,12 +1252,15 @@ fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags) } for (i = a; i <= b; i++) { fseek(lb, f[i - 1], SEEK_SET); - nc = f[i] - f[i - 1]; - if ((diff_format != D_IFDEF && diff_format != D_GFORMAT) && + nc = (f[i] - f[i - 1]); + if (diff_format == D_SIDEBYSIDE && hw < nc) + nc = hw; + if ((diff_format != D_IFDEF && diff_format != D_GFORMAT) && ch != '\0') { diff_output("%c", ch); - if (Tflag && (diff_format == D_NORMAL || diff_format == D_CONTEXT - || diff_format == D_UNIFIED)) + if (Tflag && (diff_format == D_NORMAL || + diff_format == D_CONTEXT || + diff_format == D_UNIFIED)) diff_output("\t"); else if (diff_format != D_UNIFIED) diff_output(" "); @@ -1193,38 +1268,68 @@ fetch(long *f, int a, int b, FILE *lb, int ch, int oldfile, int flags) col = 0; for (j = 0, lastc = '\0'; j < nc; j++, lastc = c) { if ((c = getc(lb)) == EOF) { - if (diff_format == D_EDIT || diff_format == D_REVERSE || + if (diff_format == D_EDIT || + diff_format == D_REVERSE || diff_format == D_NREVERSE) warnx("No newline at end of file"); else diff_output("\n\\ No newline at end of " "file\n"); - return (0); + return col; } - if (c == '\t' && (flags & D_EXPANDTABS)) { - newcol = ((col/tabsize)+1)*tabsize; - do { - diff_output(" "); - } while (++col < newcol); + /* + * when using --side-by-side, col needs to be increased + * in any case to keep the columns 
aligned + */ + if (c == '\t') { + if (flags & D_EXPANDTABS) { + newcol = ((col/tabsize)+1)*tabsize; + do { + if (diff_format == D_SIDEBYSIDE) + j++; + diff_output(" "); + } while (++col < newcol && j < nc); + } else { + if (diff_format == D_SIDEBYSIDE) { + if ((j + tabsize) > nc) { + diff_output("%*s", + nc - j,""); + j = col = nc; + } else { + diff_output("\t"); + col += tabsize - 1; + j += tabsize - 1; + } + } else { + diff_output("\t"); + col++; + } + } } else { if (diff_format == D_EDIT && j == 1 && c == '\n' && lastc == '.') { /* * Don't print a bare "." line * since that will confuse ed(1). - * Print ".." instead and return, - * giving the caller an offset - * from which to restart. + * Print ".." instead and set the + * global variable edoffset to an + * offset from which to restart. + * The caller must check the value + * of edoffset. */ diff_output(".\n"); - return (i - a + 1); + edoffset = i - a + 1; + return edoffset; + } + /* when side-by-side, do not print a newline */ + if (diff_format != D_SIDEBYSIDE || c != '\n') { + diff_output("%c", c); + col++; } - diff_output("%c", c); - col++; } } } - return (0); + return col; } /* @@ -1578,3 +1683,25 @@ print_header(const char *file1, const char *file2) diff_output("%s %s\t%s\n", diff_format == D_CONTEXT ? "---" : "+++", file2, buf2); } + +/* + * Prints n columns of blank space, using either tab + * or single space characters.
+ * nc is the number of columns already printed; it only locates the first tab stop + */ +static void +print_space(int nc, int n, int flags) { + int i, col; + + col = n; + if ((flags & D_EXPANDTABS) == 0) { + /* the first tab stop may be fewer than tabsize columns away; later ones are tabsize apart */ + i = tabsize - (nc % tabsize); + while (col >= tabsize) { + diff_output("\t"); + col -= i; + i = tabsize; + } + } + diff_output("%*s", col, ""); /* the remaining columns are emitted as spaces */ +} \ No newline at end of file diff --git a/usr.bin/diff/tests/diff_test.sh b/usr.bin/diff/tests/diff_test.sh index 01942192e504..ba38befeccb8 100755 --- a/usr.bin/diff/tests/diff_test.sh +++ b/usr.bin/diff/tests/diff_test.sh @@ -103,8 +103,6 @@ group_format_body() side_by_side_body() { - atf_expect_fail "--side-by-side not currently implemented (bug # 219933)" - atf_check -o save:A printf "A\nB\nC\n" atf_check -o save:B printf "D\nB\nE\n"