Commit 8aeb8647 authored by Robert Griesemer

- handle UTF-8 text in tabwriter

R=r
DELTA=84  (27 added, 3 deleted, 54 changed)
OCL=20539
CL=20584
parent c1868bc8
...@@ -8,12 +8,12 @@ import ( ...@@ -8,12 +8,12 @@ import (
"os"; "os";
"io"; "io";
"array"; "array";
"utf8";
) )
// ---------------------------------------------------------------------------- // ----------------------------------------------------------------------------
// ByteArray // ByteArray
// TODO should use a ByteArray library eventually
type ByteArray struct { type ByteArray struct {
a *[]byte; a *[]byte;
...@@ -62,11 +62,13 @@ func (b *ByteArray) Append(s *[]byte) { ...@@ -62,11 +62,13 @@ func (b *ByteArray) Append(s *[]byte) {
// ---------------------------------------------------------------------------- // ----------------------------------------------------------------------------
// Writer is a filter implementing the io.Write interface. It assumes // Writer is a filter implementing the io.Write interface. It assumes
// that the incoming bytes represent ASCII encoded text consisting of // that the incoming bytes represent UTF-8 encoded text consisting of
// lines of tab-terminated "cells". Cells in adjacent lines constitute // lines of tab-terminated "cells". Cells in adjacent lines constitute
// a column. Writer rewrites the incoming text such that all cells in // a column. Writer rewrites the incoming text such that all cells in
// a column have the same width; thus it effectively aligns cells. It // a column have the same width; thus it effectively aligns cells. It
// does this by adding padding where necessary. // does this by adding padding where necessary. All characters (ASCII
// or not) are assumed to be of the same width - this may not be true
// for arbitrary UTF-8 characters visualized on the screen.
// //
// Note that any text at the end of a line that is not tab-terminated // Note that any text at the end of a line that is not tab-terminated
// is not a cell and does not enforce alignment of cells in adjacent // is not a cell and does not enforce alignment of cells in adjacent
...@@ -84,8 +86,6 @@ func (b *ByteArray) Append(s *[]byte) { ...@@ -84,8 +86,6 @@ func (b *ByteArray) Append(s *[]byte) {
// (for correct-looking results, cellwidth must correspond // (for correct-looking results, cellwidth must correspond
// to the tabwidth in the editor used to look at the result) // to the tabwidth in the editor used to look at the result)
// TODO Should support UTF-8 (requires more complicated width bookkeeping)
export type Writer struct { export type Writer struct {
// TODO should not export any of the fields // TODO should not export any of the fields
...@@ -97,15 +97,18 @@ export type Writer struct { ...@@ -97,15 +97,18 @@ export type Writer struct {
align_left bool; align_left bool;
// current state // current state
buf ByteArray; // the collected text w/o tabs and newlines buf ByteArray; // collected text w/o tabs and newlines
width int; // width of last incomplete cell size int; // size of last incomplete cell in bytes
lines array.Array; // list of lines; each line is a list of cell widths width int; // width of last incomplete cell in runes
widths array.IntArray; // list of column widths - re-used during formatting lines_size array.Array; // list of lines; each line is a list of cell sizes in bytes
lines_width array.Array; // list of lines; each line is a list of cell widths in runes
widths array.IntArray; // list of column widths in runes - re-used during formatting
} }
func (b *Writer) AddLine() { func (b *Writer) AddLine() {
b.lines.Push(array.NewIntArray(0)); b.lines_size.Push(array.NewIntArray(0));
b.lines_width.Push(array.NewIntArray(0));
} }
...@@ -125,7 +128,8 @@ func (b *Writer) Init(writer io.Write, cellwidth, padding int, padchar byte, ali ...@@ -125,7 +128,8 @@ func (b *Writer) Init(writer io.Write, cellwidth, padding int, padchar byte, ali
b.align_left = align_left || padchar == '\t'; // tab enforces left-alignment b.align_left = align_left || padchar == '\t'; // tab enforces left-alignment
b.buf.Init(1024); b.buf.Init(1024);
b.lines.Init(0); b.lines_size.Init(0);
b.lines_width.Init(0);
b.widths.Init(0); b.widths.Init(0);
b.AddLine(); // the very first line b.AddLine(); // the very first line
...@@ -133,21 +137,23 @@ func (b *Writer) Init(writer io.Write, cellwidth, padding int, padchar byte, ali ...@@ -133,21 +137,23 @@ func (b *Writer) Init(writer io.Write, cellwidth, padding int, padchar byte, ali
} }
func (b *Writer) Line(i int) *array.IntArray { func (b *Writer) Line(i int) (*array.IntArray, *array.IntArray) {
return b.lines.At(i).(*array.IntArray); return
b.lines_size.At(i).(*array.IntArray),
b.lines_width.At(i).(*array.IntArray);
} }
// debugging support // debugging support
func (b *Writer) Dump() { func (b *Writer) Dump() {
pos := 0; pos := 0;
for i := 0; i < b.lines.Len(); i++ { for i := 0; i < b.lines_size.Len(); i++ {
line := b.Line(i); line_size, line_width := b.Line(i);
print("(", i, ") "); print("(", i, ") ");
for j := 0; j < line.Len(); j++ { for j := 0; j < line_size.Len(); j++ {
w := line.At(j); s := line_size.At(j);
print("[", string(b.buf.Slice(pos, pos + w)), "]"); print("[", string(b.buf.Slice(pos, pos + s)), "]");
pos += w; pos += s;
} }
print("\n"); print("\n");
} }
...@@ -198,16 +204,16 @@ exit: ...@@ -198,16 +204,16 @@ exit:
func (b *Writer) WriteLines(pos0 int, line0, line1 int) (pos int, err *os.Error) { func (b *Writer) WriteLines(pos0 int, line0, line1 int) (pos int, err *os.Error) {
pos = pos0; pos = pos0;
for i := line0; i < line1; i++ { for i := line0; i < line1; i++ {
line := b.Line(i); line_size, line_width := b.Line(i);
for j := 0; j < line.Len(); j++ { for j := 0; j < line_size.Len(); j++ {
w := line.At(j); s, w := line_size.At(j), line_width.At(j);
if b.align_left { if b.align_left {
err = b.Write0(b.buf.a[pos : pos + w]); err = b.Write0(b.buf.a[pos : pos + s]);
if err != nil { if err != nil {
goto exit; goto exit;
} }
pos += w; pos += s;
if j < b.widths.Len() { if j < b.widths.Len() {
err = b.WritePadding(w, b.widths.At(j)); err = b.WritePadding(w, b.widths.At(j));
if err != nil { if err != nil {
...@@ -223,20 +229,20 @@ func (b *Writer) WriteLines(pos0 int, line0, line1 int) (pos int, err *os.Error) ...@@ -223,20 +229,20 @@ func (b *Writer) WriteLines(pos0 int, line0, line1 int) (pos int, err *os.Error)
goto exit; goto exit;
} }
} }
err = b.Write0(b.buf.a[pos : pos + w]); err = b.Write0(b.buf.a[pos : pos + s]);
if err != nil { if err != nil {
goto exit; goto exit;
} }
pos += w; pos += s;
} }
} }
if i+1 == b.lines.Len() { if i+1 == b.lines_size.Len() {
// last buffered line - we don't have a newline, so just write // last buffered line - we don't have a newline, so just write
// any outstanding buffered data // any outstanding buffered data
err = b.Write0(b.buf.a[pos : pos + b.width]); err = b.Write0(b.buf.a[pos : pos + b.size]);
pos += b.width; pos += b.size;
b.width = 0; b.size, b.width = 0, 0;
} else { } else {
// not the last line - write newline // not the last line - write newline
err = b.Write0(Newline); err = b.Write0(Newline);
...@@ -256,9 +262,9 @@ func (b *Writer) Format(pos0 int, line0, line1 int) (pos int, err *os.Error) { ...@@ -256,9 +262,9 @@ func (b *Writer) Format(pos0 int, line0, line1 int) (pos int, err *os.Error) {
column := b.widths.Len(); column := b.widths.Len();
last := line0; last := line0;
for this := line0; this < line1; this++ { for this := line0; this < line1; this++ {
line := b.Line(this); line_size, line_width := b.Line(this);
if column < line.Len() - 1 { if column < line_size.Len() - 1 {
// cell exists in this column // cell exists in this column
// (note that the last cell per line is ignored) // (note that the last cell per line is ignored)
...@@ -272,10 +278,10 @@ func (b *Writer) Format(pos0 int, line0, line1 int) (pos int, err *os.Error) { ...@@ -272,10 +278,10 @@ func (b *Writer) Format(pos0 int, line0, line1 int) (pos int, err *os.Error) {
// column block begin // column block begin
width := b.cellwidth; // minimal width width := b.cellwidth; // minimal width
for ; this < line1; this++ { for ; this < line1; this++ {
line = b.Line(this); line_size, line_width = b.Line(this);
if column < line.Len() - 1 { if column < line_size.Len() - 1 {
// cell exists in this column => update width // cell exists in this column => update width
w := line.At(column) + b.padding; w := line_width.At(column) + b.padding;
if w > width { if w > width {
width = w; width = w;
} }
...@@ -302,18 +308,35 @@ exit: ...@@ -302,18 +308,35 @@ exit:
} }
// UnicodeLen returns the number of characters encoded in buf, as opposed
// to len(buf), which is its size in bytes. A byte below utf8.RuneSelf is
// counted as one character by itself; any other position is decoded with
// utf8.DecodeRune and counted once, regardless of how many bytes the
// decoded sequence occupies.
func UnicodeLen(buf *[]byte) int {
l := 0;
for i := 0; i < len(buf); {
if buf[i] < utf8.RuneSelf {
// single-byte character: step over exactly one byte
i++;
} else {
// multi-byte sequence: only the decoded size is needed to advance;
// the rune value itself is not used
rune, size := utf8.DecodeRune(buf[i : len(buf)]);
i += size;
}
// one character counted per loop iteration
l++;
}
return l;
}
func (b *Writer) Append(buf *[]byte) { func (b *Writer) Append(buf *[]byte) {
b.buf.Append(buf); b.buf.Append(buf);
b.width += len(buf); b.size += len(buf);
b.width += UnicodeLen(buf);
} }
/* export */ func (b *Writer) Flush() *os.Error { /* export */ func (b *Writer) Flush() *os.Error {
dummy, err := b.Format(0, 0, b.lines.Len()); dummy, err := b.Format(0, 0, b.lines_size.Len());
// reset (even in the presence of errors) // reset (even in the presence of errors)
b.buf.Clear(); b.buf.Clear();
b.width = 0; b.size, b.width = 0, 0;
b.lines.Init(0); b.lines_size.Init(0);
b.lines_width.Init(0);
b.AddLine(); b.AddLine();
return err; return err;
} }
...@@ -329,13 +352,14 @@ func (b *Writer) Append(buf *[]byte) { ...@@ -329,13 +352,14 @@ func (b *Writer) Append(buf *[]byte) {
i0 = i + 1; // exclude ch from (next) cell i0 = i + 1; // exclude ch from (next) cell
// terminate cell // terminate cell
last := b.Line(b.lines.Len() - 1); last_size, last_width := b.Line(b.lines_size.Len() - 1);
last.Push(b.width); last_size.Push(b.size);
b.width = 0; last_width.Push(b.width);
b.size, b.width = 0, 0;
if ch == '\n' { if ch == '\n' {
b.AddLine(); b.AddLine();
if last.Len() == 1 { if last_size.Len() == 1 {
// The previous line has only one cell which does not have // The previous line has only one cell which does not have
// an impact on the formatting of the following lines (the // an impact on the formatting of the following lines (the
// last cell per line is ignored by Format), thus we can // last cell per line is ignored by Format), thus we can
......
...@@ -189,24 +189,24 @@ export func Test(t *testing.T) { ...@@ -189,24 +189,24 @@ export func Test(t *testing.T) {
Check( Check(
t, 8, 1, ' ', true, t, 8, 1, ' ', true,
"a\tb\tc\n" "\tb\tc\n"
"aa\tbbb\tcccc\tddddd\n" "aa\t\u672c\u672c\u672c\tcccc\tddddd\n"
"aaa\tbbbb\n", "aaa\tbbbb\n",
"a b c\n" " b c\n"
"aa bbb cccc ddddd\n" "aa 本本本 cccc ddddd\n"
"aaa bbbb\n" "aaa bbbb\n"
); );
Check( Check(
t, 8, 1, ' ', false, t, 8, 1, ' ', false,
"a\tb\tc\t\n" "a\tè\tc\t\n"
"aa\tbbb\tcccc\tddddd\t\n" "aa\tèèè\tcccc\tddddd\t\n"
"aaa\tbbbb\t\n", "aaa\tèèèè\t\n",
" a b c\n" " a è c\n"
" aa bbb cccc ddddd\n" " aa èèè cccc ddddd\n"
" aaa bbbb\n" " aaa èèèè\n"
); );
Check( Check(
...@@ -233,7 +233,7 @@ export func Test(t *testing.T) { ...@@ -233,7 +233,7 @@ export func Test(t *testing.T) {
Check( Check(
t, 4, 1, '-', true, t, 4, 1, '-', true,
"4444\t333\t22\t1\t333\n" "4444\t日本語\t22\t1\t333\n"
"999999999\t22\n" "999999999\t22\n"
"7\t22\n" "7\t22\n"
"\t\t\t88888888\n" "\t\t\t88888888\n"
...@@ -241,7 +241,7 @@ export func Test(t *testing.T) { ...@@ -241,7 +241,7 @@ export func Test(t *testing.T) {
"666666\t666666\t666666\t4444\n" "666666\t666666\t666666\t4444\n"
"1\t1\t999999999\t0000000000\n", "1\t1\t999999999\t0000000000\n",
"4444------333-22--1---333\n" "4444------日本語-22--1---333\n"
"999999999-22\n" "999999999-22\n"
"7---------22\n" "7---------22\n"
"------------------88888888\n" "------------------88888888\n"
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment