Commit 3a181dc7 authored by Russ Cox

archive/zip: fix handling of replacement rune in UTF8 check

The replacement rune is a valid rune and can appear as itself in valid UTF8
(it encodes as three bytes). To check for invalid UTF8 it is necessary to
look for utf8.DecodeRune returning the replacement rune and size==1.

Change-Id: I169be8d1fe61605c921ac13cc2fde94f80f3463c
Reviewed-on: https://go-review.googlesource.com/78126
Run-TryBot: Russ Cox <rsc@golang.org>
Reviewed-by: Joe Tsai <thebrokentoaster@gmail.com>
parent 7de9e5e1
...@@ -219,7 +219,9 @@ func (w *Writer) Create(name string) (io.Writer, error) { ...@@ -219,7 +219,9 @@ func (w *Writer) Create(name string) (io.Writer, error) {
// must be considered UTF-8 encoding (i.e., not compatible with CP-437, ASCII, // must be considered UTF-8 encoding (i.e., not compatible with CP-437, ASCII,
// or any other common encoding). // or any other common encoding).
func detectUTF8(s string) (valid, require bool) { func detectUTF8(s string) (valid, require bool) {
for _, r := range s { for i := 0; i < len(s); {
r, size := utf8.DecodeRuneInString(s[i:])
i += size
// Officially, ZIP uses CP-437, but many readers use the system's // Officially, ZIP uses CP-437, but many readers use the system's
// local character encoding. Most encoding are compatible with a large // local character encoding. Most encoding are compatible with a large
// subset of CP-437, which itself is ASCII-like. // subset of CP-437, which itself is ASCII-like.
...@@ -227,7 +229,7 @@ func detectUTF8(s string) (valid, require bool) { ...@@ -227,7 +229,7 @@ func detectUTF8(s string) (valid, require bool) {
// Forbid 0x7e and 0x5c since EUC-KR and Shift-JIS replace those // Forbid 0x7e and 0x5c since EUC-KR and Shift-JIS replace those
// characters with localized currency and overline characters. // characters with localized currency and overline characters.
if r < 0x20 || r > 0x7d || r == 0x5c { if r < 0x20 || r > 0x7d || r == 0x5c {
if !utf8.ValidRune(r) || r == utf8.RuneError { if !utf8.ValidRune(r) || (r == utf8.RuneError && size == 1) {
return false, false return false, false
} }
require = true require = true
......
...@@ -136,40 +136,45 @@ func TestWriterUTF8(t *testing.T) { ...@@ -136,40 +136,45 @@ func TestWriterUTF8(t *testing.T) {
var utf8Tests = []struct { var utf8Tests = []struct {
name string name string
comment string comment string
expect uint16
nonUTF8 bool nonUTF8 bool
flags uint16
}{ }{
{ {
name: "hi, hello", name: "hi, hello",
comment: "in the world", comment: "in the world",
expect: 0x8, flags: 0x8,
}, },
{ {
name: "hi, こんにちわ", name: "hi, こんにちわ",
comment: "in the world", comment: "in the world",
expect: 0x808, flags: 0x808,
}, },
{ {
name: "hi, こんにちわ", name: "hi, こんにちわ",
comment: "in the world", comment: "in the world",
nonUTF8: true, nonUTF8: true,
expect: 0x8, flags: 0x8,
}, },
{ {
name: "hi, hello", name: "hi, hello",
comment: "in the 世界", comment: "in the 世界",
expect: 0x808, flags: 0x808,
}, },
{ {
name: "hi, こんにちわ", name: "hi, こんにちわ",
comment: "in the 世界", comment: "in the 世界",
expect: 0x808, flags: 0x808,
},
{
name: "the replacement rune is �",
comment: "the replacement rune is �",
flags: 0x808,
}, },
{ {
// Name is Japanese encoded in Shift JIS. // Name is Japanese encoded in Shift JIS.
name: "\x93\xfa\x96{\x8c\xea.txt", name: "\x93\xfa\x96{\x8c\xea.txt",
comment: "in the 世界", comment: "in the 世界",
expect: 0x008, // UTF-8 must not be set flags: 0x008, // UTF-8 must not be set
}, },
} }
...@@ -201,10 +206,9 @@ func TestWriterUTF8(t *testing.T) { ...@@ -201,10 +206,9 @@ func TestWriterUTF8(t *testing.T) {
t.Fatal(err) t.Fatal(err)
} }
for i, test := range utf8Tests { for i, test := range utf8Tests {
got := r.File[i].Flags flags := r.File[i].Flags
t.Logf("name %v, comment %v", test.name, test.comment) if flags != test.flags {
if got != test.expect { t.Errorf("CreateHeader(name=%q comment=%q nonUTF8=%v): flags=%#x, want %#x", test.name, test.comment, test.nonUTF8, flags, test.flags)
t.Fatalf("Flags: got %v, want %v", got, test.expect)
} }
} }
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment