Commit e14cf90a authored by Marcel van Lohuizen

unicode: move unicode and related packages to Unicode 6.2.0.

R=r, mpvl
CC=golang-dev
https://golang.org/cl/6818067
parent b8b32945
...@@ -129,7 +129,7 @@ func (b *Builder) Add(runes []rune, colelems [][]int, variables []int) error { ...@@ -129,7 +129,7 @@ func (b *Builder) Add(runes []rune, colelems [][]int, variables []int) error {
if ce[0] > b.varTop { if ce[0] > b.varTop {
b.varTop = ce[0] b.varTop = ce[0]
} }
} else if ce[0] > 0 { } else if ce[0] > 1 { // 1 is a special primary value reserved for FFFE
if ce[0] <= b.varTop { if ce[0] <= b.varTop {
return fmt.Errorf("primary value %X of non-variable is smaller than the highest variable %X", ce[0], b.varTop) return fmt.Errorf("primary value %X of non-variable is smaller than the highest variable %X", ce[0], b.varTop)
} }
......
...@@ -38,7 +38,7 @@ var ( ...@@ -38,7 +38,7 @@ var (
`URL of the Default Unicode Collation Element Table (DUCET). This can be a zip `URL of the Default Unicode Collation Element Table (DUCET). This can be a zip
file containing the file allkeys_CLDR.txt or an allkeys.txt file.`) file containing the file allkeys_CLDR.txt or an allkeys.txt file.`)
cldr = flag.String("cldr", cldr = flag.String("cldr",
"http://www.unicode.org/Public/cldr/2.0.1/core.zip", "http://www.unicode.org/Public/cldr/22/core.zip",
"URL of CLDR archive.") "URL of CLDR archive.")
test = flag.Bool("test", false, test = flag.Bool("test", false,
"test existing tables; can be used to compare web data with package data.") "test existing tables; can be used to compare web data with package data.")
......
This diff is collapsed.
This diff is collapsed.
...@@ -15,6 +15,8 @@ const ( ...@@ -15,6 +15,8 @@ const (
pLl // a lower-case letter. pLl // a lower-case letter.
pp // a printable character according to Go's definition. pp // a printable character according to Go's definition.
pg = pp | pZ // a graphical character according to the Unicode definition. pg = pp | pZ // a graphical character according to the Unicode definition.
pLo = pLl | pLu // a letter that is neither upper nor lower case.
pLmask = pLo
) )
// GraphicRanges defines the set of graphic characters according to Unicode. // GraphicRanges defines the set of graphic characters according to Unicode.
...@@ -76,7 +78,7 @@ func IsControl(r rune) bool { ...@@ -76,7 +78,7 @@ func IsControl(r rune) bool {
// IsLetter reports whether the rune is a letter (category L). // IsLetter reports whether the rune is a letter (category L).
func IsLetter(r rune) bool { func IsLetter(r rune) bool {
if uint32(r) <= MaxLatin1 { if uint32(r) <= MaxLatin1 {
return properties[uint8(r)]&(pLu|pLl) != 0 return properties[uint8(r)]&(pLmask) != 0
} }
return isExcludingLatin(Letter, r) return isExcludingLatin(Letter, r)
} }
......
...@@ -180,7 +180,7 @@ func isExcludingLatin(rangeTab *RangeTable, r rune) bool { ...@@ -180,7 +180,7 @@ func isExcludingLatin(rangeTab *RangeTable, r rune) bool {
func IsUpper(r rune) bool { func IsUpper(r rune) bool {
// See comment in IsGraphic. // See comment in IsGraphic.
if uint32(r) <= MaxLatin1 { if uint32(r) <= MaxLatin1 {
return properties[uint8(r)]&pLu != 0 return properties[uint8(r)]&pLmask == pLu
} }
return isExcludingLatin(Upper, r) return isExcludingLatin(Upper, r)
} }
...@@ -189,7 +189,7 @@ func IsUpper(r rune) bool { ...@@ -189,7 +189,7 @@ func IsUpper(r rune) bool {
func IsLower(r rune) bool { func IsLower(r rune) bool {
// See comment in IsGraphic. // See comment in IsGraphic.
if uint32(r) <= MaxLatin1 { if uint32(r) <= MaxLatin1 {
return properties[uint8(r)]&pLl != 0 return properties[uint8(r)]&pLmask == pLl
} }
return isExcludingLatin(Lower, r) return isExcludingLatin(Lower, r)
} }
......
...@@ -41,7 +41,7 @@ func main() { ...@@ -41,7 +41,7 @@ func main() {
var dataURL = flag.String("data", "", "full URL for UnicodeData.txt; defaults to --url/UnicodeData.txt") var dataURL = flag.String("data", "", "full URL for UnicodeData.txt; defaults to --url/UnicodeData.txt")
var casefoldingURL = flag.String("casefolding", "", "full URL for CaseFolding.txt; defaults to --url/CaseFolding.txt") var casefoldingURL = flag.String("casefolding", "", "full URL for CaseFolding.txt; defaults to --url/CaseFolding.txt")
var url = flag.String("url", var url = flag.String("url",
"http://www.unicode.org/Public/6.0.0/ucd/", "http://www.unicode.org/Public/6.2.0/ucd/",
"URL of Unicode database directory") "URL of Unicode database directory")
var tablelist = flag.String("tables", var tablelist = flag.String("tables",
"all", "all",
...@@ -367,7 +367,7 @@ func loadCasefold() { ...@@ -367,7 +367,7 @@ func loadCasefold() {
} }
logger.Fatal(err) logger.Fatal(err)
} }
if line[0] == '#' { if line[0] == '#' || len(strings.TrimSpace(line)) == 0 {
continue continue
} }
field := strings.Split(line, "; ") field := strings.Split(line, "; ")
...@@ -1040,6 +1040,8 @@ func printLatinProperties() { ...@@ -1040,6 +1040,8 @@ func printLatinProperties() {
property = "0" property = "0"
case "Ll": case "Ll":
property = "pLl | pp" property = "pLl | pp"
case "Lo":
property = "pLo | pp"
case "Lu": case "Lu":
property = "pLu | pp" property = "pLu | pp"
case "Nd", "No": case "Nd", "No":
......
...@@ -14,7 +14,7 @@ type T struct { ...@@ -14,7 +14,7 @@ type T struct {
script string script string
} }
// Hand-chosen tests from Unicode 5.1.0 & 6.0..0, mostly to discover when new // Hand-chosen tests from Unicode 5.1.0, 6.0.0 and 6.2.0 mostly to discover when new
// scripts and categories arise. // scripts and categories arise.
var inTest = []T{ var inTest = []T{
{0x06e2, "Arabic"}, {0x06e2, "Arabic"},
...@@ -31,6 +31,7 @@ var inTest = []T{ ...@@ -31,6 +31,7 @@ var inTest = []T{
{0x11011, "Brahmi"}, {0x11011, "Brahmi"},
{0x156d, "Canadian_Aboriginal"}, {0x156d, "Canadian_Aboriginal"},
{0x102a9, "Carian"}, {0x102a9, "Carian"},
{0x11111, "Chakma"},
{0xaa4d, "Cham"}, {0xaa4d, "Cham"},
{0x13c2, "Cherokee"}, {0x13c2, "Cherokee"},
{0x0020, "Common"}, {0x0020, "Common"},
...@@ -76,6 +77,9 @@ var inTest = []T{ ...@@ -76,6 +77,9 @@ var inTest = []T{
{0x0d42, "Malayalam"}, {0x0d42, "Malayalam"},
{0x0843, "Mandaic"}, {0x0843, "Mandaic"},
{0xabd0, "Meetei_Mayek"}, {0xabd0, "Meetei_Mayek"},
{0x1099f, "Meroitic_Hieroglyphs"},
{0x109a0, "Meroitic_Cursive"},
{0x16f00, "Miao"},
{0x1822, "Mongolian"}, {0x1822, "Mongolian"},
{0x104c, "Myanmar"}, {0x104c, "Myanmar"},
{0x19c3, "New_Tai_Lue"}, {0x19c3, "New_Tai_Lue"},
...@@ -94,8 +98,10 @@ var inTest = []T{ ...@@ -94,8 +98,10 @@ var inTest = []T{
{0x16c0, "Runic"}, {0x16c0, "Runic"},
{0x081d, "Samaritan"}, {0x081d, "Samaritan"},
{0xa892, "Saurashtra"}, {0xa892, "Saurashtra"},
{0x111a0, "Sharada"},
{0x10463, "Shavian"}, {0x10463, "Shavian"},
{0x0dbd, "Sinhala"}, {0x0dbd, "Sinhala"},
{0x110d0, "Sora_Sompeng"},
{0x1ba3, "Sundanese"}, {0x1ba3, "Sundanese"},
{0xa803, "Syloti_Nagri"}, {0xa803, "Syloti_Nagri"},
{0x070f, "Syriac"}, {0x070f, "Syriac"},
...@@ -104,6 +110,7 @@ var inTest = []T{ ...@@ -104,6 +110,7 @@ var inTest = []T{
{0x1972, "Tai_Le"}, {0x1972, "Tai_Le"},
{0x1a62, "Tai_Tham"}, {0x1a62, "Tai_Tham"},
{0xaadc, "Tai_Viet"}, {0xaadc, "Tai_Viet"},
{0x116c9, "Takri"},
{0x0bbf, "Tamil"}, {0x0bbf, "Tamil"},
{0x0c55, "Telugu"}, {0x0c55, "Telugu"},
{0x07a7, "Thaana"}, {0x07a7, "Thaana"},
...@@ -121,7 +128,7 @@ var outTest = []T{ // not really worth being thorough ...@@ -121,7 +128,7 @@ var outTest = []T{ // not really worth being thorough
var inCategoryTest = []T{ var inCategoryTest = []T{
{0x0081, "Cc"}, {0x0081, "Cc"},
{0x17b4, "Cf"}, {0x200B, "Cf"},
{0xf0000, "Co"}, {0xf0000, "Co"},
{0xdb80, "Cs"}, {0xdb80, "Cs"},
{0x0236, "Ll"}, {0x0236, "Ll"},
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment