Commit e14cf90a authored by Marcel van Lohuizen

unicode: move unicode and related packages to Unicode 6.2.0.

R=r, mpvl
CC=golang-dev
https://golang.org/cl/6818067
parent b8b32945
......@@ -129,7 +129,7 @@ func (b *Builder) Add(runes []rune, colelems [][]int, variables []int) error {
if ce[0] > b.varTop {
b.varTop = ce[0]
}
-} else if ce[0] > 0 {
+} else if ce[0] > 1 { // 1 is a special primary value reserved for FFFE
if ce[0] <= b.varTop {
return fmt.Errorf("primary value %X of non-variable is smaller than the highest variable %X", ce[0], b.varTop)
}
......
......@@ -38,7 +38,7 @@ var (
`URL of the Default Unicode Collation Element Table (DUCET). This can be a zip
file containing the file allkeys_CLDR.txt or an allkeys.txt file.`)
cldr = flag.String("cldr",
-"http://www.unicode.org/Public/cldr/2.0.1/core.zip",
+"http://www.unicode.org/Public/cldr/22/core.zip",
"URL of CLDR archive.")
test = flag.Bool("test", false,
"test existing tables; can be used to compare web data with package data.")
......
This diff is collapsed.
This diff is collapsed.
......@@ -6,15 +6,17 @@ package unicode
// Bit masks for each code point under U+0100, for fast lookup.
const (
-pC = 1 << iota // a control character.
-pP // a punctuation character.
-pN // a numeral.
-pS // a symbolic character.
-pZ // a spacing character.
-pLu // an upper-case letter.
-pLl // a lower-case letter.
-pp // a printable character according to Go's definition.
-pg = pp | pZ // a graphical character according to the Unicode definition.
+pC = 1 << iota // a control character.
+pP // a punctuation character.
+pN // a numeral.
+pS // a symbolic character.
+pZ // a spacing character.
+pLu // an upper-case letter.
+pLl // a lower-case letter.
+pp // a printable character according to Go's definition.
+pg = pp | pZ // a graphical character according to the Unicode definition.
+pLo = pLl | pLu // a letter that is neither upper nor lower case.
+pLmask = pLo
)
// GraphicRanges defines the set of graphic characters according to Unicode.
......@@ -76,7 +78,7 @@ func IsControl(r rune) bool {
// IsLetter reports whether the rune is a letter (category L).
func IsLetter(r rune) bool {
if uint32(r) <= MaxLatin1 {
-return properties[uint8(r)]&(pLu|pLl) != 0
+return properties[uint8(r)]&(pLmask) != 0
}
return isExcludingLatin(Letter, r)
}
......
......@@ -180,7 +180,7 @@ func isExcludingLatin(rangeTab *RangeTable, r rune) bool {
func IsUpper(r rune) bool {
// See comment in IsGraphic.
if uint32(r) <= MaxLatin1 {
-return properties[uint8(r)]&pLu != 0
+return properties[uint8(r)]&pLmask == pLu
}
return isExcludingLatin(Upper, r)
}
......@@ -189,7 +189,7 @@ func IsUpper(r rune) bool {
func IsLower(r rune) bool {
// See comment in IsGraphic.
if uint32(r) <= MaxLatin1 {
-return properties[uint8(r)]&pLl != 0
+return properties[uint8(r)]&pLmask == pLl
}
return isExcludingLatin(Lower, r)
}
......
......@@ -41,7 +41,7 @@ func main() {
var dataURL = flag.String("data", "", "full URL for UnicodeData.txt; defaults to --url/UnicodeData.txt")
var casefoldingURL = flag.String("casefolding", "", "full URL for CaseFolding.txt; defaults to --url/CaseFolding.txt")
var url = flag.String("url",
-"http://www.unicode.org/Public/6.0.0/ucd/",
+"http://www.unicode.org/Public/6.2.0/ucd/",
"URL of Unicode database directory")
var tablelist = flag.String("tables",
"all",
......@@ -367,7 +367,7 @@ func loadCasefold() {
}
logger.Fatal(err)
}
-if line[0] == '#' {
+if line[0] == '#' || len(strings.TrimSpace(line)) == 0 {
continue
}
field := strings.Split(line, "; ")
......@@ -1040,6 +1040,8 @@ func printLatinProperties() {
property = "0"
case "Ll":
property = "pLl | pp"
+case "Lo":
+property = "pLo | pp"
case "Lu":
property = "pLu | pp"
case "Nd", "No":
......
......@@ -14,7 +14,7 @@ type T struct {
script string
}
-// Hand-chosen tests from Unicode 5.1.0 & 6.0..0, mostly to discover when new
+// Hand-chosen tests from Unicode 5.1.0, 6.0.0 and 6.2.0 mostly to discover when new
// scripts and categories arise.
var inTest = []T{
{0x06e2, "Arabic"},
......@@ -31,6 +31,7 @@ var inTest = []T{
{0x11011, "Brahmi"},
{0x156d, "Canadian_Aboriginal"},
{0x102a9, "Carian"},
+{0x11111, "Chakma"},
{0xaa4d, "Cham"},
{0x13c2, "Cherokee"},
{0x0020, "Common"},
......@@ -76,6 +77,9 @@ var inTest = []T{
{0x0d42, "Malayalam"},
{0x0843, "Mandaic"},
{0xabd0, "Meetei_Mayek"},
+{0x1099f, "Meroitic_Hieroglyphs"},
+{0x109a0, "Meroitic_Cursive"},
+{0x16f00, "Miao"},
{0x1822, "Mongolian"},
{0x104c, "Myanmar"},
{0x19c3, "New_Tai_Lue"},
......@@ -94,8 +98,10 @@ var inTest = []T{
{0x16c0, "Runic"},
{0x081d, "Samaritan"},
{0xa892, "Saurashtra"},
+{0x111a0, "Sharada"},
{0x10463, "Shavian"},
{0x0dbd, "Sinhala"},
+{0x110d0, "Sora_Sompeng"},
{0x1ba3, "Sundanese"},
{0xa803, "Syloti_Nagri"},
{0x070f, "Syriac"},
......@@ -104,6 +110,7 @@ var inTest = []T{
{0x1972, "Tai_Le"},
{0x1a62, "Tai_Tham"},
{0xaadc, "Tai_Viet"},
+{0x116c9, "Takri"},
{0x0bbf, "Tamil"},
{0x0c55, "Telugu"},
{0x07a7, "Thaana"},
......@@ -121,7 +128,7 @@ var outTest = []T{ // not really worth being thorough
var inCategoryTest = []T{
{0x0081, "Cc"},
-{0x17b4, "Cf"},
+{0x200B, "Cf"},
{0xf0000, "Co"},
{0xdb80, "Cs"},
{0x0236, "Ll"},
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment