Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
G
go
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Kirill Smelkov
go
Commits
5b904a3b
Commit
5b904a3b
authored
Jul 07, 2008
by
Rob Pike
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
update to Unicode 5
SVN=126184
parent
0d079a53
Changes
24
Hide whitespace changes
Inline
Side-by-side
Showing
24 changed files
with
1520 additions
and
1256 deletions
+1520
-1256
src/lib/math/asin.go
src/lib/math/asin.go
+2
-2
src/lib/math/exp.go
src/lib/math/exp.go
+1
-1
src/lib/math/log.go
src/lib/math/log.go
+2
-2
src/lib/math/main.go
src/lib/math/main.go
+19
-1
src/lib/math/pow.go
src/lib/math/pow.go
+2
-2
src/lib/math/sinh.go
src/lib/math/sinh.go
+2
-2
src/lib/math/sqrt.go
src/lib/math/sqrt.go
+1
-2
src/lib/math/tan.go
src/lib/math/tan.go
+1
-1
src/lib9/utf/mkrunetype.c
src/lib9/utf/mkrunetype.c
+733
-0
src/lib9/utf/rune.c
src/lib9/utf/rune.c
+202
-28
src/lib9/utf/runetype.c
src/lib9/utf/runetype.c
+29
-1110
src/lib9/utf/utf.h
src/lib9/utf/utf.h
+248
-0
src/lib9/utf/utfdef.h
src/lib9/utf/utfdef.h
+8
-27
src/lib9/utf/utfecpy.c
src/lib9/utf/utfecpy.c
+4
-5
src/lib9/utf/utflen.c
src/lib9/utf/utflen.c
+5
-4
src/lib9/utf/utfnlen.c
src/lib9/utf/utfnlen.c
+5
-5
src/lib9/utf/utfrrune.c
src/lib9/utf/utfrrune.c
+7
-5
src/lib9/utf/utfrune.c
src/lib9/utf/utfrune.c
+6
-4
src/lib9/utf/utfutf.c
src/lib9/utf/utfutf.c
+7
-6
src/runtime/Makefile
src/runtime/Makefile
+1
-0
src/runtime/rune.c
src/runtime/rune.c
+224
-0
src/runtime/runtime.h
src/runtime/runtime.h
+2
-0
src/runtime/string.c
src/runtime/string.c
+0
-49
test/string_lit.go
test/string_lit.go
+9
-0
No files found.
src/lib/math/asin.go
View file @
5b904a3b
...
...
@@ -34,7 +34,7 @@ asin(arg double)double
sign
=
true
;
}
if
arg
>
1
{
panic
"return sys.NaN()"
;
return
sys
.
NaN
()
;
}
temp
=
sqrt
(
1
-
x
*
x
);
...
...
@@ -54,7 +54,7 @@ func
acos
(
arg
double
)
double
{
if
(
arg
>
1
||
arg
<
-
1
)
{
panic
"return sys.NaN()"
;
return
sys
.
NaN
()
;
}
return
pio2
-
asin
(
arg
);
}
src/lib/math/exp.go
View file @
5b904a3b
...
...
@@ -40,7 +40,7 @@ exp(arg double) double
return
0.
;
}
if
arg
>
maxf
{
panic
"return sys.Inf(1)"
return
sys
.
Inf
(
1
)
}
x
=
arg
*
log2e
;
...
...
src/lib/math/log.go
View file @
5b904a3b
...
...
@@ -36,7 +36,7 @@ log(arg double) double
var
exp
int
;
if
arg
<=
0
{
panic
"return sys.NaN()"
;
return
sys
.
NaN
()
;
}
exp
,
x
=
sys
.
frexp
(
arg
);
...
...
@@ -63,7 +63,7 @@ log10(arg double) double
{
if
arg
<=
0
{
panic
"return sys.NaN()"
;
return
sys
.
NaN
()
;
}
return
log
(
arg
)
*
ln10o1
;
}
src/lib/math/main.go
View file @
5b904a3b
...
...
@@ -5,7 +5,25 @@
package
main
import
math
"math"
//import math "math"
//////////////////
import
math
"asin"
import
math
"atan"
import
math
"atan2"
import
math
"exp"
import
math
"fabs"
import
math
"floor"
import
math
"fmod"
import
math
"hypot"
import
math
"log"
import
math
"pow"
import
math
"pow10"
import
math
"sin"
import
math
"sinh"
import
math
"sqrt"
import
math
"tan"
import
math
"tanh"
const
(
...
...
src/lib/math/pow.go
View file @
5b904a3b
...
...
@@ -26,14 +26,14 @@ pow(arg1,arg2 double) double
if
arg1
<=
0
{
if
(
arg1
==
0
)
{
if
arg2
<=
0
{
panic
"return sys.NaN()"
;
return
sys
.
NaN
()
;
}
return
0
;
}
temp
=
floor
(
arg2
);
if
temp
!=
arg2
{
panic
"return sys.NaN()"
;
panic
sys
.
NaN
()
;
}
l
=
long
(
temp
);
...
...
src/lib/math/sinh.go
View file @
5b904a3b
...
...
@@ -48,7 +48,7 @@ sinh(arg double) double
temp
=
exp
(
arg
)
/
2
;
case
arg
>
0.5
:
//
temp = (exp(arg) - exp(-arg))/2;
temp
=
(
exp
(
arg
)
-
exp
(
-
arg
))
/
2
;
default
:
argsq
=
arg
*
arg
;
...
...
@@ -71,5 +71,5 @@ cosh(arg double) double
if
arg
>
21
{
return
exp
(
arg
)
/
2
;
}
//
return (exp(arg) + exp(-arg))/2;
return
(
exp
(
arg
)
+
exp
(
-
arg
))
/
2
;
}
src/lib/math/sqrt.go
View file @
5b904a3b
...
...
@@ -19,11 +19,10 @@ sqrt(arg double) double
var
x
,
temp
double
;
var
exp
,
i
int
;
/* BUG: NO isINF
if
sys
.
isInf
(
arg
,
1
)
{
return
arg
;
}
*/
if
arg
<=
0
{
if
arg
<
0
{
panic
"return sys.NaN()"
...
...
src/lib/math/tan.go
View file @
5b904a3b
...
...
@@ -62,7 +62,7 @@ tan(arg double) double
if
flag
{
if
(
temp
==
0
)
{
panic
"return sys.NaN()"
;
panic
sys
.
NaN
()
;
}
temp
=
1
/
temp
;
}
...
...
src/lib9/utf/mkrunetype.c
0 → 100644
View file @
5b904a3b
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
/*
* make is(upper|lower|title|space|alpha)rune and
* to(upper|lower|title)rune from a UnicodeData.txt file.
* these can be found at unicode.org
*
* with -c, runs a check of the existing runetype functions vs.
* those extracted from UnicodeData.
*
* with -p, generates tables for pairs of chars, as well as for ranges
* and singletons.
*
* UnicodeData defines 4 fields of interest:
* 1) a category
* 2) an upper case mapping
* 3) a lower case mapping
* 4) a title case mapping
*
* toupper, tolower, and totitle are defined directly from the mapping.
*
* isalpharune(c) is true iff c is a "letter" category
* isupperrune(c) is true iff c is the target of toupperrune,
* or is in the uppercase letter category
* similarly for islowerrune and istitlerune.
* isspacerune is true for space category chars, "C" locale white space chars,
* and two additions:
* 0085 "next line" control char
* feff "zero-width non-break space"
* isdigitrune is true iff c is a numeric-digit category.
*/
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
#include <libgen.h>
#include "utf.h"
#include "utfdef.h"
enum
{
/*
* fields in the unicode data file
*/
FIELD_CODE
,
FIELD_NAME
,
FIELD_CATEGORY
,
FIELD_COMBINING
,
FIELD_BIDIR
,
FIELD_DECOMP
,
FIELD_DECIMAL_DIG
,
FIELD_DIG
,
FIELD_NUMERIC_VAL
,
FIELD_MIRRORED
,
FIELD_UNICODE_1_NAME
,
FIELD_COMMENT
,
FIELD_UPPER
,
FIELD_LOWER
,
FIELD_TITLE
,
NFIELDS
,
MAX_LINE
=
1024
,
TO_OFFSET
=
1
<<
20
,
NRUNES
=
1
<<
21
,
};
#define TO_DELTA(xmapped,x) (TO_OFFSET + (xmapped) - (x))
static
char
myisspace
[
NRUNES
];
static
char
myisalpha
[
NRUNES
];
static
char
myisdigit
[
NRUNES
];
static
char
myisupper
[
NRUNES
];
static
char
myislower
[
NRUNES
];
static
char
myistitle
[
NRUNES
];
static
int
mytoupper
[
NRUNES
];
static
int
mytolower
[
NRUNES
];
static
int
mytotitle
[
NRUNES
];
static
void
check
(
void
);
static
void
mktables
(
char
*
src
,
int
usepairs
);
static
void
fatal
(
const
char
*
fmt
,
...);
static
int
mygetfields
(
char
**
fields
,
int
nfields
,
char
*
str
,
const
char
*
delim
);
static
int
getunicodeline
(
FILE
*
in
,
char
**
fields
,
char
*
buf
);
static
int
getcode
(
char
*
s
);
/*
 * Print the command-line synopsis on stderr and exit with status 1.
 */
static void
usage(void)
{
	fputs("usage: mktables [-cp] <UnicodeData.txt>\n", stderr);
	exit(1);
}
/*
 * Read the UnicodeData file named by the single non-flag argument,
 * fill in the myis* property tables and myto* mapping tables, and then
 * either compare them with the existing runetype functions (-c) or
 * print newly generated tables (default; -p additionally enables the
 * pair tables).
 *
 * Every value used as an index into the NRUNES-sized tables -- the code
 * point itself, the end of a <First>/<Last> range, and each case-mapping
 * target -- is checked against NRUNES before it is used.
 */
int
main(int argc, char *argv[]){
	FILE *in;
	char buf[MAX_LINE], buf2[MAX_LINE];
	char *fields[NFIELDS + 1], *fields2[NFIELDS + 1];
	char *p;
	int i, code, last, docheck, usepairs;

	docheck = 0;
	usepairs = 0;
	ARGBEGIN{
	case 'c':
		docheck = 1;
		break;
	case 'p':
		usepairs = 1;
		break;
	default:
		usage();
	}ARGEND

	if(argc != 1){
		usage();
	}

	in = fopen(argv[0], "r");
	if(in == NULL){
		fatal("can't open %s", argv[0]);
	}

	/* start from identity mappings; the data file only lists exceptions */
	for(i = 0; i < NRUNES; i++){
		mytoupper[i] = i;
		mytolower[i] = i;
		mytotitle[i] = i;
	}

	/*
	 * make sure isspace has all of the "C" locale whitespace chars
	 */
	myisspace['\t'] = 1;
	myisspace['\n'] = 1;
	myisspace['\r'] = 1;
	myisspace['\f'] = 1;
	myisspace['\v'] = 1;

	/*
	 * a couple of other exceptions
	 */
	myisspace[0x85] = 1;	/* control char, "next line" */
	myisspace[0xfeff] = 1;	/* zero-width non-break space */

	last = -1;
	while(getunicodeline(in, fields, buf)){
		code = getcode(fields[FIELD_CODE]);
		if(code >= NRUNES)
			fatal("code-point value too big: %x", code);
		if(code <= last)
			fatal("bad code sequence: %x then %x", last, code);
		last = code;

		/*
		 * check for ranges
		 */
		p = fields[FIELD_CATEGORY];
		if(strstr(fields[FIELD_NAME], ", First>") != NULL){
			if(!getunicodeline(in, fields2, buf2))
				fatal("range start at eof");
			if(strstr(fields2[FIELD_NAME], ", Last>") == NULL)
				fatal("range start not followed by range end");
			last = getcode(fields2[FIELD_CODE]);
			if(last <= code)
				fatal("range out of sequence: %x then %x", code, last);
			/* the loop below indexes the tables up to and including last */
			if(last >= NRUNES)
				fatal("code-point value too big: %x", last);
			if(strcmp(p, fields2[FIELD_CATEGORY]) != 0)
				fatal("range with mismatched category");
		}

		/*
		 * set properties and conversions
		 */
		for(; code <= last; code++){
			if(p[0] == 'L')
				myisalpha[code] = 1;
			if(p[0] == 'Z')
				myisspace[code] = 1;
			if(strcmp(p, "Lu") == 0)
				myisupper[code] = 1;
			if(strcmp(p, "Ll") == 0)
				myislower[code] = 1;
			if(strcmp(p, "Lt") == 0)
				myistitle[code] = 1;
			if(strcmp(p, "Nd") == 0)
				myisdigit[code] = 1;

			/*
			 * when finding conversions, also need to mark
			 * upper/lower case, since some chars, like
			 * "III" (0x2162), aren't defined as letters but have a
			 * lower case mapping ("iii" (0x2172)).
			 *
			 * mapping targets are later used as table indexes,
			 * so they must be in range too.
			 */
			if(fields[FIELD_UPPER][0] != '\0'){
				mytoupper[code] = getcode(fields[FIELD_UPPER]);
				if(mytoupper[code] >= NRUNES)
					fatal("upper case mapping too big: %x", mytoupper[code]);
			}
			if(fields[FIELD_LOWER][0] != '\0'){
				mytolower[code] = getcode(fields[FIELD_LOWER]);
				if(mytolower[code] >= NRUNES)
					fatal("lower case mapping too big: %x", mytolower[code]);
			}
			if(fields[FIELD_TITLE][0] != '\0'){
				mytotitle[code] = getcode(fields[FIELD_TITLE]);
				if(mytotitle[code] >= NRUNES)
					fatal("title case mapping too big: %x", mytotitle[code]);
			}
		}
	}

	fclose(in);

	/*
	 * check for codes with no totitle mapping but a toupper mapping.
	 * these appear in UnicodeData-2.0.14.txt, but are almost certainly
	 * erroneous.
	 */
	for(i = 0; i < NRUNES; i++){
		if(mytotitle[i] == i
		&& mytoupper[i] != i
		&& !myistitle[i])
			fprintf(stderr, "warning: code=%.4x not istitle, totitle is same, toupper=%.4x\n", i, mytoupper[i]);
	}

	/*
	 * make sure isupper[c] is true if for some x toupper[x] == c
	 * ditto for islower and istitle
	 */
	for(i = 0; i < NRUNES; i++) {
		if(mytoupper[i] != i)
			myisupper[mytoupper[i]] = 1;
		if(mytolower[i] != i)
			myislower[mytolower[i]] = 1;
		if(mytotitle[i] != i)
			myistitle[mytotitle[i]] = 1;
	}

	if(docheck){
		check();
	}else{
		mktables(argv[0], usepairs);
	}
	return 0;
}
/*
 * Print the range table __is<label>r for the property array prop.
 *
 * Each row is "first, last" for a run of consecutive set entries.
 * Runs of length one are only printed when force is non-zero.
 * Entries after the first of any run are always cleared in prop; the
 * first entry is cleared only when the run is printed, so unprinted
 * singletons stay set for a later pass.
 *
 * Returns 1 if a table was printed, 0 otherwise.
 */
static int
mkisrange(const char* label, char* prop, int force)
{
	int lo, hi, emitted;

	emitted = 0;
	lo = 0;
	while(lo < NRUNES){
		if(!prop[lo]){
			lo++;
			continue;
		}

		/* extend the run, clearing every entry after its first */
		hi = lo + 1;
		while(hi < NRUNES && prop[hi]){
			prop[hi] = 0;
			hi++;
		}

		if(force || hi - lo > 1){
			if(!emitted){
				printf("static Rune __is%sr[] = {\n", label);
				emitted = 1;
			}
			prop[lo] = 0;
			printf("\t0x%.4x, 0x%.4x,\n", lo, hi - 1);
		}

		lo = hi;
	}
	if(emitted)
		printf("};\n\n");
	return emitted;
}
/*
 * Print the pair table __is<label>p for the property array prop.
 *
 * A row "first, last" covers set entries spaced exactly two apart
 * (first, first+2, ..., last); at least two such entries are needed
 * for a row.  Covered entries are cleared in prop.
 *
 * Returns 1 if a table was printed, 0 otherwise.
 */
static int
mkispair(const char *label, char *prop)
{
	int lo, hi, emitted;

	emitted = 0;
	lo = 0;
	while(lo + 2 < NRUNES){
		if(!prop[lo]){
			lo++;
			continue;
		}

		/* walk in steps of two while entries stay set, clearing them */
		hi = lo + 2;
		while(hi < NRUNES && prop[hi]){
			prop[hi] = 0;
			hi += 2;
		}

		if(hi - lo > 2){
			if(!emitted){
				printf("static Rune __is%sp[] = {\n", label);
				emitted = 1;
			}
			prop[lo] = 0;
			printf("\t0x%.4x, 0x%.4x,\n", lo, hi - 2);
		}

		lo = hi;
	}
	if(emitted)
		printf("};\n\n");
	return emitted;
}
/*
 * Print the singleton table __is<label>s: one row per entry still set
 * in prop.  Every printed entry is cleared.
 *
 * Returns 1 if a table was printed, 0 otherwise.
 */
static int
mkissingle(const char *label, char *prop)
{
	int c, emitted;

	emitted = 0;
	for(c = 0; c < NRUNES; c++){
		if(prop[c]){
			if(!emitted){
				printf("static Rune __is%ss[] = {\n", label);
				emitted = 1;
			}
			prop[c] = 0;
			printf("\t0x%.4x,\n", c);
		}
	}
	if(emitted)
		printf("};\n\n");
	return emitted;
}
/*
* generate tables and a function for is<label>rune
*
* The tables are produced by mkisrange (ranges of two or more),
* mkispair (only when usepairs is non-zero) and mkissingle, in that
* order; each pass clears the entries of prop that it covers.
* The printed is<label>rune function consults, via rbsearch, only
* those tables that were actually emitted, and returns 0 otherwise.
*/
static
void
mkis
(
const
char
*
label
,
char
*
prop
,
int
usepairs
)
{
int
isr
,
isp
,
iss
;
/* emit the tables; each result records whether that table exists */
isr
=
mkisrange
(
label
,
prop
,
0
);
isp
=
0
;
if
(
usepairs
)
isp
=
mkispair
(
label
,
prop
);
iss
=
mkissingle
(
label
,
prop
);
/* function header of the generated is<label>rune */
printf
(
"int
\n
"
"is%srune(Rune c)
\n
"
"{
\n
"
" Rune *p;
\n
"
"
\n
"
,
label
);
/* range lookup: rows of two Runes (first, last) */
if
(
isr
)
printf
(
" p = rbsearch(c, __is%sr, nelem(__is%sr)/2, 2);
\n
"
" if(p && c >= p[0] && c <= p[1])
\n
"
" return 1;
\n
"
,
label
,
label
);
/* pair lookup: as for ranges, but c must be an even distance from the row start */
if
(
isp
)
printf
(
" p = rbsearch(c, __is%sp, nelem(__is%sp)/2, 2);
\n
"
" if(p && c >= p[0] && c <= p[1] && !((c - p[0]) & 1))
\n
"
" return 1;
\n
"
,
label
,
label
);
/* singleton lookup: rows of one Rune, exact match */
if
(
iss
)
printf
(
" p = rbsearch(c, __is%ss, nelem(__is%ss), 1);
\n
"
" if(p && c == p[0])
\n
"
" return 1;
\n
"
,
label
,
label
);
/* no table matched */
printf
(
" return 0;
\n
"
"}
\n
"
"
\n
"
);
}
/*
 * generate a mapping array for ranges, clearing those entries covered.
 * if force, generate one-entry ranges for singletons.
 *
 * Prints the table __to<label>r.  Each row is "first, last, delta",
 * where delta is TO_DELTA(map[first], first) and is shared by every
 * entry in first..last.  An entry is "cleared" by resetting it to the
 * identity mapping (map[c] = c).  A delta that does not survive a
 * round trip through Rune is a fatal error.
 *
 * The force flag is now honoured, matching mkisrange; it was
 * previously accepted but ignored.
 *
 * Returns 1 if a table was printed, 0 otherwise.
 */
static int
mktorange(const char* label, int* map, int force)
{
	int start, stop, delta, some;

	some = 0;
	for(start = 0; start < NRUNES; ) {
		if(map[start] == start){
			start++;
			continue;
		}

		delta = TO_DELTA(map[start], start);
		if(delta != (Rune)delta)
			fatal("bad map delta %d", delta);

		/* extend the run while the delta stays the same */
		for(stop = start + 1; stop < NRUNES; stop++){
			if(TO_DELTA(map[stop], stop) != delta){
				break;
			}
			map[stop] = stop;
		}

		if(force || stop != start + 1){
			if(!some){
				printf("static Rune __to%sr[] = {\n", label);
				some = 1;
			}
			map[start] = start;
			printf("\t0x%.4x, 0x%.4x, %d,\n", start, stop - 1, delta);
		}

		start = stop;
	}
	if(some)
		printf("};\n\n");
	return some;
}
/*
* generate a mapping array for pairs with a skip between,
* clearing those entries covered.
*/
static
int
mktopair
(
const
char
*
label
,
int
*
map
)
{
int
start
,
stop
,
delta
,
some
;
some
=
0
;
for
(
start
=
0
;
start
+
2
<
NRUNES
;
)
{
if
(
map
[
start
]
==
start
){
start
++
;
continue
;
}
delta
=
TO_DELTA
(
map
[
start
],
start
);
if
(
delta
!=
(
Rune
)
delta
)
fatal
(
"bad map delta %d"
,
delta
);
for
(
stop
=
start
+
2
;
stop
<
NRUNES
;
stop
+=
2
){
if
(
TO_DELTA
(
map
[
stop
],
stop
)
!=
delta
){
break
;
}
map
[
stop
]
=
stop
;
}
if
(
stop
!=
start
+
2
){
if
(
!
some
){
printf
(
"static Rune __to%sp[] = {
\n
"
,
label
);
some
=
1
;
}
map
[
start
]
=
start
;
printf
(
"
\t
0x%.4x, 0x%.4x, %d,
\n
"
,
start
,
stop
-
2
,
delta
);
}
start
=
stop
;
}
if
(
some
)
printf
(
"};
\n\n
"
);
return
some
;
}
/*
* generate a mapping array for singletons, clearing those entries covered.
*/
static
int
mktosingle
(
const
char
*
label
,
int
*
map
)
{
int
start
,
delta
,
some
;
some
=
0
;
for
(
start
=
0
;
start
<
NRUNES
;
start
++
)
{
if
(
map
[
start
]
==
start
){
continue
;
}
delta
=
TO_DELTA
(
map
[
start
],
start
);
if
(
delta
!=
(
Rune
)
delta
)
fatal
(
"bad map delta %d"
,
delta
);
if
(
!
some
){
printf
(
"static Rune __to%ss[] = {
\n
"
,
label
);
some
=
1
;
}
map
[
start
]
=
start
;
printf
(
"
\t
0x%.4x, %d,
\n
"
,
start
,
delta
);
}
if
(
some
)
printf
(
"};
\n\n
"
);
return
some
;
}
/*
* generate tables and a function for to<label>rune
*
* The tables are produced by mktorange, mktopair (only when usepairs
* is non-zero) and mktosingle, in that order; each pass resets the
* entries of map that it covers.
* The printed to<label>rune function consults, via rbsearch, only
* those tables that were actually emitted.  Table deltas are stored
* biased by TO_OFFSET, so the generated code subtracts TO_OFFSET again.
* When no table matches, the generated function returns c unchanged.
*/
static
void
mkto
(
const
char
*
label
,
int
*
map
,
int
usepairs
)
{
int
tor
,
top
,
tos
;
/* emit the tables; each result records whether that table exists */
tor
=
mktorange
(
label
,
map
,
0
);
top
=
0
;
if
(
usepairs
)
top
=
mktopair
(
label
,
map
);
tos
=
mktosingle
(
label
,
map
);
/* function header of the generated to<label>rune */
printf
(
"Rune
\n
"
"to%srune(Rune c)
\n
"
"{
\n
"
" Rune *p;
\n
"
"
\n
"
,
label
);
/* range lookup: rows of three Runes (first, last, delta) */
if
(
tor
)
printf
(
" p = rbsearch(c, __to%sr, nelem(__to%sr)/3, 3);
\n
"
" if(p && c >= p[0] && c <= p[1])
\n
"
" return c + p[2] - %d;
\n
"
,
label
,
label
,
TO_OFFSET
);
/* pair lookup: as for ranges, but c must be an even distance from the row start */
if
(
top
)
printf
(
" p = rbsearch(c, __to%sp, nelem(__to%sp)/3, 3);
\n
"
" if(p && c >= p[0] && c <= p[1] && !((c - p[0]) & 1))
\n
"
" return c + p[2] - %d;
\n
"
,
label
,
label
,
TO_OFFSET
);
/* singleton lookup: rows of two Runes (code, delta), exact match */
if
(
tos
)
printf
(
" p = rbsearch(c, __to%ss, nelem(__to%ss)/2, 2);
\n
"
" if(p && c == p[0])
\n
"
" return c + p[1] - %d;
\n
"
,
label
,
label
,
TO_OFFSET
);
/* no table matched: c maps to itself */
printf
(
" return c;
\n
"
"}
\n
"
"
\n
"
);
}
// Make only range tables and a function for is<label>rune.
// mkisrange is called with force set, so singletons are emitted as
// one-entry ranges and the generated function needs only the
// __is<label>r table.
// NOTE(review): the result of mkisrange is ignored; if prop has no
// entries set, no table is printed but the generated function still
// refers to __is<label>r -- confirm callers never pass an empty table.
static
void
mkisronly
(
const
char
*
label
,
char
*
prop
)
{
mkisrange
(
label
,
prop
,
1
);
/* the three label arguments fill the function name and the two table references */
printf
(
"int
\n
"
"is%srune(Rune c)
\n
"
"{
\n
"
" Rune *p;
\n
"
"
\n
"
" p = rbsearch(c, __is%sr, nelem(__is%sr)/2, 2);
\n
"
" if(p && c >= p[0] && c <= p[1])
\n
"
" return 1;
\n
"
" return 0;
\n
"
"}
\n
"
"
\n
"
,
label
,
label
,
label
);
}
/*
 * generate the body of runetype.
 * assumes there is a function Rune* rbsearch(Rune c, Rune *t, int n, int ne);
 *
 * src is the name of the data file; only its basename is printed in
 * the leading comment.  usepairs is passed on to the upper/lower/title
 * generators.
 */
static void
mktables(char *src, int usepairs)
{
	static const char *cases[] = { "upper", "lower", "title" };
	static char *isprops[] = { myisupper, myislower, myistitle };
	static int *tomaps[] = { mytoupper, mytolower, mytotitle };
	int k;

	printf("/* generated automatically by mkrunetype.c from %s */\n\n", basename(src));

	/*
	 * we special case the space and digit tables, since they are assumed
	 * to be small with several ranges.
	 */
	mkisronly("space", myisspace);
	mkisronly("digit", myisdigit);

	/* alpha never uses pair tables */
	mkis("alpha", myisalpha, 0);

	/* all property functions first, then all mapping functions */
	for(k = 0; k < 3; k++)
		mkis(cases[k], isprops[k], usepairs);
	for(k = 0; k < 3; k++)
		mkto(cases[k], tomaps[k], usepairs);
}
/*
* find differences between the newly generated tables and current runetypes.
*/
static
void
check
(
void
)
{
int
i
;
for
(
i
=
0
;
i
<
NRUNES
;
i
++
){
if
(
isdigitrune
(
i
)
!=
myisdigit
[
i
])
fprintf
(
stderr
,
"isdigit diff at %x: runetype=%x, unicode=%x
\n
"
,
i
,
isdigitrune
(
i
),
myisdigit
[
i
]);
if
(
isspacerune
(
i
)
!=
myisspace
[
i
])
fprintf
(
stderr
,
"isspace diff at %x: runetype=%x, unicode=%x
\n
"
,
i
,
isspacerune
(
i
),
myisspace
[
i
]);
if
(
isupperrune
(
i
)
!=
myisupper
[
i
])
fprintf
(
stderr
,
"isupper diff at %x: runetype=%x, unicode=%x
\n
"
,
i
,
isupperrune
(
i
),
myisupper
[
i
]);
if
(
islowerrune
(
i
)
!=
myislower
[
i
])
fprintf
(
stderr
,
"islower diff at %x: runetype=%x, unicode=%x
\n
"
,
i
,
islowerrune
(
i
),
myislower
[
i
]);
if
(
isalpharune
(
i
)
!=
myisalpha
[
i
])
fprintf
(
stderr
,
"isalpha diff at %x: runetype=%x, unicode=%x
\n
"
,
i
,
isalpharune
(
i
),
myisalpha
[
i
]);
if
(
toupperrune
(
i
)
!=
mytoupper
[
i
])
fprintf
(
stderr
,
"toupper diff at %x: runetype=%x, unicode=%x
\n
"
,
i
,
toupperrune
(
i
),
mytoupper
[
i
]);
if
(
tolowerrune
(
i
)
!=
mytolower
[
i
])
fprintf
(
stderr
,
"tolower diff at %x: runetype=%x, unicode=%x
\n
"
,
i
,
tolowerrune
(
i
),
mytolower
[
i
]);
if
(
istitlerune
(
i
)
!=
myistitle
[
i
])
fprintf
(
stderr
,
"istitle diff at %x: runetype=%x, unicode=%x
\n
"
,
i
,
istitlerune
(
i
),
myistitle
[
i
]);
if
(
totitlerune
(
i
)
!=
mytotitle
[
i
])
fprintf
(
stderr
,
"totitle diff at %x: runetype=%x, unicode=%x
\n
"
,
i
,
totitlerune
(
i
),
mytotitle
[
i
]);
}
}
/*
 * Split str in place into at most nfields fields.
 *
 * Every character of str that occurs in delim is overwritten with
 * '\0' and the following character starts a new field, until nfields
 * fields have been recorded; the last field then keeps the rest of
 * the string, delimiters included.  fields[0] is always set to str.
 *
 * Returns the number of entries stored in fields.
 */
static int
mygetfields(char **fields, int nfields, char *str, const char *delim)
{
	int count;
	char *cur;

	fields[0] = str;
	count = 1;
	for(cur = str; count < nfields && *cur != '\0'; cur++){
		if(strchr(delim, *cur) == NULL)
			continue;
		*cur = '\0';
		fields[count++] = cur + 1;
	}
	return count;
}
/*
 * Read one line of the data file into buf (MAX_LINE bytes) and split
 * it at ';' into fields.
 *
 * Returns 0 when no further line can be read and 1 after a successful
 * read.  A line that does not fit in buf, or that does not split into
 * exactly NFIELDS fields, is a fatal error.
 *
 * A final line that lacks a trailing newline is accepted: a missing
 * '\n' only means "line too long" when end of file has not been
 * reached.
 */
static int
getunicodeline(FILE *in, char **fields, char *buf)
{
	char *p;

	if(fgets(buf, MAX_LINE, in) == NULL)
		return 0;

	p = strchr(buf, '\n');
	if(p != NULL)
		*p = '\0';
	else if(!feof(in))
		fatal("line too long");

	/* ask for one more than NFIELDS so that surplus fields are detected */
	if(mygetfields(fields, NFIELDS + 1, buf, ";") != NFIELDS)
		fatal("bad number of fields");

	return 1;
}
static
int
getcode
(
char
*
s
)
{
int
i
,
code
;
code
=
0
;
i
=
0
;
/* Parse a hex number */
while
(
s
[
i
])
{
code
<<=
4
;
if
(
s
[
i
]
>=
'0'
&&
s
[
i
]
<=
'9'
)
code
+=
s
[
i
]
-
'0'
;
else
if
(
s
[
i
]
>=
'A'
&&
s
[
i
]
<=
'F'
)
code
+=
s
[
i
]
-
'A'
+
10
;
else
fatal
(
"bad code char '%c'"
,
s
[
i
]);
i
++
;
}
return
code
;
}
static
void
fatal
(
const
char
*
fmt
,
...)
{
va_list
arg
;
fprintf
(
stderr
,
"%s: fatal error: "
,
argv0
);
va_start
(
arg
,
fmt
);
vfprintf
(
stderr
,
fmt
,
arg
);
va_end
(
arg
);
fprintf
(
stderr
,
"
\n
"
);
exit
(
1
);
}
src/lib9/utf/rune.c
View file @
5b904a3b
/*
* The authors of this software are Rob Pike and Ken Thompson.
* Copyright (c) 2002 by Lucent Technologies.
* Portions Copyright (c) 2009 The Go Authors. All rights reserved.
* Permission to use, copy, modify, and distribute this software for any
* purpose without fee is hereby granted, provided that this entire notice
* is included in all copies of any software which is or includes a copy
* or modification of this software and in all copies of the supporting
* documentation for such software.
* THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
* WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
*
ANY
REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
* WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
ANY
* REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
* OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
*/
#include <stdarg.h>
#include <string.h>
#include "plan9.h"
#include "utf.h"
#include "utfdef.h"
enum
{
...
...
@@ -23,27 +24,150 @@ enum
Bit2
=
5
,
Bit3
=
4
,
Bit4
=
3
,
Bit5
=
2
,
T1
=
((
1
<<
(
Bit1
+
1
))
-
1
)
^
0xFF
,
/* 0000 0000 */
Tx
=
((
1
<<
(
Bitx
+
1
))
-
1
)
^
0xFF
,
/* 1000 0000 */
T2
=
((
1
<<
(
Bit2
+
1
))
-
1
)
^
0xFF
,
/* 1100 0000 */
T3
=
((
1
<<
(
Bit3
+
1
))
-
1
)
^
0xFF
,
/* 1110 0000 */
T4
=
((
1
<<
(
Bit4
+
1
))
-
1
)
^
0xFF
,
/* 1111 0000 */
T5
=
((
1
<<
(
Bit5
+
1
))
-
1
)
^
0xFF
,
/* 1111 1000 */
Rune1
=
(
1
<<
(
Bit1
+
0
*
Bitx
))
-
1
,
/* 0000 0000 0111 1111 */
Rune2
=
(
1
<<
(
Bit2
+
1
*
Bitx
))
-
1
,
/* 0000 0111 1111 1111 */
Rune3
=
(
1
<<
(
Bit3
+
2
*
Bitx
))
-
1
,
/* 1111 1111 1111 1111 */
Rune4
=
(
1
<<
(
Bit4
+
3
*
Bitx
))
-
1
,
/* 0001 1111 1111 1111 1111 1111 */
Maskx
=
(
1
<<
Bitx
)
-
1
,
/* 0011 1111 */
Testx
=
Maskx
^
0xFF
,
/* 1100 0000 */
Bad
=
Runeerror
Bad
=
Runeerror
,
};
/*
 * Modified by Wei-Hwa Huang, Google Inc., on 2004-09-24
 * This is a slower but "safe" version of the old chartorune
 * that works on strings that are not necessarily null-terminated.
 *
 * If you know for sure that your string is null-terminated,
 * chartorune will be a bit faster.
 *
 * It is guaranteed not to read more than "length" bytes starting
 * at the incoming pointer.  This is to avoid possible access
 * violations.  If the string appears to be well-formed but
 * incomplete (i.e., to get the whole Rune we'd need to read past
 * str+length) then we'll set the Rune to Bad and return 0.
 *
 * Note that if we have decoding problems for other
 * reasons, we return 1 instead of 0.
 *
 * Otherwise the decoded value is stored in *rune and the number of
 * bytes consumed (1 to 4) is returned.  Four-byte sequences that
 * decode to a value above Runemax are rejected as bad, so this
 * function never produces a value that runetochar treats as out of
 * range.
 */
int
charntorune(Rune *rune, const char *str, int length)
{
	int c, c1, c2, c3;
	long l;

	/* When we're not allowed to read anything */
	if(length <= 0) {
		goto badlen;
	}

	/*
	 * one character sequence (7-bit value)
	 *	00000-0007F => T1
	 */
	c = *(uchar*)str;
	if(c < Tx) {
		*rune = c;
		return 1;
	}

	/* If we can't read more than one character we must stop */
	if(length <= 1) {
		goto badlen;
	}

	/*
	 * two character sequence (11-bit value)
	 *	0080-07FF => T2 Tx
	 */
	c1 = *(uchar*)(str+1) ^ Tx;
	if(c1 & Testx)
		goto bad;
	if(c < T3) {
		if(c < T2)
			goto bad;
		l = ((c << Bitx) | c1) & Rune2;
		/* reject encodings that would have fit in one byte */
		if(l <= Rune1)
			goto bad;
		*rune = l;
		return 2;
	}

	/* If we can't read more than two characters we must stop */
	if(length <= 2) {
		goto badlen;
	}

	/*
	 * three character sequence (16-bit value)
	 *	0800-FFFF => T3 Tx Tx
	 */
	c2 = *(uchar*)(str+2) ^ Tx;
	if(c2 & Testx)
		goto bad;
	if(c < T4) {
		l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3;
		/* reject encodings that would have fit in two bytes */
		if(l <= Rune2)
			goto bad;
		*rune = l;
		return 3;
	}

	if(length <= 3)
		goto badlen;

	/*
	 * four character sequence (21-bit value)
	 *	10000-10FFFF => T4 Tx Tx Tx
	 * (values up to 1FFFFF fit the encoding but exceed Runemax)
	 */
	c3 = *(uchar*)(str+3) ^ Tx;
	if(c3 & Testx)
		goto bad;
	if(c < T5) {
		l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4;
		/* reject three-byte-sized values and values above Runemax */
		if(l <= Rune3 || l > Runemax)
			goto bad;
		*rune = l;
		return 4;
	}

	/*
	 * Support for 5-byte or longer UTF-8 would go here, but
	 * since we don't have that, we'll just fall through to bad.
	 */

	/*
	 * bad decoding
	 */
bad:
	*rune = Bad;
	return 1;
badlen:
	*rune = Bad;
	return 0;
}
/*
* This is the older "unsafe" version, which works fine on
* null-terminated strings.
*/
int
chartorune
(
Rune
*
rune
,
char
*
str
)
chartorune
(
Rune
*
rune
,
c
onst
c
har
*
str
)
{
int
c
,
c1
,
c2
;
int
c
,
c1
,
c2
,
c3
;
long
l
;
/*
...
...
@@ -88,6 +212,26 @@ chartorune(Rune *rune, char *str)
return
3
;
}
/*
* four character sequence (21-bit value)
* 10000-1FFFFF => T4 Tx Tx Tx
*/
c3
=
*
(
uchar
*
)(
str
+
3
)
^
Tx
;
if
(
c3
&
Testx
)
goto
bad
;
if
(
c
<
T5
)
{
l
=
((((((
c
<<
Bitx
)
|
c1
)
<<
Bitx
)
|
c2
)
<<
Bitx
)
|
c3
)
&
Rune4
;
if
(
l
<=
Rune3
)
goto
bad
;
*
rune
=
l
;
return
4
;
}
/*
* Support for 5-byte or longer UTF-8 would go here, but
* since we don't have that, we'll just fall through to bad.
*/
/*
* bad decoding
*/
...
...
@@ -97,9 +241,16 @@ bad:
}
int
runetochar
(
char
*
str
,
Rune
*
rune
)
isvalidcharntorune
(
const
char
*
str
,
int
length
,
Rune
*
rune
,
int
*
consumed
)
{
*
consumed
=
charntorune
(
rune
,
str
,
length
);
return
*
rune
!=
Runeerror
||
*
consumed
==
3
;
}
int
runetochar
(
char
*
str
,
const
Rune
*
rune
)
{
long
c
;
/* Runes are signed, so convert to unsigned for range check. */
unsigned
long
c
;
/*
* one character sequence
...
...
@@ -121,57 +272,80 @@ runetochar(char *str, Rune *rune)
return
2
;
}
/*
* If the Rune is out of range, convert it to the error rune.
* Do this test here because the error rune encodes to three bytes.
* Doing it earlier would duplicate work, since an out of range
* Rune wouldn't have fit in one or two bytes.
*/
if
(
c
>
Runemax
)
c
=
Runeerror
;
/*
* three character sequence
* 0800-FFFF => T3 Tx Tx
*/
str
[
0
]
=
T3
|
(
c
>>
2
*
Bitx
);
str
[
1
]
=
Tx
|
((
c
>>
1
*
Bitx
)
&
Maskx
);
str
[
2
]
=
Tx
|
(
c
&
Maskx
);
return
3
;
if
(
c
<=
Rune3
)
{
str
[
0
]
=
T3
|
(
c
>>
2
*
Bitx
);
str
[
1
]
=
Tx
|
((
c
>>
1
*
Bitx
)
&
Maskx
);
str
[
2
]
=
Tx
|
(
c
&
Maskx
);
return
3
;
}
/*
* four character sequence (21-bit value)
* 10000-1FFFFF => T4 Tx Tx Tx
*/
str
[
0
]
=
T4
|
(
c
>>
3
*
Bitx
);
str
[
1
]
=
Tx
|
((
c
>>
2
*
Bitx
)
&
Maskx
);
str
[
2
]
=
Tx
|
((
c
>>
1
*
Bitx
)
&
Maskx
);
str
[
3
]
=
Tx
|
(
c
&
Maskx
);
return
4
;
}
int
runelen
(
long
c
)
runelen
(
Rune
rune
)
{
Rune
rune
;
char
str
[
10
];
rune
=
c
;
return
runetochar
(
str
,
&
rune
);
}
int
runenlen
(
Rune
*
r
,
int
nrune
)
runenlen
(
const
Rune
*
r
,
int
nrune
)
{
int
nb
,
c
;
nb
=
0
;
while
(
nrune
--
)
{
c
=
*
r
++
;
if
(
c
<=
Rune1
)
if
(
c
<=
Rune1
)
nb
++
;
else
if
(
c
<=
Rune2
)
else
if
(
c
<=
Rune2
)
nb
+=
2
;
else
else
if
(
c
<=
Rune3
)
nb
+=
3
;
else
/* assert(c <= Rune4) */
nb
+=
4
;
}
return
nb
;
}
int
fullrune
(
char
*
str
,
int
n
)
fullrune
(
c
onst
c
har
*
str
,
int
n
)
{
int
c
;
if
(
n
>
0
)
{
c
=
*
(
uchar
*
)
str
;
if
(
c
<
Tx
)
if
(
n
>
0
)
{
int
c
=
*
(
uchar
*
)
str
;
if
(
c
<
Tx
)
return
1
;
if
(
n
>
1
)
if
(
c
<
T3
||
n
>
2
)
if
(
n
>
1
)
{
if
(
c
<
T3
)
return
1
;
if
(
n
>
2
)
{
if
(
c
<
T4
||
n
>
3
)
return
1
;
}
}
}
return
0
;
}
src/lib9/utf/runetype.c
View file @
5b904a3b
...
...
@@ -7,1037 +7,22 @@
* or modification of this software and in all copies of the supporting
* documentation for such software.
* THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
* WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
*
ANY
REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
* WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
ANY
* REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
* OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
*/
#include <stdarg.h>
#include <string.h>
#include "plan9.h"
#include "utf.h"
#include "utfdef.h"
/*
* alpha ranges -
* only covers ranges not in lower||upper
*/
static
Rune
__alpha2
[]
=
{
0x00d8
,
0x00f6
,
/* Ø - ö */
0x00f8
,
0x01f5
,
/* ø - ǵ */
0x0250
,
0x02a8
,
/* ɐ - ʨ */
0x038e
,
0x03a1
,
/* Ύ - Ρ */
0x03a3
,
0x03ce
,
/* Σ - ώ */
0x03d0
,
0x03d6
,
/* ϐ - ϖ */
0x03e2
,
0x03f3
,
/* Ϣ - ϳ */
0x0490
,
0x04c4
,
/* Ґ - ӄ */
0x0561
,
0x0587
,
/* ա - և */
0x05d0
,
0x05ea
,
/* א - ת */
0x05f0
,
0x05f2
,
/* װ - ײ */
0x0621
,
0x063a
,
/* ء - غ */
0x0640
,
0x064a
,
/* ـ - ي */
0x0671
,
0x06b7
,
/* ٱ - ڷ */
0x06ba
,
0x06be
,
/* ں - ھ */
0x06c0
,
0x06ce
,
/* ۀ - ێ */
0x06d0
,
0x06d3
,
/* ې - ۓ */
0x0905
,
0x0939
,
/* अ - ह */
0x0958
,
0x0961
,
/* क़ - ॡ */
0x0985
,
0x098c
,
/* অ - ঌ */
0x098f
,
0x0990
,
/* এ - ঐ */
0x0993
,
0x09a8
,
/* ও - ন */
0x09aa
,
0x09b0
,
/* প - র */
0x09b6
,
0x09b9
,
/* শ - হ */
0x09dc
,
0x09dd
,
/* ড় - ঢ় */
0x09df
,
0x09e1
,
/* য় - ৡ */
0x09f0
,
0x09f1
,
/* ৰ - ৱ */
0x0a05
,
0x0a0a
,
/* ਅ - ਊ */
0x0a0f
,
0x0a10
,
/* ਏ - ਐ */
0x0a13
,
0x0a28
,
/* ਓ - ਨ */
0x0a2a
,
0x0a30
,
/* ਪ - ਰ */
0x0a32
,
0x0a33
,
/* ਲ - ਲ਼ */
0x0a35
,
0x0a36
,
/* ਵ - ਸ਼ */
0x0a38
,
0x0a39
,
/* ਸ - ਹ */
0x0a59
,
0x0a5c
,
/* ਖ਼ - ੜ */
0x0a85
,
0x0a8b
,
/* અ - ઋ */
0x0a8f
,
0x0a91
,
/* એ - ઑ */
0x0a93
,
0x0aa8
,
/* ઓ - ન */
0x0aaa
,
0x0ab0
,
/* પ - ર */
0x0ab2
,
0x0ab3
,
/* લ - ળ */
0x0ab5
,
0x0ab9
,
/* વ - હ */
0x0b05
,
0x0b0c
,
/* ଅ - ଌ */
0x0b0f
,
0x0b10
,
/* ଏ - ଐ */
0x0b13
,
0x0b28
,
/* ଓ - ନ */
0x0b2a
,
0x0b30
,
/* ପ - ର */
0x0b32
,
0x0b33
,
/* ଲ - ଳ */
0x0b36
,
0x0b39
,
/* ଶ - ହ */
0x0b5c
,
0x0b5d
,
/* ଡ଼ - ଢ଼ */
0x0b5f
,
0x0b61
,
/* ୟ - ୡ */
0x0b85
,
0x0b8a
,
/* அ - ஊ */
0x0b8e
,
0x0b90
,
/* எ - ஐ */
0x0b92
,
0x0b95
,
/* ஒ - க */
0x0b99
,
0x0b9a
,
/* ங - ச */
0x0b9e
,
0x0b9f
,
/* ஞ - ட */
0x0ba3
,
0x0ba4
,
/* ண - த */
0x0ba8
,
0x0baa
,
/* ந - ப */
0x0bae
,
0x0bb5
,
/* ம - வ */
0x0bb7
,
0x0bb9
,
/* ஷ - ஹ */
0x0c05
,
0x0c0c
,
/* అ - ఌ */
0x0c0e
,
0x0c10
,
/* ఎ - ఐ */
0x0c12
,
0x0c28
,
/* ఒ - న */
0x0c2a
,
0x0c33
,
/* ప - ళ */
0x0c35
,
0x0c39
,
/* వ - హ */
0x0c60
,
0x0c61
,
/* ౠ - ౡ */
0x0c85
,
0x0c8c
,
/* ಅ - ಌ */
0x0c8e
,
0x0c90
,
/* ಎ - ಐ */
0x0c92
,
0x0ca8
,
/* ಒ - ನ */
0x0caa
,
0x0cb3
,
/* ಪ - ಳ */
0x0cb5
,
0x0cb9
,
/* ವ - ಹ */
0x0ce0
,
0x0ce1
,
/* ೠ - ೡ */
0x0d05
,
0x0d0c
,
/* അ - ഌ */
0x0d0e
,
0x0d10
,
/* എ - ഐ */
0x0d12
,
0x0d28
,
/* ഒ - ന */
0x0d2a
,
0x0d39
,
/* പ - ഹ */
0x0d60
,
0x0d61
,
/* ൠ - ൡ */
0x0e01
,
0x0e30
,
/* ก - ะ */
0x0e32
,
0x0e33
,
/* า - ำ */
0x0e40
,
0x0e46
,
/* เ - ๆ */
0x0e5a
,
0x0e5b
,
/* ๚ - ๛ */
0x0e81
,
0x0e82
,
/* ກ - ຂ */
0x0e87
,
0x0e88
,
/* ງ - ຈ */
0x0e94
,
0x0e97
,
/* ດ - ທ */
0x0e99
,
0x0e9f
,
/* ນ - ຟ */
0x0ea1
,
0x0ea3
,
/* ມ - ຣ */
0x0eaa
,
0x0eab
,
/* ສ - ຫ */
0x0ead
,
0x0eae
,
/* ອ - ຮ */
0x0eb2
,
0x0eb3
,
/* າ - ຳ */
0x0ec0
,
0x0ec4
,
/* ເ - ໄ */
0x0edc
,
0x0edd
,
/* ໜ - ໝ */
0x0f18
,
0x0f19
,
/* ༘ - ༙ */
0x0f40
,
0x0f47
,
/* ཀ - ཇ */
0x0f49
,
0x0f69
,
/* ཉ - ཀྵ */
0x10d0
,
0x10f6
,
/* ა - ჶ */
0x1100
,
0x1159
,
/* ᄀ - ᅙ */
0x115f
,
0x11a2
,
/* ᅟ - ᆢ */
0x11a8
,
0x11f9
,
/* ᆨ - ᇹ */
0x1e00
,
0x1e9b
,
/* Ḁ - ẛ */
0x1f50
,
0x1f57
,
/* ὐ - ὗ */
0x1f80
,
0x1fb4
,
/* ᾀ - ᾴ */
0x1fb6
,
0x1fbc
,
/* ᾶ - ᾼ */
0x1fc2
,
0x1fc4
,
/* ῂ - ῄ */
0x1fc6
,
0x1fcc
,
/* ῆ - ῌ */
0x1fd0
,
0x1fd3
,
/* ῐ - ΐ */
0x1fd6
,
0x1fdb
,
/* ῖ - Ί */
0x1fe0
,
0x1fec
,
/* ῠ - Ῥ */
0x1ff2
,
0x1ff4
,
/* ῲ - ῴ */
0x1ff6
,
0x1ffc
,
/* ῶ - ῼ */
0x210a
,
0x2113
,
/* ℊ - ℓ */
0x2115
,
0x211d
,
/* ℕ - ℝ */
0x2120
,
0x2122
,
/* ℠ - ™ */
0x212a
,
0x2131
,
/* K - ℱ */
0x2133
,
0x2138
,
/* ℳ - ℸ */
0x3041
,
0x3094
,
/* ぁ - ゔ */
0x30a1
,
0x30fa
,
/* ァ - ヺ */
0x3105
,
0x312c
,
/* ㄅ - ㄬ */
0x3131
,
0x318e
,
/* ㄱ - ㆎ */
0x3192
,
0x319f
,
/* ㆒ - ㆟ */
0x3260
,
0x327b
,
/* ㉠ - ㉻ */
0x328a
,
0x32b0
,
/* ㊊ - ㊰ */
0x32d0
,
0x32fe
,
/* ㋐ - ㋾ */
0x3300
,
0x3357
,
/* ㌀ - ㍗ */
0x3371
,
0x3376
,
/* ㍱ - ㍶ */
0x337b
,
0x3394
,
/* ㍻ - ㎔ */
0x3399
,
0x339e
,
/* ㎙ - ㎞ */
0x33a9
,
0x33ad
,
/* ㎩ - ㎭ */
0x33b0
,
0x33c1
,
/* ㎰ - ㏁ */
0x33c3
,
0x33c5
,
/* ㏃ - ㏅ */
0x33c7
,
0x33d7
,
/* ㏇ - ㏗ */
0x33d9
,
0x33dd
,
/* ㏙ - ㏝ */
0x4e00
,
0x9fff
,
/* 一 - 鿿 */
0xac00
,
0xd7a3
,
/* 가 - 힣 */
0xf900
,
0xfb06
,
/* 豈 - st */
0xfb13
,
0xfb17
,
/* ﬓ - ﬗ */
0xfb1f
,
0xfb28
,
/* ײַ - ﬨ */
0xfb2a
,
0xfb36
,
/* שׁ - זּ */
0xfb38
,
0xfb3c
,
/* טּ - לּ */
0xfb40
,
0xfb41
,
/* נּ - סּ */
0xfb43
,
0xfb44
,
/* ףּ - פּ */
0xfb46
,
0xfbb1
,
/* צּ - ﮱ */
0xfbd3
,
0xfd3d
,
/* ﯓ - ﴽ */
0xfd50
,
0xfd8f
,
/* ﵐ - ﶏ */
0xfd92
,
0xfdc7
,
/* ﶒ - ﷇ */
0xfdf0
,
0xfdf9
,
/* ﷰ - ﷹ */
0xfe70
,
0xfe72
,
/* ﹰ - ﹲ */
0xfe76
,
0xfefc
,
/* ﹶ - ﻼ */
0xff66
,
0xff6f
,
/* ヲ - ッ */
0xff71
,
0xff9d
,
/* ア - ン */
0xffa0
,
0xffbe
,
/* ᅠ - ᄒ */
0xffc2
,
0xffc7
,
/* ᅡ - ᅦ */
0xffca
,
0xffcf
,
/* ᅧ - ᅬ */
0xffd2
,
0xffd7
,
/* ᅭ - ᅲ */
0xffda
,
0xffdc
,
/* ᅳ - ᅵ */
};
/*
* alpha singlets -
* only covers ranges not in lower||upper
*/
static
Rune
__alpha1
[]
=
{
0x00aa
,
/* ª */
0x00b5
,
/* µ */
0x00ba
,
/* º */
0x03da
,
/* Ϛ */
0x03dc
,
/* Ϝ */
0x03de
,
/* Ϟ */
0x03e0
,
/* Ϡ */
0x06d5
,
/* ە */
0x09b2
,
/* ল */
0x0a5e
,
/* ਫ਼ */
0x0a8d
,
/* ઍ */
0x0ae0
,
/* ૠ */
0x0b9c
,
/* ஜ */
0x0cde
,
/* ೞ */
0x0e4f
,
/* ๏ */
0x0e84
,
/* ຄ */
0x0e8a
,
/* ຊ */
0x0e8d
,
/* ຍ */
0x0ea5
,
/* ລ */
0x0ea7
,
/* ວ */
0x0eb0
,
/* ະ */
0x0ebd
,
/* ຽ */
0x1fbe
,
/* ι */
0x207f
,
/* ⁿ */
0x20a8
,
/* ₨ */
0x2102
,
/* ℂ */
0x2107
,
/* ℇ */
0x2124
,
/* ℤ */
0x2126
,
/* Ω */
0x2128
,
/* ℨ */
0xfb3e
,
/* מּ */
0xfe74
,
/* ﹴ */
};
/*
 * space ranges
 * each row is an inclusive [first, last] pair of rune values;
 * isspacerune searches the table two entries at a time.
 */
static
Rune	__space2[] =
{
	0x0009,	0x000a,	/* tab and newline */
	0x0020,	0x0020,	/* space */
	0x00a0,	0x00a0,	/* U+00A0 no-break space */
	0x2000,	0x200b,	/* U+2000 en quad - U+200B zero width space */
	0x2028,	0x2029,	/* U+2028 line separator - U+2029 paragraph separator */
	0x3000,	0x3000,	/* U+3000 ideographic space */
	0xfeff,	0xfeff,	/* U+FEFF zero width no-break space (byte order mark) */
};
/*
* lower case ranges
* 3rd col is conversion excess 500
*/
static
Rune
__toupper2
[]
=
{
0x0061
,
0x007a
,
468
,
/* a-z A-Z */
0x00e0
,
0x00f6
,
468
,
/* à-ö À-Ö */
0x00f8
,
0x00fe
,
468
,
/* ø-þ Ø-Þ */
0x0256
,
0x0257
,
295
,
/* ɖ-ɗ Ɖ-Ɗ */
0x0258
,
0x0259
,
298
,
/* ɘ-ə Ǝ-Ə */
0x028a
,
0x028b
,
283
,
/* ʊ-ʋ Ʊ-Ʋ */
0x03ad
,
0x03af
,
463
,
/* έ-ί Έ-Ί */
0x03b1
,
0x03c1
,
468
,
/* α-ρ Α-Ρ */
0x03c3
,
0x03cb
,
468
,
/* σ-ϋ Σ-Ϋ */
0x03cd
,
0x03ce
,
437
,
/* ύ-ώ Ύ-Ώ */
0x0430
,
0x044f
,
468
,
/* а-я А-Я */
0x0451
,
0x045c
,
420
,
/* ё-ќ Ё-Ќ */
0x045e
,
0x045f
,
420
,
/* ў-џ Ў-Џ */
0x0561
,
0x0586
,
452
,
/* ա-ֆ Ա-Ֆ */
0x1f00
,
0x1f07
,
508
,
/* ἀ-ἇ Ἀ-Ἇ */
0x1f10
,
0x1f15
,
508
,
/* ἐ-ἕ Ἐ-Ἕ */
0x1f20
,
0x1f27
,
508
,
/* ἠ-ἧ Ἠ-Ἧ */
0x1f30
,
0x1f37
,
508
,
/* ἰ-ἷ Ἰ-Ἷ */
0x1f40
,
0x1f45
,
508
,
/* ὀ-ὅ Ὀ-Ὅ */
0x1f60
,
0x1f67
,
508
,
/* ὠ-ὧ Ὠ-Ὧ */
0x1f70
,
0x1f71
,
574
,
/* ὰ-ά Ὰ-Ά */
0x1f72
,
0x1f75
,
586
,
/* ὲ-ή Ὲ-Ή */
0x1f76
,
0x1f77
,
600
,
/* ὶ-ί Ὶ-Ί */
0x1f78
,
0x1f79
,
628
,
/* ὸ-ό Ὸ-Ό */
0x1f7a
,
0x1f7b
,
612
,
/* ὺ-ύ Ὺ-Ύ */
0x1f7c
,
0x1f7d
,
626
,
/* ὼ-ώ Ὼ-Ώ */
0x1f80
,
0x1f87
,
508
,
/* ᾀ-ᾇ ᾈ-ᾏ */
0x1f90
,
0x1f97
,
508
,
/* ᾐ-ᾗ ᾘ-ᾟ */
0x1fa0
,
0x1fa7
,
508
,
/* ᾠ-ᾧ ᾨ-ᾯ */
0x1fb0
,
0x1fb1
,
508
,
/* ᾰ-ᾱ Ᾰ-Ᾱ */
0x1fd0
,
0x1fd1
,
508
,
/* ῐ-ῑ Ῐ-Ῑ */
0x1fe0
,
0x1fe1
,
508
,
/* ῠ-ῡ Ῠ-Ῡ */
0x2170
,
0x217f
,
484
,
/* ⅰ-ⅿ Ⅰ-Ⅿ */
0x24d0
,
0x24e9
,
474
,
/* ⓐ-ⓩ Ⓐ-Ⓩ */
0xff41
,
0xff5a
,
468
,
/* a-z A-Z */
};
/*
* lower case singlets
* 2nd col is conversion excess 500
*/
static
Rune
__toupper1
[]
=
{
0x00ff
,
621
,
/* ÿ Ÿ */
0x0101
,
499
,
/* ā Ā */
0x0103
,
499
,
/* ă Ă */
0x0105
,
499
,
/* ą Ą */
0x0107
,
499
,
/* ć Ć */
0x0109
,
499
,
/* ĉ Ĉ */
0x010b
,
499
,
/* ċ Ċ */
0x010d
,
499
,
/* č Č */
0x010f
,
499
,
/* ď Ď */
0x0111
,
499
,
/* đ Đ */
0x0113
,
499
,
/* ē Ē */
0x0115
,
499
,
/* ĕ Ĕ */
0x0117
,
499
,
/* ė Ė */
0x0119
,
499
,
/* ę Ę */
0x011b
,
499
,
/* ě Ě */
0x011d
,
499
,
/* ĝ Ĝ */
0x011f
,
499
,
/* ğ Ğ */
0x0121
,
499
,
/* ġ Ġ */
0x0123
,
499
,
/* ģ Ģ */
0x0125
,
499
,
/* ĥ Ĥ */
0x0127
,
499
,
/* ħ Ħ */
0x0129
,
499
,
/* ĩ Ĩ */
0x012b
,
499
,
/* ī Ī */
0x012d
,
499
,
/* ĭ Ĭ */
0x012f
,
499
,
/* į Į */
0x0131
,
268
,
/* ı I */
0x0133
,
499
,
/* ij IJ */
0x0135
,
499
,
/* ĵ Ĵ */
0x0137
,
499
,
/* ķ Ķ */
0x013a
,
499
,
/* ĺ Ĺ */
0x013c
,
499
,
/* ļ Ļ */
0x013e
,
499
,
/* ľ Ľ */
0x0140
,
499
,
/* ŀ Ŀ */
0x0142
,
499
,
/* ł Ł */
0x0144
,
499
,
/* ń Ń */
0x0146
,
499
,
/* ņ Ņ */
0x0148
,
499
,
/* ň Ň */
0x014b
,
499
,
/* ŋ Ŋ */
0x014d
,
499
,
/* ō Ō */
0x014f
,
499
,
/* ŏ Ŏ */
0x0151
,
499
,
/* ő Ő */
0x0153
,
499
,
/* œ Œ */
0x0155
,
499
,
/* ŕ Ŕ */
0x0157
,
499
,
/* ŗ Ŗ */
0x0159
,
499
,
/* ř Ř */
0x015b
,
499
,
/* ś Ś */
0x015d
,
499
,
/* ŝ Ŝ */
0x015f
,
499
,
/* ş Ş */
0x0161
,
499
,
/* š Š */
0x0163
,
499
,
/* ţ Ţ */
0x0165
,
499
,
/* ť Ť */
0x0167
,
499
,
/* ŧ Ŧ */
0x0169
,
499
,
/* ũ Ũ */
0x016b
,
499
,
/* ū Ū */
0x016d
,
499
,
/* ŭ Ŭ */
0x016f
,
499
,
/* ů Ů */
0x0171
,
499
,
/* ű Ű */
0x0173
,
499
,
/* ų Ų */
0x0175
,
499
,
/* ŵ Ŵ */
0x0177
,
499
,
/* ŷ Ŷ */
0x017a
,
499
,
/* ź Ź */
0x017c
,
499
,
/* ż Ż */
0x017e
,
499
,
/* ž Ž */
0x017f
,
200
,
/* ſ S */
0x0183
,
499
,
/* ƃ Ƃ */
0x0185
,
499
,
/* ƅ Ƅ */
0x0188
,
499
,
/* ƈ Ƈ */
0x018c
,
499
,
/* ƌ Ƌ */
0x0192
,
499
,
/* ƒ Ƒ */
0x0199
,
499
,
/* ƙ Ƙ */
0x01a1
,
499
,
/* ơ Ơ */
0x01a3
,
499
,
/* ƣ Ƣ */
0x01a5
,
499
,
/* ƥ Ƥ */
0x01a8
,
499
,
/* ƨ Ƨ */
0x01ad
,
499
,
/* ƭ Ƭ */
0x01b0
,
499
,
/* ư Ư */
0x01b4
,
499
,
/* ƴ Ƴ */
0x01b6
,
499
,
/* ƶ Ƶ */
0x01b9
,
499
,
/* ƹ Ƹ */
0x01bd
,
499
,
/* ƽ Ƽ */
0x01c5
,
499
,
/* Dž DŽ */
0x01c6
,
498
,
/* dž DŽ */
0x01c8
,
499
,
/* Lj LJ */
0x01c9
,
498
,
/* lj LJ */
0x01cb
,
499
,
/* Nj NJ */
0x01cc
,
498
,
/* nj NJ */
0x01ce
,
499
,
/* ǎ Ǎ */
0x01d0
,
499
,
/* ǐ Ǐ */
0x01d2
,
499
,
/* ǒ Ǒ */
0x01d4
,
499
,
/* ǔ Ǔ */
0x01d6
,
499
,
/* ǖ Ǖ */
0x01d8
,
499
,
/* ǘ Ǘ */
0x01da
,
499
,
/* ǚ Ǚ */
0x01dc
,
499
,
/* ǜ Ǜ */
0x01df
,
499
,
/* ǟ Ǟ */
0x01e1
,
499
,
/* ǡ Ǡ */
0x01e3
,
499
,
/* ǣ Ǣ */
0x01e5
,
499
,
/* ǥ Ǥ */
0x01e7
,
499
,
/* ǧ Ǧ */
0x01e9
,
499
,
/* ǩ Ǩ */
0x01eb
,
499
,
/* ǫ Ǫ */
0x01ed
,
499
,
/* ǭ Ǭ */
0x01ef
,
499
,
/* ǯ Ǯ */
0x01f2
,
499
,
/* Dz DZ */
0x01f3
,
498
,
/* dz DZ */
0x01f5
,
499
,
/* ǵ Ǵ */
0x01fb
,
499
,
/* ǻ Ǻ */
0x01fd
,
499
,
/* ǽ Ǽ */
0x01ff
,
499
,
/* ǿ Ǿ */
0x0201
,
499
,
/* ȁ Ȁ */
0x0203
,
499
,
/* ȃ Ȃ */
0x0205
,
499
,
/* ȅ Ȅ */
0x0207
,
499
,
/* ȇ Ȇ */
0x0209
,
499
,
/* ȉ Ȉ */
0x020b
,
499
,
/* ȋ Ȋ */
0x020d
,
499
,
/* ȍ Ȍ */
0x020f
,
499
,
/* ȏ Ȏ */
0x0211
,
499
,
/* ȑ Ȑ */
0x0213
,
499
,
/* ȓ Ȓ */
0x0215
,
499
,
/* ȕ Ȕ */
0x0217
,
499
,
/* ȗ Ȗ */
0x0253
,
290
,
/* ɓ Ɓ */
0x0254
,
294
,
/* ɔ Ɔ */
0x025b
,
297
,
/* ɛ Ɛ */
0x0260
,
295
,
/* ɠ Ɠ */
0x0263
,
293
,
/* ɣ Ɣ */
0x0268
,
291
,
/* ɨ Ɨ */
0x0269
,
289
,
/* ɩ Ɩ */
0x026f
,
289
,
/* ɯ Ɯ */
0x0272
,
287
,
/* ɲ Ɲ */
0x0283
,
282
,
/* ʃ Ʃ */
0x0288
,
282
,
/* ʈ Ʈ */
0x0292
,
281
,
/* ʒ Ʒ */
0x03ac
,
462
,
/* ά Ά */
0x03cc
,
436
,
/* ό Ό */
0x03d0
,
438
,
/* ϐ Β */
0x03d1
,
443
,
/* ϑ Θ */
0x03d5
,
453
,
/* ϕ Φ */
0x03d6
,
446
,
/* ϖ Π */
0x03e3
,
499
,
/* ϣ Ϣ */
0x03e5
,
499
,
/* ϥ Ϥ */
0x03e7
,
499
,
/* ϧ Ϧ */
0x03e9
,
499
,
/* ϩ Ϩ */
0x03eb
,
499
,
/* ϫ Ϫ */
0x03ed
,
499
,
/* ϭ Ϭ */
0x03ef
,
499
,
/* ϯ Ϯ */
0x03f0
,
414
,
/* ϰ Κ */
0x03f1
,
420
,
/* ϱ Ρ */
0x0461
,
499
,
/* ѡ Ѡ */
0x0463
,
499
,
/* ѣ Ѣ */
0x0465
,
499
,
/* ѥ Ѥ */
0x0467
,
499
,
/* ѧ Ѧ */
0x0469
,
499
,
/* ѩ Ѩ */
0x046b
,
499
,
/* ѫ Ѫ */
0x046d
,
499
,
/* ѭ Ѭ */
0x046f
,
499
,
/* ѯ Ѯ */
0x0471
,
499
,
/* ѱ Ѱ */
0x0473
,
499
,
/* ѳ Ѳ */
0x0475
,
499
,
/* ѵ Ѵ */
0x0477
,
499
,
/* ѷ Ѷ */
0x0479
,
499
,
/* ѹ Ѹ */
0x047b
,
499
,
/* ѻ Ѻ */
0x047d
,
499
,
/* ѽ Ѽ */
0x047f
,
499
,
/* ѿ Ѿ */
0x0481
,
499
,
/* ҁ Ҁ */
0x0491
,
499
,
/* ґ Ґ */
0x0493
,
499
,
/* ғ Ғ */
0x0495
,
499
,
/* ҕ Ҕ */
0x0497
,
499
,
/* җ Җ */
0x0499
,
499
,
/* ҙ Ҙ */
0x049b
,
499
,
/* қ Қ */
0x049d
,
499
,
/* ҝ Ҝ */
0x049f
,
499
,
/* ҟ Ҟ */
0x04a1
,
499
,
/* ҡ Ҡ */
0x04a3
,
499
,
/* ң Ң */
0x04a5
,
499
,
/* ҥ Ҥ */
0x04a7
,
499
,
/* ҧ Ҧ */
0x04a9
,
499
,
/* ҩ Ҩ */
0x04ab
,
499
,
/* ҫ Ҫ */
0x04ad
,
499
,
/* ҭ Ҭ */
0x04af
,
499
,
/* ү Ү */
0x04b1
,
499
,
/* ұ Ұ */
0x04b3
,
499
,
/* ҳ Ҳ */
0x04b5
,
499
,
/* ҵ Ҵ */
0x04b7
,
499
,
/* ҷ Ҷ */
0x04b9
,
499
,
/* ҹ Ҹ */
0x04bb
,
499
,
/* һ Һ */
0x04bd
,
499
,
/* ҽ Ҽ */
0x04bf
,
499
,
/* ҿ Ҿ */
0x04c2
,
499
,
/* ӂ Ӂ */
0x04c4
,
499
,
/* ӄ Ӄ */
0x04c8
,
499
,
/* ӈ Ӈ */
0x04cc
,
499
,
/* ӌ Ӌ */
0x04d1
,
499
,
/* ӑ Ӑ */
0x04d3
,
499
,
/* ӓ Ӓ */
0x04d5
,
499
,
/* ӕ Ӕ */
0x04d7
,
499
,
/* ӗ Ӗ */
0x04d9
,
499
,
/* ә Ә */
0x04db
,
499
,
/* ӛ Ӛ */
0x04dd
,
499
,
/* ӝ Ӝ */
0x04df
,
499
,
/* ӟ Ӟ */
0x04e1
,
499
,
/* ӡ Ӡ */
0x04e3
,
499
,
/* ӣ Ӣ */
0x04e5
,
499
,
/* ӥ Ӥ */
0x04e7
,
499
,
/* ӧ Ӧ */
0x04e9
,
499
,
/* ө Ө */
0x04eb
,
499
,
/* ӫ Ӫ */
0x04ef
,
499
,
/* ӯ Ӯ */
0x04f1
,
499
,
/* ӱ Ӱ */
0x04f3
,
499
,
/* ӳ Ӳ */
0x04f5
,
499
,
/* ӵ Ӵ */
0x04f9
,
499
,
/* ӹ Ӹ */
0x1e01
,
499
,
/* ḁ Ḁ */
0x1e03
,
499
,
/* ḃ Ḃ */
0x1e05
,
499
,
/* ḅ Ḅ */
0x1e07
,
499
,
/* ḇ Ḇ */
0x1e09
,
499
,
/* ḉ Ḉ */
0x1e0b
,
499
,
/* ḋ Ḋ */
0x1e0d
,
499
,
/* ḍ Ḍ */
0x1e0f
,
499
,
/* ḏ Ḏ */
0x1e11
,
499
,
/* ḑ Ḑ */
0x1e13
,
499
,
/* ḓ Ḓ */
0x1e15
,
499
,
/* ḕ Ḕ */
0x1e17
,
499
,
/* ḗ Ḗ */
0x1e19
,
499
,
/* ḙ Ḙ */
0x1e1b
,
499
,
/* ḛ Ḛ */
0x1e1d
,
499
,
/* ḝ Ḝ */
0x1e1f
,
499
,
/* ḟ Ḟ */
0x1e21
,
499
,
/* ḡ Ḡ */
0x1e23
,
499
,
/* ḣ Ḣ */
0x1e25
,
499
,
/* ḥ Ḥ */
0x1e27
,
499
,
/* ḧ Ḧ */
0x1e29
,
499
,
/* ḩ Ḩ */
0x1e2b
,
499
,
/* ḫ Ḫ */
0x1e2d
,
499
,
/* ḭ Ḭ */
0x1e2f
,
499
,
/* ḯ Ḯ */
0x1e31
,
499
,
/* ḱ Ḱ */
0x1e33
,
499
,
/* ḳ Ḳ */
0x1e35
,
499
,
/* ḵ Ḵ */
0x1e37
,
499
,
/* ḷ Ḷ */
0x1e39
,
499
,
/* ḹ Ḹ */
0x1e3b
,
499
,
/* ḻ Ḻ */
0x1e3d
,
499
,
/* ḽ Ḽ */
0x1e3f
,
499
,
/* ḿ Ḿ */
0x1e41
,
499
,
/* ṁ Ṁ */
0x1e43
,
499
,
/* ṃ Ṃ */
0x1e45
,
499
,
/* ṅ Ṅ */
0x1e47
,
499
,
/* ṇ Ṇ */
0x1e49
,
499
,
/* ṉ Ṉ */
0x1e4b
,
499
,
/* ṋ Ṋ */
0x1e4d
,
499
,
/* ṍ Ṍ */
0x1e4f
,
499
,
/* ṏ Ṏ */
0x1e51
,
499
,
/* ṑ Ṑ */
0x1e53
,
499
,
/* ṓ Ṓ */
0x1e55
,
499
,
/* ṕ Ṕ */
0x1e57
,
499
,
/* ṗ Ṗ */
0x1e59
,
499
,
/* ṙ Ṙ */
0x1e5b
,
499
,
/* ṛ Ṛ */
0x1e5d
,
499
,
/* ṝ Ṝ */
0x1e5f
,
499
,
/* ṟ Ṟ */
0x1e61
,
499
,
/* ṡ Ṡ */
0x1e63
,
499
,
/* ṣ Ṣ */
0x1e65
,
499
,
/* ṥ Ṥ */
0x1e67
,
499
,
/* ṧ Ṧ */
0x1e69
,
499
,
/* ṩ Ṩ */
0x1e6b
,
499
,
/* ṫ Ṫ */
0x1e6d
,
499
,
/* ṭ Ṭ */
0x1e6f
,
499
,
/* ṯ Ṯ */
0x1e71
,
499
,
/* ṱ Ṱ */
0x1e73
,
499
,
/* ṳ Ṳ */
0x1e75
,
499
,
/* ṵ Ṵ */
0x1e77
,
499
,
/* ṷ Ṷ */
0x1e79
,
499
,
/* ṹ Ṹ */
0x1e7b
,
499
,
/* ṻ Ṻ */
0x1e7d
,
499
,
/* ṽ Ṽ */
0x1e7f
,
499
,
/* ṿ Ṿ */
0x1e81
,
499
,
/* ẁ Ẁ */
0x1e83
,
499
,
/* ẃ Ẃ */
0x1e85
,
499
,
/* ẅ Ẅ */
0x1e87
,
499
,
/* ẇ Ẇ */
0x1e89
,
499
,
/* ẉ Ẉ */
0x1e8b
,
499
,
/* ẋ Ẋ */
0x1e8d
,
499
,
/* ẍ Ẍ */
0x1e8f
,
499
,
/* ẏ Ẏ */
0x1e91
,
499
,
/* ẑ Ẑ */
0x1e93
,
499
,
/* ẓ Ẓ */
0x1e95
,
499
,
/* ẕ Ẕ */
0x1ea1
,
499
,
/* ạ Ạ */
0x1ea3
,
499
,
/* ả Ả */
0x1ea5
,
499
,
/* ấ Ấ */
0x1ea7
,
499
,
/* ầ Ầ */
0x1ea9
,
499
,
/* ẩ Ẩ */
0x1eab
,
499
,
/* ẫ Ẫ */
0x1ead
,
499
,
/* ậ Ậ */
0x1eaf
,
499
,
/* ắ Ắ */
0x1eb1
,
499
,
/* ằ Ằ */
0x1eb3
,
499
,
/* ẳ Ẳ */
0x1eb5
,
499
,
/* ẵ Ẵ */
0x1eb7
,
499
,
/* ặ Ặ */
0x1eb9
,
499
,
/* ẹ Ẹ */
0x1ebb
,
499
,
/* ẻ Ẻ */
0x1ebd
,
499
,
/* ẽ Ẽ */
0x1ebf
,
499
,
/* ế Ế */
0x1ec1
,
499
,
/* ề Ề */
0x1ec3
,
499
,
/* ể Ể */
0x1ec5
,
499
,
/* ễ Ễ */
0x1ec7
,
499
,
/* ệ Ệ */
0x1ec9
,
499
,
/* ỉ Ỉ */
0x1ecb
,
499
,
/* ị Ị */
0x1ecd
,
499
,
/* ọ Ọ */
0x1ecf
,
499
,
/* ỏ Ỏ */
0x1ed1
,
499
,
/* ố Ố */
0x1ed3
,
499
,
/* ồ Ồ */
0x1ed5
,
499
,
/* ổ Ổ */
0x1ed7
,
499
,
/* ỗ Ỗ */
0x1ed9
,
499
,
/* ộ Ộ */
0x1edb
,
499
,
/* ớ Ớ */
0x1edd
,
499
,
/* ờ Ờ */
0x1edf
,
499
,
/* ở Ở */
0x1ee1
,
499
,
/* ỡ Ỡ */
0x1ee3
,
499
,
/* ợ Ợ */
0x1ee5
,
499
,
/* ụ Ụ */
0x1ee7
,
499
,
/* ủ Ủ */
0x1ee9
,
499
,
/* ứ Ứ */
0x1eeb
,
499
,
/* ừ Ừ */
0x1eed
,
499
,
/* ử Ử */
0x1eef
,
499
,
/* ữ Ữ */
0x1ef1
,
499
,
/* ự Ự */
0x1ef3
,
499
,
/* ỳ Ỳ */
0x1ef5
,
499
,
/* ỵ Ỵ */
0x1ef7
,
499
,
/* ỷ Ỷ */
0x1ef9
,
499
,
/* ỹ Ỹ */
0x1f51
,
508
,
/* ὑ Ὑ */
0x1f53
,
508
,
/* ὓ Ὓ */
0x1f55
,
508
,
/* ὕ Ὕ */
0x1f57
,
508
,
/* ὗ Ὗ */
0x1fb3
,
509
,
/* ᾳ ᾼ */
0x1fc3
,
509
,
/* ῃ ῌ */
0x1fe5
,
507
,
/* ῥ Ῥ */
0x1ff3
,
509
,
/* ῳ ῼ */
};
/*
* upper case ranges
* 3rd col is conversion excess 500
*/
static
Rune
__tolower2
[]
=
{
0x0041
,
0x005a
,
532
,
/* A-Z a-z */
0x00c0
,
0x00d6
,
532
,
/* À-Ö à-ö */
0x00d8
,
0x00de
,
532
,
/* Ø-Þ ø-þ */
0x0189
,
0x018a
,
705
,
/* Ɖ-Ɗ ɖ-ɗ */
0x018e
,
0x018f
,
702
,
/* Ǝ-Ə ɘ-ə */
0x01b1
,
0x01b2
,
717
,
/* Ʊ-Ʋ ʊ-ʋ */
0x0388
,
0x038a
,
537
,
/* Έ-Ί έ-ί */
0x038e
,
0x038f
,
563
,
/* Ύ-Ώ ύ-ώ */
0x0391
,
0x03a1
,
532
,
/* Α-Ρ α-ρ */
0x03a3
,
0x03ab
,
532
,
/* Σ-Ϋ σ-ϋ */
0x0401
,
0x040c
,
580
,
/* Ё-Ќ ё-ќ */
0x040e
,
0x040f
,
580
,
/* Ў-Џ ў-џ */
0x0410
,
0x042f
,
532
,
/* А-Я а-я */
0x0531
,
0x0556
,
548
,
/* Ա-Ֆ ա-ֆ */
0x10a0
,
0x10c5
,
548
,
/* Ⴀ-Ⴥ ა-ჵ */
0x1f08
,
0x1f0f
,
492
,
/* Ἀ-Ἇ ἀ-ἇ */
0x1f18
,
0x1f1d
,
492
,
/* Ἐ-Ἕ ἐ-ἕ */
0x1f28
,
0x1f2f
,
492
,
/* Ἠ-Ἧ ἠ-ἧ */
0x1f38
,
0x1f3f
,
492
,
/* Ἰ-Ἷ ἰ-ἷ */
0x1f48
,
0x1f4d
,
492
,
/* Ὀ-Ὅ ὀ-ὅ */
0x1f68
,
0x1f6f
,
492
,
/* Ὠ-Ὧ ὠ-ὧ */
0x1f88
,
0x1f8f
,
492
,
/* ᾈ-ᾏ ᾀ-ᾇ */
0x1f98
,
0x1f9f
,
492
,
/* ᾘ-ᾟ ᾐ-ᾗ */
0x1fa8
,
0x1faf
,
492
,
/* ᾨ-ᾯ ᾠ-ᾧ */
0x1fb8
,
0x1fb9
,
492
,
/* Ᾰ-Ᾱ ᾰ-ᾱ */
0x1fba
,
0x1fbb
,
426
,
/* Ὰ-Ά ὰ-ά */
0x1fc8
,
0x1fcb
,
414
,
/* Ὲ-Ή ὲ-ή */
0x1fd8
,
0x1fd9
,
492
,
/* Ῐ-Ῑ ῐ-ῑ */
0x1fda
,
0x1fdb
,
400
,
/* Ὶ-Ί ὶ-ί */
0x1fe8
,
0x1fe9
,
492
,
/* Ῠ-Ῡ ῠ-ῡ */
0x1fea
,
0x1feb
,
388
,
/* Ὺ-Ύ ὺ-ύ */
0x1ff8
,
0x1ff9
,
372
,
/* Ὸ-Ό ὸ-ό */
0x1ffa
,
0x1ffb
,
374
,
/* Ὼ-Ώ ὼ-ώ */
0x2160
,
0x216f
,
516
,
/* Ⅰ-Ⅿ ⅰ-ⅿ */
0x24b6
,
0x24cf
,
526
,
/* Ⓐ-Ⓩ ⓐ-ⓩ */
0xff21
,
0xff3a
,
532
,
/* A-Z a-z */
};
/*
* upper case singlets
* 2nd col is conversion excess 500
*/
static
Rune
__tolower1
[]
=
{
0x0100
,
501
,
/* Ā ā */
0x0102
,
501
,
/* Ă ă */
0x0104
,
501
,
/* Ą ą */
0x0106
,
501
,
/* Ć ć */
0x0108
,
501
,
/* Ĉ ĉ */
0x010a
,
501
,
/* Ċ ċ */
0x010c
,
501
,
/* Č č */
0x010e
,
501
,
/* Ď ď */
0x0110
,
501
,
/* Đ đ */
0x0112
,
501
,
/* Ē ē */
0x0114
,
501
,
/* Ĕ ĕ */
0x0116
,
501
,
/* Ė ė */
0x0118
,
501
,
/* Ę ę */
0x011a
,
501
,
/* Ě ě */
0x011c
,
501
,
/* Ĝ ĝ */
0x011e
,
501
,
/* Ğ ğ */
0x0120
,
501
,
/* Ġ ġ */
0x0122
,
501
,
/* Ģ ģ */
0x0124
,
501
,
/* Ĥ ĥ */
0x0126
,
501
,
/* Ħ ħ */
0x0128
,
501
,
/* Ĩ ĩ */
0x012a
,
501
,
/* Ī ī */
0x012c
,
501
,
/* Ĭ ĭ */
0x012e
,
501
,
/* Į į */
0x0130
,
301
,
/* İ i */
0x0132
,
501
,
/* IJ ij */
0x0134
,
501
,
/* Ĵ ĵ */
0x0136
,
501
,
/* Ķ ķ */
0x0139
,
501
,
/* Ĺ ĺ */
0x013b
,
501
,
/* Ļ ļ */
0x013d
,
501
,
/* Ľ ľ */
0x013f
,
501
,
/* Ŀ ŀ */
0x0141
,
501
,
/* Ł ł */
0x0143
,
501
,
/* Ń ń */
0x0145
,
501
,
/* Ņ ņ */
0x0147
,
501
,
/* Ň ň */
0x014a
,
501
,
/* Ŋ ŋ */
0x014c
,
501
,
/* Ō ō */
0x014e
,
501
,
/* Ŏ ŏ */
0x0150
,
501
,
/* Ő ő */
0x0152
,
501
,
/* Œ œ */
0x0154
,
501
,
/* Ŕ ŕ */
0x0156
,
501
,
/* Ŗ ŗ */
0x0158
,
501
,
/* Ř ř */
0x015a
,
501
,
/* Ś ś */
0x015c
,
501
,
/* Ŝ ŝ */
0x015e
,
501
,
/* Ş ş */
0x0160
,
501
,
/* Š š */
0x0162
,
501
,
/* Ţ ţ */
0x0164
,
501
,
/* Ť ť */
0x0166
,
501
,
/* Ŧ ŧ */
0x0168
,
501
,
/* Ũ ũ */
0x016a
,
501
,
/* Ū ū */
0x016c
,
501
,
/* Ŭ ŭ */
0x016e
,
501
,
/* Ů ů */
0x0170
,
501
,
/* Ű ű */
0x0172
,
501
,
/* Ų ų */
0x0174
,
501
,
/* Ŵ ŵ */
0x0176
,
501
,
/* Ŷ ŷ */
0x0178
,
379
,
/* Ÿ ÿ */
0x0179
,
501
,
/* Ź ź */
0x017b
,
501
,
/* Ż ż */
0x017d
,
501
,
/* Ž ž */
0x0181
,
710
,
/* Ɓ ɓ */
0x0182
,
501
,
/* Ƃ ƃ */
0x0184
,
501
,
/* Ƅ ƅ */
0x0186
,
706
,
/* Ɔ ɔ */
0x0187
,
501
,
/* Ƈ ƈ */
0x018b
,
501
,
/* Ƌ ƌ */
0x0190
,
703
,
/* Ɛ ɛ */
0x0191
,
501
,
/* Ƒ ƒ */
0x0193
,
705
,
/* Ɠ ɠ */
0x0194
,
707
,
/* Ɣ ɣ */
0x0196
,
711
,
/* Ɩ ɩ */
0x0197
,
709
,
/* Ɨ ɨ */
0x0198
,
501
,
/* Ƙ ƙ */
0x019c
,
711
,
/* Ɯ ɯ */
0x019d
,
713
,
/* Ɲ ɲ */
0x01a0
,
501
,
/* Ơ ơ */
0x01a2
,
501
,
/* Ƣ ƣ */
0x01a4
,
501
,
/* Ƥ ƥ */
0x01a7
,
501
,
/* Ƨ ƨ */
0x01a9
,
718
,
/* Ʃ ʃ */
0x01ac
,
501
,
/* Ƭ ƭ */
0x01ae
,
718
,
/* Ʈ ʈ */
0x01af
,
501
,
/* Ư ư */
0x01b3
,
501
,
/* Ƴ ƴ */
0x01b5
,
501
,
/* Ƶ ƶ */
0x01b7
,
719
,
/* Ʒ ʒ */
0x01b8
,
501
,
/* Ƹ ƹ */
0x01bc
,
501
,
/* Ƽ ƽ */
0x01c4
,
502
,
/* DŽ dž */
0x01c5
,
501
,
/* Dž dž */
0x01c7
,
502
,
/* LJ lj */
0x01c8
,
501
,
/* Lj lj */
0x01ca
,
502
,
/* NJ nj */
0x01cb
,
501
,
/* Nj nj */
0x01cd
,
501
,
/* Ǎ ǎ */
0x01cf
,
501
,
/* Ǐ ǐ */
0x01d1
,
501
,
/* Ǒ ǒ */
0x01d3
,
501
,
/* Ǔ ǔ */
0x01d5
,
501
,
/* Ǖ ǖ */
0x01d7
,
501
,
/* Ǘ ǘ */
0x01d9
,
501
,
/* Ǚ ǚ */
0x01db
,
501
,
/* Ǜ ǜ */
0x01de
,
501
,
/* Ǟ ǟ */
0x01e0
,
501
,
/* Ǡ ǡ */
0x01e2
,
501
,
/* Ǣ ǣ */
0x01e4
,
501
,
/* Ǥ ǥ */
0x01e6
,
501
,
/* Ǧ ǧ */
0x01e8
,
501
,
/* Ǩ ǩ */
0x01ea
,
501
,
/* Ǫ ǫ */
0x01ec
,
501
,
/* Ǭ ǭ */
0x01ee
,
501
,
/* Ǯ ǯ */
0x01f1
,
502
,
/* DZ dz */
0x01f2
,
501
,
/* Dz dz */
0x01f4
,
501
,
/* Ǵ ǵ */
0x01fa
,
501
,
/* Ǻ ǻ */
0x01fc
,
501
,
/* Ǽ ǽ */
0x01fe
,
501
,
/* Ǿ ǿ */
0x0200
,
501
,
/* Ȁ ȁ */
0x0202
,
501
,
/* Ȃ ȃ */
0x0204
,
501
,
/* Ȅ ȅ */
0x0206
,
501
,
/* Ȇ ȇ */
0x0208
,
501
,
/* Ȉ ȉ */
0x020a
,
501
,
/* Ȋ ȋ */
0x020c
,
501
,
/* Ȍ ȍ */
0x020e
,
501
,
/* Ȏ ȏ */
0x0210
,
501
,
/* Ȑ ȑ */
0x0212
,
501
,
/* Ȓ ȓ */
0x0214
,
501
,
/* Ȕ ȕ */
0x0216
,
501
,
/* Ȗ ȗ */
0x0386
,
538
,
/* Ά ά */
0x038c
,
564
,
/* Ό ό */
0x03e2
,
501
,
/* Ϣ ϣ */
0x03e4
,
501
,
/* Ϥ ϥ */
0x03e6
,
501
,
/* Ϧ ϧ */
0x03e8
,
501
,
/* Ϩ ϩ */
0x03ea
,
501
,
/* Ϫ ϫ */
0x03ec
,
501
,
/* Ϭ ϭ */
0x03ee
,
501
,
/* Ϯ ϯ */
0x0460
,
501
,
/* Ѡ ѡ */
0x0462
,
501
,
/* Ѣ ѣ */
0x0464
,
501
,
/* Ѥ ѥ */
0x0466
,
501
,
/* Ѧ ѧ */
0x0468
,
501
,
/* Ѩ ѩ */
0x046a
,
501
,
/* Ѫ ѫ */
0x046c
,
501
,
/* Ѭ ѭ */
0x046e
,
501
,
/* Ѯ ѯ */
0x0470
,
501
,
/* Ѱ ѱ */
0x0472
,
501
,
/* Ѳ ѳ */
0x0474
,
501
,
/* Ѵ ѵ */
0x0476
,
501
,
/* Ѷ ѷ */
0x0478
,
501
,
/* Ѹ ѹ */
0x047a
,
501
,
/* Ѻ ѻ */
0x047c
,
501
,
/* Ѽ ѽ */
0x047e
,
501
,
/* Ѿ ѿ */
0x0480
,
501
,
/* Ҁ ҁ */
0x0490
,
501
,
/* Ґ ґ */
0x0492
,
501
,
/* Ғ ғ */
0x0494
,
501
,
/* Ҕ ҕ */
0x0496
,
501
,
/* Җ җ */
0x0498
,
501
,
/* Ҙ ҙ */
0x049a
,
501
,
/* Қ қ */
0x049c
,
501
,
/* Ҝ ҝ */
0x049e
,
501
,
/* Ҟ ҟ */
0x04a0
,
501
,
/* Ҡ ҡ */
0x04a2
,
501
,
/* Ң ң */
0x04a4
,
501
,
/* Ҥ ҥ */
0x04a6
,
501
,
/* Ҧ ҧ */
0x04a8
,
501
,
/* Ҩ ҩ */
0x04aa
,
501
,
/* Ҫ ҫ */
0x04ac
,
501
,
/* Ҭ ҭ */
0x04ae
,
501
,
/* Ү ү */
0x04b0
,
501
,
/* Ұ ұ */
0x04b2
,
501
,
/* Ҳ ҳ */
0x04b4
,
501
,
/* Ҵ ҵ */
0x04b6
,
501
,
/* Ҷ ҷ */
0x04b8
,
501
,
/* Ҹ ҹ */
0x04ba
,
501
,
/* Һ һ */
0x04bc
,
501
,
/* Ҽ ҽ */
0x04be
,
501
,
/* Ҿ ҿ */
0x04c1
,
501
,
/* Ӂ ӂ */
0x04c3
,
501
,
/* Ӄ ӄ */
0x04c7
,
501
,
/* Ӈ ӈ */
0x04cb
,
501
,
/* Ӌ ӌ */
0x04d0
,
501
,
/* Ӑ ӑ */
0x04d2
,
501
,
/* Ӓ ӓ */
0x04d4
,
501
,
/* Ӕ ӕ */
0x04d6
,
501
,
/* Ӗ ӗ */
0x04d8
,
501
,
/* Ә ә */
0x04da
,
501
,
/* Ӛ ӛ */
0x04dc
,
501
,
/* Ӝ ӝ */
0x04de
,
501
,
/* Ӟ ӟ */
0x04e0
,
501
,
/* Ӡ ӡ */
0x04e2
,
501
,
/* Ӣ ӣ */
0x04e4
,
501
,
/* Ӥ ӥ */
0x04e6
,
501
,
/* Ӧ ӧ */
0x04e8
,
501
,
/* Ө ө */
0x04ea
,
501
,
/* Ӫ ӫ */
0x04ee
,
501
,
/* Ӯ ӯ */
0x04f0
,
501
,
/* Ӱ ӱ */
0x04f2
,
501
,
/* Ӳ ӳ */
0x04f4
,
501
,
/* Ӵ ӵ */
0x04f8
,
501
,
/* Ӹ ӹ */
0x1e00
,
501
,
/* Ḁ ḁ */
0x1e02
,
501
,
/* Ḃ ḃ */
0x1e04
,
501
,
/* Ḅ ḅ */
0x1e06
,
501
,
/* Ḇ ḇ */
0x1e08
,
501
,
/* Ḉ ḉ */
0x1e0a
,
501
,
/* Ḋ ḋ */
0x1e0c
,
501
,
/* Ḍ ḍ */
0x1e0e
,
501
,
/* Ḏ ḏ */
0x1e10
,
501
,
/* Ḑ ḑ */
0x1e12
,
501
,
/* Ḓ ḓ */
0x1e14
,
501
,
/* Ḕ ḕ */
0x1e16
,
501
,
/* Ḗ ḗ */
0x1e18
,
501
,
/* Ḙ ḙ */
0x1e1a
,
501
,
/* Ḛ ḛ */
0x1e1c
,
501
,
/* Ḝ ḝ */
0x1e1e
,
501
,
/* Ḟ ḟ */
0x1e20
,
501
,
/* Ḡ ḡ */
0x1e22
,
501
,
/* Ḣ ḣ */
0x1e24
,
501
,
/* Ḥ ḥ */
0x1e26
,
501
,
/* Ḧ ḧ */
0x1e28
,
501
,
/* Ḩ ḩ */
0x1e2a
,
501
,
/* Ḫ ḫ */
0x1e2c
,
501
,
/* Ḭ ḭ */
0x1e2e
,
501
,
/* Ḯ ḯ */
0x1e30
,
501
,
/* Ḱ ḱ */
0x1e32
,
501
,
/* Ḳ ḳ */
0x1e34
,
501
,
/* Ḵ ḵ */
0x1e36
,
501
,
/* Ḷ ḷ */
0x1e38
,
501
,
/* Ḹ ḹ */
0x1e3a
,
501
,
/* Ḻ ḻ */
0x1e3c
,
501
,
/* Ḽ ḽ */
0x1e3e
,
501
,
/* Ḿ ḿ */
0x1e40
,
501
,
/* Ṁ ṁ */
0x1e42
,
501
,
/* Ṃ ṃ */
0x1e44
,
501
,
/* Ṅ ṅ */
0x1e46
,
501
,
/* Ṇ ṇ */
0x1e48
,
501
,
/* Ṉ ṉ */
0x1e4a
,
501
,
/* Ṋ ṋ */
0x1e4c
,
501
,
/* Ṍ ṍ */
0x1e4e
,
501
,
/* Ṏ ṏ */
0x1e50
,
501
,
/* Ṑ ṑ */
0x1e52
,
501
,
/* Ṓ ṓ */
0x1e54
,
501
,
/* Ṕ ṕ */
0x1e56
,
501
,
/* Ṗ ṗ */
0x1e58
,
501
,
/* Ṙ ṙ */
0x1e5a
,
501
,
/* Ṛ ṛ */
0x1e5c
,
501
,
/* Ṝ ṝ */
0x1e5e
,
501
,
/* Ṟ ṟ */
0x1e60
,
501
,
/* Ṡ ṡ */
0x1e62
,
501
,
/* Ṣ ṣ */
0x1e64
,
501
,
/* Ṥ ṥ */
0x1e66
,
501
,
/* Ṧ ṧ */
0x1e68
,
501
,
/* Ṩ ṩ */
0x1e6a
,
501
,
/* Ṫ ṫ */
0x1e6c
,
501
,
/* Ṭ ṭ */
0x1e6e
,
501
,
/* Ṯ ṯ */
0x1e70
,
501
,
/* Ṱ ṱ */
0x1e72
,
501
,
/* Ṳ ṳ */
0x1e74
,
501
,
/* Ṵ ṵ */
0x1e76
,
501
,
/* Ṷ ṷ */
0x1e78
,
501
,
/* Ṹ ṹ */
0x1e7a
,
501
,
/* Ṻ ṻ */
0x1e7c
,
501
,
/* Ṽ ṽ */
0x1e7e
,
501
,
/* Ṿ ṿ */
0x1e80
,
501
,
/* Ẁ ẁ */
0x1e82
,
501
,
/* Ẃ ẃ */
0x1e84
,
501
,
/* Ẅ ẅ */
0x1e86
,
501
,
/* Ẇ ẇ */
0x1e88
,
501
,
/* Ẉ ẉ */
0x1e8a
,
501
,
/* Ẋ ẋ */
0x1e8c
,
501
,
/* Ẍ ẍ */
0x1e8e
,
501
,
/* Ẏ ẏ */
0x1e90
,
501
,
/* Ẑ ẑ */
0x1e92
,
501
,
/* Ẓ ẓ */
0x1e94
,
501
,
/* Ẕ ẕ */
0x1ea0
,
501
,
/* Ạ ạ */
0x1ea2
,
501
,
/* Ả ả */
0x1ea4
,
501
,
/* Ấ ấ */
0x1ea6
,
501
,
/* Ầ ầ */
0x1ea8
,
501
,
/* Ẩ ẩ */
0x1eaa
,
501
,
/* Ẫ ẫ */
0x1eac
,
501
,
/* Ậ ậ */
0x1eae
,
501
,
/* Ắ ắ */
0x1eb0
,
501
,
/* Ằ ằ */
0x1eb2
,
501
,
/* Ẳ ẳ */
0x1eb4
,
501
,
/* Ẵ ẵ */
0x1eb6
,
501
,
/* Ặ ặ */
0x1eb8
,
501
,
/* Ẹ ẹ */
0x1eba
,
501
,
/* Ẻ ẻ */
0x1ebc
,
501
,
/* Ẽ ẽ */
0x1ebe
,
501
,
/* Ế ế */
0x1ec0
,
501
,
/* Ề ề */
0x1ec2
,
501
,
/* Ể ể */
0x1ec4
,
501
,
/* Ễ ễ */
0x1ec6
,
501
,
/* Ệ ệ */
0x1ec8
,
501
,
/* Ỉ ỉ */
0x1eca
,
501
,
/* Ị ị */
0x1ecc
,
501
,
/* Ọ ọ */
0x1ece
,
501
,
/* Ỏ ỏ */
0x1ed0
,
501
,
/* Ố ố */
0x1ed2
,
501
,
/* Ồ ồ */
0x1ed4
,
501
,
/* Ổ ổ */
0x1ed6
,
501
,
/* Ỗ ỗ */
0x1ed8
,
501
,
/* Ộ ộ */
0x1eda
,
501
,
/* Ớ ớ */
0x1edc
,
501
,
/* Ờ ờ */
0x1ede
,
501
,
/* Ở ở */
0x1ee0
,
501
,
/* Ỡ ỡ */
0x1ee2
,
501
,
/* Ợ ợ */
0x1ee4
,
501
,
/* Ụ ụ */
0x1ee6
,
501
,
/* Ủ ủ */
0x1ee8
,
501
,
/* Ứ ứ */
0x1eea
,
501
,
/* Ừ ừ */
0x1eec
,
501
,
/* Ử ử */
0x1eee
,
501
,
/* Ữ ữ */
0x1ef0
,
501
,
/* Ự ự */
0x1ef2
,
501
,
/* Ỳ ỳ */
0x1ef4
,
501
,
/* Ỵ ỵ */
0x1ef6
,
501
,
/* Ỷ ỷ */
0x1ef8
,
501
,
/* Ỹ ỹ */
0x1f59
,
492
,
/* Ὑ ὑ */
0x1f5b
,
492
,
/* Ὓ ὓ */
0x1f5d
,
492
,
/* Ὕ ὕ */
0x1f5f
,
492
,
/* Ὗ ὗ */
0x1fbc
,
491
,
/* ᾼ ᾳ */
0x1fcc
,
491
,
/* ῌ ῃ */
0x1fec
,
493
,
/* Ῥ ῥ */
0x1ffc
,
491
,
/* ῼ ῳ */
};
/*
 * title characters are those between
 * upper and lower case, i.e. the middle form of the digraph
 * letters (U+01C5, U+01C8, U+01CB, U+01F2).
 * each row is a rune followed by the offset to its title form,
 * stored excess 500: totitlerune returns rune + offset - 500.
 */
static
Rune	__totitle1[] =
{
	0x01c4,	501,	/* U+01C4 -> U+01C5 */
	0x01c6,	499,	/* U+01C6 -> U+01C5 */
	0x01c7,	501,	/* U+01C7 -> U+01C8 */
	0x01c9,	499,	/* U+01C9 -> U+01C8 */
	0x01ca,	501,	/* U+01CA -> U+01CB */
	0x01cc,	499,	/* U+01CC -> U+01CB */
	0x01f1,	501,	/* U+01F1 -> U+01F2 */
	0x01f3,	499,	/* U+01F3 -> U+01F2 */
};
static
Rune
*
bsearch
(
Rune
c
,
Rune
*
t
,
int
n
,
int
ne
)
Rune
*
rbsearch
(
Rune
c
,
Rune
*
t
,
int
n
,
int
ne
)
{
Rune
*
p
;
int
m
;
while
(
n
>
1
)
{
m
=
n
/
2
;
m
=
n
>>
1
;
p
=
t
+
m
*
ne
;
if
(
c
>=
p
[
0
])
{
t
=
p
;
...
...
@@ -1050,102 +35,36 @@ bsearch(Rune c, Rune *t, int n, int ne)
return
0
;
}
/*
 * tolowerrune maps c to its lower-case form, or returns c unchanged
 * when neither lower-casing table has an entry for it.
 * Table offsets are stored excess 500, hence the "- 500".
 */
Rune
tolowerrune(Rune c)
{
	Rune *hit;

	/* range table: rows are first, last, offset */
	hit = bsearch(c, __tolower2, nelem(__tolower2)/3, 3);
	if(hit != 0 && hit[0] <= c && c <= hit[1])
		return c + hit[2] - 500;

	/* singlet table: rows are rune, offset */
	hit = bsearch(c, __tolower1, nelem(__tolower1)/2, 2);
	if(hit == 0 || hit[0] != c)
		return c;
	return c + hit[1] - 500;
}
/*
 * toupperrune maps c to its upper-case form using the range table
 * first and the singlet table second; a rune found in neither is
 * returned as is.  Offsets in both tables are biased by 500.
 */
Rune
toupperrune(Rune c)
{
	Rune *range, *single;

	range = bsearch(c, __toupper2, nelem(__toupper2)/3, 3);
	if(range != 0 && range[0] <= c && c <= range[1])
		return c + range[2] - 500;

	single = bsearch(c, __toupper1, nelem(__toupper1)/2, 2);
	if(single != 0 && single[0] == c)
		return c + single[1] - 500;

	return c;
}
/*
 * totitlerune maps c to its title-case form when __totitle1 lists it
 * and returns c unchanged otherwise.  The stored offset is excess 500.
 */
Rune
totitlerune(Rune c)
{
	Rune *entry = bsearch(c, __totitle1, nelem(__totitle1)/2, 2);

	if(entry == 0 || entry[0] != c)
		return c;
	return c + entry[1] - 500;
}
/*
 * islowerrune returns 1 when c has an entry in either to-upper table
 * (a range in __toupper2 or a singlet in __toupper1), 0 otherwise.
 */
int
islowerrune(Rune c)
{
	Rune *row;

	row = bsearch(c, __toupper2, nelem(__toupper2)/3, 3);
	if(row != 0 && row[0] <= c && c <= row[1])
		return 1;

	row = bsearch(c, __toupper1, nelem(__toupper1)/2, 2);
	return row != 0 && row[0] == c;
}
/*
 * isupperrune returns 1 when c has an entry in either to-lower table
 * (a range in __tolower2 or a singlet in __tolower1), 0 otherwise.
 */
int
isupperrune(Rune c)
{
	Rune *range, *single;

	range = bsearch(c, __tolower2, nelem(__tolower2)/3, 3);
	if(range != 0 && range[0] <= c && c <= range[1])
		return 1;

	single = bsearch(c, __tolower1, nelem(__tolower1)/2, 2);
	if(single != 0 && single[0] == c)
		return 1;

	return 0;
}
/*
 * The "ideographic" property is hard to extract from UnicodeData.txt,
 * so it is hard coded here.
 *
 * It is defined in the Unicode PropList.txt file, for example
 * PropList-3.0.0.txt.  Unlike the UnicodeData.txt file, the format of
 * PropList changes between versions.  This property appears relatively static;
 * it is the same in version 4.0.1, except that version defines some >16 bit
 * chars as ideographic as well: 20000..2a6d6, and 2f800..2Fa1d.
 *
 * Each row is an inclusive [first, last] pair, listed in ascending order;
 * isideographicrune looks a rune up with rbsearch, two entries per row.
 */
static Rune __isideographicr[] = {
	0x3006, 0x3007,	/* 3006 not in Unicode 2, in 2.1 */
	0x3021, 0x3029,
	0x3038, 0x303a,	/* not in Unicode 2 or 2.1 */
	0x3400, 0x4db5,	/* not in Unicode 2 or 2.1 */
	0x4e00, 0x9fbb,	/* 0x9FA6..0x9FBB added for 4.1.0? */
	0xf900, 0xfa2d,
	0x20000, 0x2A6D6,	/* wider than 16 bits, see note above */
	0x2F800, 0x2FA1D,	/* wider than 16 bits, see note above */
};
int
is
alpha
rune
(
Rune
c
)
is
ideographic
rune
(
Rune
c
)
{
Rune
*
p
;
if
(
isupperrune
(
c
)
||
islowerrune
(
c
))
return
1
;
p
=
bsearch
(
c
,
__alpha2
,
nelem
(
__alpha2
)
/
2
,
2
);
p
=
rbsearch
(
c
,
__isideographicr
,
nelem
(
__isideographicr
)
/
2
,
2
);
if
(
p
&&
c
>=
p
[
0
]
&&
c
<=
p
[
1
])
return
1
;
p
=
bsearch
(
c
,
__alpha1
,
nelem
(
__alpha1
),
1
);
if
(
p
&&
c
==
p
[
0
])
return
1
;
return
0
;
}
/*
 * istitlerune returns 1 when c counts as both upper and lower case,
 * i.e. it has entries in the to-lower and the to-upper tables.
 * islowerrune is only consulted when isupperrune succeeds.
 */
int
istitlerune(Rune c)
{
	if(!isupperrune(c))
		return 0;
	return islowerrune(c) != 0;
}
/*
 * isspacerune returns 1 when c falls inside one of the inclusive
 * ranges of __space2, and 0 otherwise.
 */
int
isspacerune(Rune c)
{
	Rune *span = bsearch(c, __space2, nelem(__space2)/2, 2);

	if(span == 0)
		return 0;
	return span[0] <= c && c <= span[1];
}
#include "runetypebody-5.0.0.c"
src/lib9/utf/utf.h
0 → 100644
View file @
5b904a3b
/*
* The authors of this software are Rob Pike and Ken Thompson.
* Copyright (c) 1998-2002 by Lucent Technologies.
* Portions Copyright (c) 2009 The Go Authors. All rights reserved.
* Permission to use, copy, modify, and distribute this software for any
* purpose without fee is hereby granted, provided that this entire notice
* is included in all copies of any software which is or includes a copy
* or modification of this software and in all copies of the supporting
* documentation for such software.
* THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
* WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
* REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
* OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
*/
#ifndef _UTFH_
#define _UTFH_ 1
#include <stdint.h>
typedef
signed
int
Rune
;
/* Code-point values in Unicode 4.0 are 21 bits wide.*/
enum
{
UTFmax
=
4
,
/* maximum bytes per rune */
Runesync
=
0x80
,
/* cannot represent part of a UTF sequence (<) */
Runeself
=
0x80
,
/* rune and UTF sequences are the same (<) */
Runeerror
=
0xFFFD
,
/* decoding error in UTF */
Runemax
=
0x10FFFF
,
/* maximum rune value */
};
#ifdef __cplusplus
extern
"C"
{
#endif
/*
* rune routines
*/
/*
* These routines were written by Rob Pike and Ken Thompson
* and first appeared in Plan 9.
* SEE ALSO
* utf (7)
* tcs (1)
*/
// runetochar copies (encodes) one rune, pointed to by r, to at most
// UTFmax bytes starting at s and returns the number of bytes generated.
int
runetochar
(
char
*
s
,
const
Rune
*
r
);
// chartorune copies (decodes) at most UTFmax bytes starting at s to
// one rune, pointed to by r, and returns the number of bytes consumed.
// If the input is not exactly in UTF format, chartorune will set *r
// to Runeerror and return 1.
//
// Note: There is no special case for a "null-terminated" string. A
// string whose first byte has the value 0 is the UTF8 encoding of the
// Unicode value 0 (i.e., ASCII NUL). A byte value of 0 is illegal
// anywhere else in a UTF sequence.
int
chartorune
(
Rune
*
r
,
const
char
*
s
);
// charntorune is like chartorune, except that it will access at most
// n bytes of s. If the UTF sequence is incomplete within n bytes,
// charntorune will set *r to Runeerror and return 0. If it is complete
// but not in UTF format, it will set *r to Runeerror and return 1.
//
// Added 2004-09-24 by Wei-Hwa Huang
int
charntorune
(
Rune
*
r
,
const
char
*
s
,
int
n
);
// isvalidcharntorune(str, n, r, consumed)
// is a convenience function that calls "*consumed = charntorune(r, str, n)"
// and returns an int (logically boolean) indicating whether the first
// n bytes of str was a valid and complete UTF sequence.
int
isvalidcharntorune
(
const
char
*
str
,
int
n
,
Rune
*
r
,
int
*
consumed
);
// runelen returns the number of bytes required to convert r into UTF.
int
runelen
(
Rune
r
);
// runenlen returns the number of bytes required to convert the n
// runes pointed to by r into UTF.
int
runenlen
(
const
Rune
*
r
,
int
n
);
// fullrune returns 1 if the string s of length n is long enough to be
// decoded by chartorune, and 0 otherwise. This does not guarantee
// that the string contains a legal UTF encoding. This routine is used
// by programs that obtain input one byte at a time and need to know
// when a full rune has arrived.
int
fullrune
(
const
char
*
s
,
int
n
);
// The following routines are analogous to the corresponding string
// routines with "utf" substituted for "str", and "rune" substituted
// for "chr".
// utflen returns the number of runes that are represented by the UTF
// string s. (cf. strlen)
int
utflen
(
const
char
*
s
);
// utfnlen returns the number of complete runes that are represented
// by the first n bytes of the UTF string s. If the last few bytes of
// the string contain an incompletely coded rune, utfnlen will not
// count them; in this way, it differs from utflen, which includes
// every byte of the string. (cf. strnlen)
int
utfnlen
(
const
char
*
s
,
long
n
);
// utfrune returns a pointer to the first occurrence of rune r in the
// UTF string s, or 0 if r does not occur in the string. The NUL
// byte terminating a string is considered to be part of the string s.
// (cf. strchr)
const
char
*
utfrune
(
const
char
*
s
,
Rune
r
);
// utfrrune returns a pointer to the last occurrence of rune r in the
// UTF string s, or 0 if r does not occur in the string. The NUL
// byte terminating a string is considered to be part of the string s.
// (cf. strrchr)
const
char
*
utfrrune
(
const
char
*
s
,
Rune
r
);
// utfutf returns a pointer to the first occurrence of the UTF string
// s2 as a UTF substring of s1, or 0 if there is none. If s2 is the
// null string, utfutf returns s1. (cf. strstr)
const
char
*
utfutf
(
const
char
*
s1
,
const
char
*
s2
);
// utfecpy copies UTF sequences until a null sequence has been copied,
// but writes no sequences beyond es1. If any sequences are copied,
// s1 is terminated by a null sequence, and a pointer to that sequence
// is returned. Otherwise, the original s1 is returned. (cf. strecpy)
char
*
utfecpy
(
char
*
s1
,
char
*
es1
,
const
char
*
s2
);
// These functions are rune-string analogues of the corresponding
// functions in strcat (3).
//
// These routines first appeared in Plan 9.
// SEE ALSO
// memmove (3)
// rune (3)
// strcat (2)
//
// BUGS: The outcome of overlapping moves varies among implementations.
Rune
*
runestrcat
(
Rune
*
s1
,
const
Rune
*
s2
);
Rune
*
runestrncat
(
Rune
*
s1
,
const
Rune
*
s2
,
long
n
);
const
Rune
*
runestrchr
(
const
Rune
*
s
,
Rune
c
);
int
runestrcmp
(
const
Rune
*
s1
,
const
Rune
*
s2
);
int
runestrncmp
(
const
Rune
*
s1
,
const
Rune
*
s2
,
long
n
);
Rune
*
runestrcpy
(
Rune
*
s1
,
const
Rune
*
s2
);
Rune
*
runestrncpy
(
Rune
*
s1
,
const
Rune
*
s2
,
long
n
);
Rune
*
runestrecpy
(
Rune
*
s1
,
Rune
*
es1
,
const
Rune
*
s2
);
Rune
*
runestrdup
(
const
Rune
*
s
);
const
Rune
*
runestrrchr
(
const
Rune
*
s
,
Rune
c
);
long
runestrlen
(
const
Rune
*
s
);
const
Rune
*
runestrstr
(
const
Rune
*
s1
,
const
Rune
*
s2
);
// The following routines test types and modify cases for Unicode
// characters. Unicode defines some characters as letters and
// specifies three cases: upper, lower, and title. Mappings among the
// cases are also defined, although they are not exhaustive: some
// upper case letters have no lower case mapping, and so on. Unicode
// also defines several character properties, a subset of which are
// checked by these routines. These routines are based on Unicode
// version 3.0.0.
//
// NOTE: The routines are implemented in C, so the boolean functions
// (e.g., isupperrune) return 0 for false and 1 for true.
//
//
// toupperrune, tolowerrune, and totitlerune are the Unicode case
// mappings. These routines return the character unchanged if it has
// no defined mapping.
Rune
toupperrune
(
Rune
r
);
Rune
tolowerrune
(
Rune
r
);
Rune
totitlerune
(
Rune
r
);
// isupperrune tests for upper case characters, including Unicode
// upper case letters and targets of the toupper mapping. islowerrune
// and istitlerune are defined analogously.
int
isupperrune
(
Rune
r
);
int
islowerrune
(
Rune
r
);
int
istitlerune
(
Rune
r
);
// isalpharune tests for Unicode letters; this includes ideographs in
// addition to alphabetic characters.
int
isalpharune
(
Rune
r
);
// isdigitrune tests for digits. Non-digit numbers, such as Roman
// numerals, are not included.
int
isdigitrune
(
Rune
r
);
// isideographicrune tests for ideographic characters and numbers, as
// defined by the Unicode standard.
int
isideographicrune
(
Rune
r
);
// isspacerune tests for whitespace characters, including "C" locale
// whitespace, Unicode defined whitespace, and the "zero-width
// non-break space" character.
int
isspacerune
(
Rune
r
);
// (The comments in this file were copied from the manpage files rune.3,
// isalpharune.3, and runestrcat.3. Some formatting changes were also made
// to conform to Google style. /JRM 11/11/05)
#ifdef __cplusplus
}
#endif
#endif
src/lib9/utf/utfdef.h
View file @
5b904a3b
...
...
@@ -12,36 +12,17 @@
* OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
*/
/*
* compiler directive on Plan 9
*/
#
ifndef USED
#define
USED(x) if(x);else
#
endif
#define uchar _utfuchar
#define ushort _utfushort
#define uint _utfuint
#
define ulong _utfulong
#define
vlong _utfvlong
#
define uvlong _utfuvlong
/*
* easiest way to make sure these are defined
*/
#define uchar _fmtuchar
#define ushort _fmtushort
#define uint _fmtuint
#define ulong _fmtulong
#define vlong _fmtvlong
#define uvlong _fmtuvlong
typedef
unsigned
char
uchar
;
typedef
unsigned
short
ushort
;
typedef
unsigned
int
uint
;
typedef
unsigned
long
ulong
;
typedef
unsigned
long
long
uvlong
;
typedef
long
long
vlong
;
/*
* nil cannot be ((void*)0) on ANSI C,
* because it is used for function pointers
*/
#undef nil
#define nil 0
#undef nelem
#define nelem ((void*)0)
#define nelem(x) (sizeof(x)/sizeof((x)[0]))
#define nil ((void*)0)
src/lib9/utf/utfecpy.c
View file @
5b904a3b
...
...
@@ -7,18 +7,17 @@
* or modification of this software and in all copies of the supporting
* documentation for such software.
* THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
* WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
*
ANY
REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
* WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
ANY
* REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
* OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
*/
#define _BSD_SOURCE 1
/* memccpy */
#include <stdarg.h>
#include <string.h>
#include "plan9.h"
#include "utf.h"
#include "utfdef.h"
char
*
utfecpy
(
char
*
to
,
char
*
e
,
char
*
from
)
utfecpy
(
char
*
to
,
char
*
e
,
c
onst
c
har
*
from
)
{
char
*
end
;
...
...
src/lib9/utf/utflen.c
View file @
5b904a3b
...
...
@@ -7,17 +7,17 @@
* or modification of this software and in all copies of the supporting
* documentation for such software.
* THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
* WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
*
ANY
REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
* WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
ANY
* REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
* OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
*/
#include <stdarg.h>
#include <string.h>
#include "plan9.h"
#include "utf.h"
#include "utfdef.h"
int
utflen
(
char
*
s
)
utflen
(
c
onst
c
har
*
s
)
{
int
c
;
long
n
;
...
...
@@ -34,4 +34,5 @@ utflen(char *s)
s
+=
chartorune
(
&
rune
,
s
);
n
++
;
}
return
0
;
}
src/lib9/utf/utfnlen.c
View file @
5b904a3b
...
...
@@ -7,22 +7,22 @@
* or modification of this software and in all copies of the supporting
* documentation for such software.
* THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
* WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
*
ANY
REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
* WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
ANY
* REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
* OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
*/
#include <stdarg.h>
#include <string.h>
#include "plan9.h"
#include "utf.h"
#include "utfdef.h"
int
utfnlen
(
char
*
s
,
long
m
)
utfnlen
(
c
onst
c
har
*
s
,
long
m
)
{
int
c
;
long
n
;
Rune
rune
;
char
*
es
;
c
onst
c
har
*
es
;
es
=
s
+
m
;
for
(
n
=
0
;
s
<
es
;
n
++
)
{
...
...
src/lib9/utf/utfrrune.c
View file @
5b904a3b
...
...
@@ -7,21 +7,22 @@
* or modification of this software and in all copies of the supporting
* documentation for such software.
* THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
* WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
*
ANY
REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
* WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
ANY
* REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
* OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
*/
#include <stdarg.h>
#include <string.h>
#include "plan9.h"
#include "utf.h"
#include "utfdef.h"
const
char
*
utfrrune
(
c
har
*
s
,
long
c
)
utfrrune
(
c
onst
char
*
s
,
Rune
c
)
{
long
c1
;
Rune
r
;
char
*
s1
;
c
onst
c
har
*
s1
;
if
(
c
<
Runesync
)
/* not part of utf sequence */
return
strrchr
(
s
,
c
);
...
...
@@ -42,4 +43,5 @@ utfrrune(char *s, long c)
s1
=
s
;
s
+=
c1
;
}
return
0
;
}
src/lib9/utf/utfrune.c
View file @
5b904a3b
...
...
@@ -7,17 +7,18 @@
* or modification of this software and in all copies of the supporting
* documentation for such software.
* THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
* WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
*
ANY
REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
* WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
ANY
* REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
* OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
*/
#include <stdarg.h>
#include <string.h>
#include "plan9.h"
#include "utf.h"
#include "utfdef.h"
const
char
*
utfrune
(
c
har
*
s
,
long
c
)
utfrune
(
c
onst
char
*
s
,
Rune
c
)
{
long
c1
;
Rune
r
;
...
...
@@ -41,4 +42,5 @@ utfrune(char *s, long c)
return
s
;
s
+=
n
;
}
return
0
;
}
src/lib9/utf/utfutf.c
View file @
5b904a3b
...
...
@@ -7,24 +7,25 @@
* or modification of this software and in all copies of the supporting
* documentation for such software.
* THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
* WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
*
ANY
REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
* WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
ANY
* REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
* OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
*/
#include <stdarg.h>
#include <string.h>
#include "plan9.h"
#include "utf.h"
#include "utfdef.h"
/*
* Return pointer to first occurrence of s2 in s1,
* 0 if none
*/
const
char
*
utfutf
(
c
har
*
s1
,
char
*
s2
)
utfutf
(
c
onst
char
*
s1
,
const
char
*
s2
)
{
char
*
p
;
c
onst
c
har
*
p
;
long
f
,
n1
,
n2
;
Rune
r
;
...
...
@@ -34,7 +35,7 @@ utfutf(char *s1, char *s2)
return
strstr
(
s1
,
s2
);
n2
=
strlen
(
s2
);
for
(
p
=
s1
;
p
=
utfrune
(
p
,
f
)
;
p
+=
n1
)
for
(
p
=
s1
;
(
p
=
utfrune
(
p
,
f
))
!=
0
;
p
+=
n1
)
if
(
strncmp
(
p
,
s2
,
n2
)
==
0
)
return
p
;
return
0
;
...
...
src/runtime/Makefile
View file @
5b904a3b
...
...
@@ -20,6 +20,7 @@ LIBOFILES=\
runtime.
$O
\
map.
$O
\
print.
$O
\
rune.
$O
\
string.
$O
\
sys_file.
$O
\
...
...
src/runtime/rune.c
0 → 100644
View file @
5b904a3b
/*
* The authors of this software are Rob Pike and Ken Thompson.
* Copyright (c) 2002 by Lucent Technologies.
* Permission to use, copy, modify, and distribute this software for any
* purpose without fee is hereby granted, provided that this entire notice
* is included in all copies of any software which is or includes a copy
* or modification of this software and in all copies of the supporting
* documentation for such software.
* THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
* WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
* REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
* OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
*/
/*
* This code is copied, with slight editing due to type differences,
* from a subset of ../lib9/utf/rune.c
*/
#include "runtime.h"
enum
{
Bit1
=
7
,
Bitx
=
6
,
Bit2
=
5
,
Bit3
=
4
,
Bit4
=
3
,
Bit5
=
2
,
T1
=
((
1
<<
(
Bit1
+
1
))
-
1
)
^
0xFF
,
/* 0000 0000 */
Tx
=
((
1
<<
(
Bitx
+
1
))
-
1
)
^
0xFF
,
/* 1000 0000 */
T2
=
((
1
<<
(
Bit2
+
1
))
-
1
)
^
0xFF
,
/* 1100 0000 */
T3
=
((
1
<<
(
Bit3
+
1
))
-
1
)
^
0xFF
,
/* 1110 0000 */
T4
=
((
1
<<
(
Bit4
+
1
))
-
1
)
^
0xFF
,
/* 1111 0000 */
T5
=
((
1
<<
(
Bit5
+
1
))
-
1
)
^
0xFF
,
/* 1111 1000 */
Rune1
=
(
1
<<
(
Bit1
+
0
*
Bitx
))
-
1
,
/* 0000 0000 0111 1111 */
Rune2
=
(
1
<<
(
Bit2
+
1
*
Bitx
))
-
1
,
/* 0000 0111 1111 1111 */
Rune3
=
(
1
<<
(
Bit3
+
2
*
Bitx
))
-
1
,
/* 1111 1111 1111 1111 */
Rune4
=
(
1
<<
(
Bit4
+
3
*
Bitx
))
-
1
,
/* 0001 1111 1111 1111 1111 1111 */
Maskx
=
(
1
<<
Bitx
)
-
1
,
/* 0011 1111 */
Testx
=
Maskx
^
0xFF
,
/* 1100 0000 */
Runeerror
=
0xFFFD
,
Runeself
=
0x80
,
Bad
=
Runeerror
,
Runemax
=
0x10FFFF
,
/* maximum rune value */
};
/*
 * Modified by Wei-Hwa Huang, Google Inc., on 2004-09-24
 * This is a slower but "safe" version of the old chartorune
 * that works on strings that are not necessarily null-terminated.
 *
 * If you know for sure that your string is null-terminated,
 * chartorune will be a bit faster.
 *
 * It is guaranteed not to attempt to access "length"
 * past the incoming pointer.  This is to avoid
 * possible access violations.  If the string appears to be
 * well-formed but incomplete (i.e., to get the whole Rune
 * we'd need to read past str+length) then we'll set the Rune
 * to Bad and return 0.
 *
 * Note that if we have decoding problems for other
 * reasons, we return 1 instead of 0.  Those reasons are:
 * a byte that should be a continuation byte but is not,
 * an overlong encoding (a value that fits in fewer bytes),
 * a four-byte sequence whose value exceeds Runemax, and
 * a lead byte announcing a sequence longer than four bytes.
 *
 * On success *rune holds the decoded value and the return
 * value is the number of bytes consumed (1 to 4).
 */
int32
charntorune(int32 *rune, byte *str, int32 length)
{
	int32 c, c1, c2, c3;
	int32 l;

	/* When we're not allowed to read anything */
	if(length <= 0) {
		goto badlen;
	}

	/*
	 * one character sequence (7-bit value)
	 *	00000-0007F => T1
	 */
	c = *(byte*)str;	/* cast not necessary, but kept for safety */
	if(c < Tx) {
		*rune = c;
		return 1;
	}

	// If we can't read more than one character we must stop
	if(length <= 1) {
		goto badlen;
	}

	/*
	 * two character sequence (11-bit value)
	 *	0080-07FF => T2 Tx
	 */
	c1 = *(byte*)(str+1) ^ Tx;
	if(c1 & Testx)
		goto bad;
	if(c < T3) {
		if(c < T2)
			goto bad;
		l = ((c << Bitx) | c1) & Rune2;
		if(l <= Rune1)
			goto bad;
		*rune = l;
		return 2;
	}

	// If we can't read more than two characters we must stop
	if(length <= 2) {
		goto badlen;
	}

	/*
	 * three character sequence (16-bit value)
	 *	0800-FFFF => T3 Tx Tx
	 */
	c2 = *(byte*)(str+2) ^ Tx;
	if(c2 & Testx)
		goto bad;
	if(c < T4) {
		l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3;
		if(l <= Rune2)
			goto bad;
		*rune = l;
		return 3;
	}

	if(length <= 3)
		goto badlen;

	/*
	 * four character sequence (21-bit value)
	 *	10000-10FFFF => T4 Tx Tx Tx
	 */
	c3 = *(byte*)(str+3) ^ Tx;
	if(c3 & Testx)
		goto bad;
	if(c < T5) {
		l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4;
		/*
		 * Four bytes can carry up to 0x1FFFFF, but runes stop at
		 * Runemax.  Reject the excess here so that decoding agrees
		 * with runetochar, which refuses to encode such values.
		 */
		if(l <= Rune3 || l > Runemax)
			goto bad;
		*rune = l;
		return 4;
	}

	// Support for 5-byte or longer UTF-8 would go here, but
	// since we don't have that, we'll just fall through to bad.

	/*
	 * bad decoding
	 */
bad:
	*rune = Bad;
	return 1;
badlen:
	*rune = Bad;
	return 0;
}
int32
runetochar(byte *str, int32 rune)	/* note: in original, arg2 was pointer */
{
	uint32 v;	/* unsigned copy: a negative rune becomes a huge value and fails the range test */
	int32 nbytes;

	v = rune;

	if(v <= Rune1) {
		/* 00000-0007F => 00-7F */
		str[0] = v;
		nbytes = 1;
	} else if(v <= Rune2) {
		/* 0080-07FF => T2 Tx */
		str[0] = T2 | (v >> 1*Bitx);
		str[1] = Tx | (v & Maskx);
		nbytes = 2;
	} else {
		/*
		 * Anything beyond Runemax is replaced by the error rune.
		 * The test lives in this branch because such a value can
		 * never have matched the one- or two-byte cases above, and
		 * the error rune itself needs three bytes.
		 */
		if(v > Runemax)
			v = Runeerror;

		if(v <= Rune3) {
			/* 0800-FFFF => T3 Tx Tx */
			str[0] = T3 | (v >> 2*Bitx);
			str[1] = Tx | ((v >> 1*Bitx) & Maskx);
			str[2] = Tx | (v & Maskx);
			nbytes = 3;
		} else {
			/* 10000-1FFFFF => T4 Tx Tx Tx (21-bit value) */
			str[0] = T4 | (v >> 3*Bitx);
			str[1] = Tx | ((v >> 2*Bitx) & Maskx);
			str[2] = Tx | ((v >> 1*Bitx) & Maskx);
			str[3] = Tx | (v & Maskx);
			nbytes = 4;
		}
	}
	return nbytes;
}
src/runtime/runtime.h
View file @
5b904a3b
...
...
@@ -85,6 +85,8 @@ enum
int32
strcmp
(
byte
*
,
byte
*
);
int32
findnull
(
int8
*
);
void
dump
(
byte
*
,
int32
);
int32
runetochar
(
byte
*
,
int32
);
int32
chartorune
(
uint32
*
,
byte
*
);
extern
string
emptystring
;
extern
int32
debug
;
...
...
src/runtime/string.c
View file @
5b904a3b
...
...
@@ -151,55 +151,6 @@ sys·indexstring(string s, int32 i, byte b)
FLUSH
(
&
b
);
}
/*
* this is the plan9 runetochar
* extended for 36 bits in 7 bytes
* note that it truncates to 32 bits
* through the argument passing.
*/
static int32
runetochar(byte *str, uint32 c)
{
	int32 ext, pos;		/* ext: number of extension bytes; pos: write index */
	uint32 limit, lead;	/* limit: max codepoint for ext; lead: zeroth byte indicator */

	/* a 7-bit value is stored as a single byte */
	if(c <= 0x07FUL) {
		str[0] = c;
		return 1;
	}

	/*
	 * Each additional extension byte raises the limit by
	 * 5 bits: six more in the extension byte, one fewer
	 * in the first byte.
	 */
	limit = 0x7ffUL;
	lead = 0xC0UL;
	ext = 1;
	while(c > limit) {
		limit = (limit<<5) | 0x1fUL;
		lead = (lead>>1) | 0x80UL;
		ext++;
	}

	/* fill in the extension bytes from last to first */
	pos = ext;
	while(pos > 0) {
		str[pos] = 0x80UL | (c&0x3fUL);
		c >>= 6;
		pos--;
	}

	/* whatever is left of c shares the first byte with the indicator */
	str[0] = lead|c;
	return ext+1;
}
void
sys
·
intstring
(
int64
v
,
string
s
)
{
...
...
test/string_lit.go
View file @
5b904a3b
...
...
@@ -75,5 +75,14 @@ func main() {
`\000\123\x00\312\xFE\u0123\ubabe\U0000babe`
,
"backslashes 2 (backquote)"
);
assert
(
"
\\
x
\\
u
\\
U
\\
"
,
`\x\u\U\`
,
"backslash 3 (backquote)"
);
// test large runes. perhaps not the most logical place for this test.
var
r
int32
;
r
=
0x10ffff
;
// largest rune value
s
=
string
(
r
);
assert
(
s
,
"
\xf4\x8f\xbf\xbf
"
,
"largest rune"
);
r
=
0x10ffff
+
1
;
s
=
string
(
r
);
assert
(
s
,
"
\xef\xbf\xbd
"
,
"too-large rune"
);
sys
.
exit
(
ecode
);
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment