Commit 6ca99ce7, authored by Christoph Hellwig, committed by Gabriel Krisman Bertazi

unicode: cache the normalization tables in struct unicode_map

Instead of repeatedly looking up the version, add pointers to the
NFD and NFD+CF tables to struct unicode_map, and pass a
unicode_map plus index to the functions using the normalization
tables.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Gabriel Krisman Bertazi <krisman@collabora.com>
parent fbc59d65
...@@ -5,16 +5,13 @@ ...@@ -5,16 +5,13 @@
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/parser.h> #include <linux/parser.h>
#include <linux/errno.h> #include <linux/errno.h>
#include <linux/unicode.h>
#include <linux/stringhash.h> #include <linux/stringhash.h>
#include "utf8n.h" #include "utf8n.h"
int utf8_validate(const struct unicode_map *um, const struct qstr *str) int utf8_validate(const struct unicode_map *um, const struct qstr *str)
{ {
const struct utf8data *data = utf8nfdi(um->version); if (utf8nlen(um, UTF8_NFDI, str->name, str->len) < 0)
if (utf8nlen(data, str->name, str->len) < 0)
return -1; return -1;
return 0; return 0;
} }
...@@ -23,14 +20,13 @@ EXPORT_SYMBOL(utf8_validate); ...@@ -23,14 +20,13 @@ EXPORT_SYMBOL(utf8_validate);
int utf8_strncmp(const struct unicode_map *um, int utf8_strncmp(const struct unicode_map *um,
const struct qstr *s1, const struct qstr *s2) const struct qstr *s1, const struct qstr *s2)
{ {
const struct utf8data *data = utf8nfdi(um->version);
struct utf8cursor cur1, cur2; struct utf8cursor cur1, cur2;
int c1, c2; int c1, c2;
if (utf8ncursor(&cur1, data, s1->name, s1->len) < 0) if (utf8ncursor(&cur1, um, UTF8_NFDI, s1->name, s1->len) < 0)
return -EINVAL; return -EINVAL;
if (utf8ncursor(&cur2, data, s2->name, s2->len) < 0) if (utf8ncursor(&cur2, um, UTF8_NFDI, s2->name, s2->len) < 0)
return -EINVAL; return -EINVAL;
do { do {
...@@ -50,14 +46,13 @@ EXPORT_SYMBOL(utf8_strncmp); ...@@ -50,14 +46,13 @@ EXPORT_SYMBOL(utf8_strncmp);
int utf8_strncasecmp(const struct unicode_map *um, int utf8_strncasecmp(const struct unicode_map *um,
const struct qstr *s1, const struct qstr *s2) const struct qstr *s1, const struct qstr *s2)
{ {
const struct utf8data *data = utf8nfdicf(um->version);
struct utf8cursor cur1, cur2; struct utf8cursor cur1, cur2;
int c1, c2; int c1, c2;
if (utf8ncursor(&cur1, data, s1->name, s1->len) < 0) if (utf8ncursor(&cur1, um, UTF8_NFDICF, s1->name, s1->len) < 0)
return -EINVAL; return -EINVAL;
if (utf8ncursor(&cur2, data, s2->name, s2->len) < 0) if (utf8ncursor(&cur2, um, UTF8_NFDICF, s2->name, s2->len) < 0)
return -EINVAL; return -EINVAL;
do { do {
...@@ -81,12 +76,11 @@ int utf8_strncasecmp_folded(const struct unicode_map *um, ...@@ -81,12 +76,11 @@ int utf8_strncasecmp_folded(const struct unicode_map *um,
const struct qstr *cf, const struct qstr *cf,
const struct qstr *s1) const struct qstr *s1)
{ {
const struct utf8data *data = utf8nfdicf(um->version);
struct utf8cursor cur1; struct utf8cursor cur1;
int c1, c2; int c1, c2;
int i = 0; int i = 0;
if (utf8ncursor(&cur1, data, s1->name, s1->len) < 0) if (utf8ncursor(&cur1, um, UTF8_NFDICF, s1->name, s1->len) < 0)
return -EINVAL; return -EINVAL;
do { do {
...@@ -105,11 +99,10 @@ EXPORT_SYMBOL(utf8_strncasecmp_folded); ...@@ -105,11 +99,10 @@ EXPORT_SYMBOL(utf8_strncasecmp_folded);
int utf8_casefold(const struct unicode_map *um, const struct qstr *str, int utf8_casefold(const struct unicode_map *um, const struct qstr *str,
unsigned char *dest, size_t dlen) unsigned char *dest, size_t dlen)
{ {
const struct utf8data *data = utf8nfdicf(um->version);
struct utf8cursor cur; struct utf8cursor cur;
size_t nlen = 0; size_t nlen = 0;
if (utf8ncursor(&cur, data, str->name, str->len) < 0) if (utf8ncursor(&cur, um, UTF8_NFDICF, str->name, str->len) < 0)
return -EINVAL; return -EINVAL;
for (nlen = 0; nlen < dlen; nlen++) { for (nlen = 0; nlen < dlen; nlen++) {
...@@ -128,12 +121,11 @@ EXPORT_SYMBOL(utf8_casefold); ...@@ -128,12 +121,11 @@ EXPORT_SYMBOL(utf8_casefold);
int utf8_casefold_hash(const struct unicode_map *um, const void *salt, int utf8_casefold_hash(const struct unicode_map *um, const void *salt,
struct qstr *str) struct qstr *str)
{ {
const struct utf8data *data = utf8nfdicf(um->version);
struct utf8cursor cur; struct utf8cursor cur;
int c; int c;
unsigned long hash = init_name_hash(salt); unsigned long hash = init_name_hash(salt);
if (utf8ncursor(&cur, data, str->name, str->len) < 0) if (utf8ncursor(&cur, um, UTF8_NFDICF, str->name, str->len) < 0)
return -EINVAL; return -EINVAL;
while ((c = utf8byte(&cur))) { while ((c = utf8byte(&cur))) {
...@@ -149,11 +141,10 @@ EXPORT_SYMBOL(utf8_casefold_hash); ...@@ -149,11 +141,10 @@ EXPORT_SYMBOL(utf8_casefold_hash);
int utf8_normalize(const struct unicode_map *um, const struct qstr *str, int utf8_normalize(const struct unicode_map *um, const struct qstr *str,
unsigned char *dest, size_t dlen) unsigned char *dest, size_t dlen)
{ {
const struct utf8data *data = utf8nfdi(um->version);
struct utf8cursor cur; struct utf8cursor cur;
ssize_t nlen = 0; ssize_t nlen = 0;
if (utf8ncursor(&cur, data, str->name, str->len) < 0) if (utf8ncursor(&cur, um, UTF8_NFDI, str->name, str->len) < 0)
return -EINVAL; return -EINVAL;
for (nlen = 0; nlen < dlen; nlen++) { for (nlen = 0; nlen < dlen; nlen++) {
...@@ -180,7 +171,17 @@ struct unicode_map *utf8_load(unsigned int version) ...@@ -180,7 +171,17 @@ struct unicode_map *utf8_load(unsigned int version)
if (!um) if (!um)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
um->version = version; um->version = version;
um->ntab[UTF8_NFDI] = utf8nfdi(version);
if (!um->ntab[UTF8_NFDI])
goto out_free_um;
um->ntab[UTF8_NFDICF] = utf8nfdicf(version);
if (!um->ntab[UTF8_NFDICF])
goto out_free_um;
return um; return um;
out_free_um:
kfree(um);
return ERR_PTR(-EINVAL);
} }
EXPORT_SYMBOL(utf8_load); EXPORT_SYMBOL(utf8_load);
......
...@@ -309,21 +309,19 @@ utf8hangul(const char *str, unsigned char *hangul) ...@@ -309,21 +309,19 @@ utf8hangul(const char *str, unsigned char *hangul)
* is well-formed and corresponds to a known unicode code point. The * is well-formed and corresponds to a known unicode code point. The
* shorthand for this will be "is valid UTF-8 unicode". * shorthand for this will be "is valid UTF-8 unicode".
*/ */
static utf8leaf_t *utf8nlookup(const struct utf8data *data, static utf8leaf_t *utf8nlookup(const struct unicode_map *um,
unsigned char *hangul, const char *s, size_t len) enum utf8_normalization n, unsigned char *hangul, const char *s,
size_t len)
{ {
utf8trie_t *trie = NULL; utf8trie_t *trie = utf8data + um->ntab[n]->offset;
int offlen; int offlen;
int offset; int offset;
int mask; int mask;
int node; int node;
if (!data)
return NULL;
if (len == 0) if (len == 0)
return NULL; return NULL;
trie = utf8data + data->offset;
node = 1; node = 1;
while (node) { while (node) {
offlen = (*trie & OFFLEN) >> OFFLEN_SHIFT; offlen = (*trie & OFFLEN) >> OFFLEN_SHIFT;
...@@ -385,29 +383,28 @@ static utf8leaf_t *utf8nlookup(const struct utf8data *data, ...@@ -385,29 +383,28 @@ static utf8leaf_t *utf8nlookup(const struct utf8data *data,
* *
* Forwards to utf8nlookup(). * Forwards to utf8nlookup().
*/ */
static utf8leaf_t *utf8lookup(const struct utf8data *data, static utf8leaf_t *utf8lookup(const struct unicode_map *um,
unsigned char *hangul, const char *s) enum utf8_normalization n, unsigned char *hangul, const char *s)
{ {
return utf8nlookup(data, hangul, s, (size_t)-1); return utf8nlookup(um, n, hangul, s, (size_t)-1);
} }
/* /*
* Length of the normalization of s, touch at most len bytes. * Length of the normalization of s, touch at most len bytes.
* Return -1 if s is not valid UTF-8 unicode. * Return -1 if s is not valid UTF-8 unicode.
*/ */
ssize_t utf8nlen(const struct utf8data *data, const char *s, size_t len) ssize_t utf8nlen(const struct unicode_map *um, enum utf8_normalization n,
const char *s, size_t len)
{ {
utf8leaf_t *leaf; utf8leaf_t *leaf;
size_t ret = 0; size_t ret = 0;
unsigned char hangul[UTF8HANGULLEAF]; unsigned char hangul[UTF8HANGULLEAF];
if (!data)
return -1;
while (len && *s) { while (len && *s) {
leaf = utf8nlookup(data, hangul, s, len); leaf = utf8nlookup(um, n, hangul, s, len);
if (!leaf) if (!leaf)
return -1; return -1;
if (utf8agetab[LEAF_GEN(leaf)] > data->maxage) if (utf8agetab[LEAF_GEN(leaf)] > um->ntab[n]->maxage)
ret += utf8clen(s); ret += utf8clen(s);
else if (LEAF_CCC(leaf) == DECOMPOSE) else if (LEAF_CCC(leaf) == DECOMPOSE)
ret += strlen(LEAF_STR(leaf)); ret += strlen(LEAF_STR(leaf));
...@@ -430,14 +427,13 @@ EXPORT_SYMBOL(utf8nlen); ...@@ -430,14 +427,13 @@ EXPORT_SYMBOL(utf8nlen);
* *
* Returns -1 on error, 0 on success. * Returns -1 on error, 0 on success.
*/ */
int utf8ncursor(struct utf8cursor *u8c, const struct utf8data *data, int utf8ncursor(struct utf8cursor *u8c, const struct unicode_map *um,
const char *s, size_t len) enum utf8_normalization n, const char *s, size_t len)
{ {
if (!data)
return -1;
if (!s) if (!s)
return -1; return -1;
u8c->data = data; u8c->um = um;
u8c->n = n;
u8c->s = s; u8c->s = s;
u8c->p = NULL; u8c->p = NULL;
u8c->ss = NULL; u8c->ss = NULL;
...@@ -512,9 +508,9 @@ int utf8byte(struct utf8cursor *u8c) ...@@ -512,9 +508,9 @@ int utf8byte(struct utf8cursor *u8c)
/* Look up the data for the current character. */ /* Look up the data for the current character. */
if (u8c->p) { if (u8c->p) {
leaf = utf8lookup(u8c->data, u8c->hangul, u8c->s); leaf = utf8lookup(u8c->um, u8c->n, u8c->hangul, u8c->s);
} else { } else {
leaf = utf8nlookup(u8c->data, u8c->hangul, leaf = utf8nlookup(u8c->um, u8c->n, u8c->hangul,
u8c->s, u8c->len); u8c->s, u8c->len);
} }
...@@ -524,7 +520,8 @@ int utf8byte(struct utf8cursor *u8c) ...@@ -524,7 +520,8 @@ int utf8byte(struct utf8cursor *u8c)
ccc = LEAF_CCC(leaf); ccc = LEAF_CCC(leaf);
/* Characters that are too new have CCC 0. */ /* Characters that are too new have CCC 0. */
if (utf8agetab[LEAF_GEN(leaf)] > u8c->data->maxage) { if (utf8agetab[LEAF_GEN(leaf)] >
u8c->um->ntab[u8c->n]->maxage) {
ccc = STOPPER; ccc = STOPPER;
} else if (ccc == DECOMPOSE) { } else if (ccc == DECOMPOSE) {
u8c->len -= utf8clen(u8c->s); u8c->len -= utf8clen(u8c->s);
...@@ -538,7 +535,7 @@ int utf8byte(struct utf8cursor *u8c) ...@@ -538,7 +535,7 @@ int utf8byte(struct utf8cursor *u8c)
goto ccc_mismatch; goto ccc_mismatch;
} }
leaf = utf8lookup(u8c->data, u8c->hangul, u8c->s); leaf = utf8lookup(u8c->um, u8c->n, u8c->hangul, u8c->s);
if (!leaf) if (!leaf)
return -1; return -1;
ccc = LEAF_CCC(leaf); ccc = LEAF_CCC(leaf);
...@@ -611,7 +608,6 @@ const struct utf8data *utf8nfdi(unsigned int maxage) ...@@ -611,7 +608,6 @@ const struct utf8data *utf8nfdi(unsigned int maxage)
return NULL; return NULL;
return &utf8nfdidata[i]; return &utf8nfdidata[i];
} }
EXPORT_SYMBOL(utf8nfdi);
const struct utf8data *utf8nfdicf(unsigned int maxage) const struct utf8data *utf8nfdicf(unsigned int maxage)
{ {
...@@ -623,4 +619,3 @@ const struct utf8data *utf8nfdicf(unsigned int maxage) ...@@ -623,4 +619,3 @@ const struct utf8data *utf8nfdicf(unsigned int maxage)
return NULL; return NULL;
return &utf8nfdicfdata[i]; return &utf8nfdicfdata[i];
} }
EXPORT_SYMBOL(utf8nfdicf);
...@@ -18,9 +18,7 @@ unsigned int failed_tests; ...@@ -18,9 +18,7 @@ unsigned int failed_tests;
unsigned int total_tests; unsigned int total_tests;
/* Tests will be based on this version. */ /* Tests will be based on this version. */
#define latest_maj 12 #define UTF8_LATEST UNICODE_AGE(12, 1, 0)
#define latest_min 1
#define latest_rev 0
#define _test(cond, func, line, fmt, ...) do { \ #define _test(cond, func, line, fmt, ...) do { \
total_tests++; \ total_tests++; \
...@@ -160,29 +158,22 @@ static const struct { ...@@ -160,29 +158,22 @@ static const struct {
} }
}; };
static ssize_t utf8len(const struct utf8data *data, const char *s) static ssize_t utf8len(const struct unicode_map *um, enum utf8_normalization n,
const char *s)
{ {
return utf8nlen(data, s, (size_t)-1); return utf8nlen(um, n, s, (size_t)-1);
} }
static int utf8cursor(struct utf8cursor *u8c, const struct utf8data *data, static int utf8cursor(struct utf8cursor *u8c, const struct unicode_map *um,
const char *s) enum utf8_normalization n, const char *s)
{ {
return utf8ncursor(u8c, data, s, (unsigned int)-1); return utf8ncursor(u8c, um, n, s, (unsigned int)-1);
} }
static void check_utf8_nfdi(void) static void check_utf8_nfdi(struct unicode_map *um)
{ {
int i; int i;
struct utf8cursor u8c; struct utf8cursor u8c;
const struct utf8data *data;
data = utf8nfdi(UNICODE_AGE(latest_maj, latest_min, latest_rev));
if (!data) {
pr_err("%s: Unable to load utf8-%d.%d.%d. Skipping.\n",
__func__, latest_maj, latest_min, latest_rev);
return;
}
for (i = 0; i < ARRAY_SIZE(nfdi_test_data); i++) { for (i = 0; i < ARRAY_SIZE(nfdi_test_data); i++) {
int len = strlen(nfdi_test_data[i].str); int len = strlen(nfdi_test_data[i].str);
...@@ -190,10 +181,11 @@ static void check_utf8_nfdi(void) ...@@ -190,10 +181,11 @@ static void check_utf8_nfdi(void)
int j = 0; int j = 0;
unsigned char c; unsigned char c;
test((utf8len(data, nfdi_test_data[i].str) == nlen)); test((utf8len(um, UTF8_NFDI, nfdi_test_data[i].str) == nlen));
test((utf8nlen(data, nfdi_test_data[i].str, len) == nlen)); test((utf8nlen(um, UTF8_NFDI, nfdi_test_data[i].str, len) ==
nlen));
if (utf8cursor(&u8c, data, nfdi_test_data[i].str) < 0) if (utf8cursor(&u8c, um, UTF8_NFDI, nfdi_test_data[i].str) < 0)
pr_err("can't create cursor\n"); pr_err("can't create cursor\n");
while ((c = utf8byte(&u8c)) > 0) { while ((c = utf8byte(&u8c)) > 0) {
...@@ -207,18 +199,10 @@ static void check_utf8_nfdi(void) ...@@ -207,18 +199,10 @@ static void check_utf8_nfdi(void)
} }
} }
static void check_utf8_nfdicf(void) static void check_utf8_nfdicf(struct unicode_map *um)
{ {
int i; int i;
struct utf8cursor u8c; struct utf8cursor u8c;
const struct utf8data *data;
data = utf8nfdicf(UNICODE_AGE(latest_maj, latest_min, latest_rev));
if (!data) {
pr_err("%s: Unable to load utf8-%d.%d.%d. Skipping.\n",
__func__, latest_maj, latest_min, latest_rev);
return;
}
for (i = 0; i < ARRAY_SIZE(nfdicf_test_data); i++) { for (i = 0; i < ARRAY_SIZE(nfdicf_test_data); i++) {
int len = strlen(nfdicf_test_data[i].str); int len = strlen(nfdicf_test_data[i].str);
...@@ -226,10 +210,13 @@ static void check_utf8_nfdicf(void) ...@@ -226,10 +210,13 @@ static void check_utf8_nfdicf(void)
int j = 0; int j = 0;
unsigned char c; unsigned char c;
test((utf8len(data, nfdicf_test_data[i].str) == nlen)); test((utf8len(um, UTF8_NFDICF, nfdicf_test_data[i].str) ==
test((utf8nlen(data, nfdicf_test_data[i].str, len) == nlen)); nlen));
test((utf8nlen(um, UTF8_NFDICF, nfdicf_test_data[i].str, len) ==
nlen));
if (utf8cursor(&u8c, data, nfdicf_test_data[i].str) < 0) if (utf8cursor(&u8c, um, UTF8_NFDICF,
nfdicf_test_data[i].str) < 0)
pr_err("can't create cursor\n"); pr_err("can't create cursor\n");
while ((c = utf8byte(&u8c)) > 0) { while ((c = utf8byte(&u8c)) > 0) {
...@@ -243,16 +230,9 @@ static void check_utf8_nfdicf(void) ...@@ -243,16 +230,9 @@ static void check_utf8_nfdicf(void)
} }
} }
static void check_utf8_comparisons(void) static void check_utf8_comparisons(struct unicode_map *table)
{ {
int i; int i;
struct unicode_map *table = utf8_load(UNICODE_AGE(12, 1, 0));
if (IS_ERR(table)) {
pr_err("%s: Unable to load utf8 %d.%d.%d. Skipping.\n",
__func__, latest_maj, latest_min, latest_rev);
return;
}
for (i = 0; i < ARRAY_SIZE(nfdi_test_data); i++) { for (i = 0; i < ARRAY_SIZE(nfdi_test_data); i++) {
const struct qstr s1 = {.name = nfdi_test_data[i].str, const struct qstr s1 = {.name = nfdi_test_data[i].str,
...@@ -273,8 +253,6 @@ static void check_utf8_comparisons(void) ...@@ -273,8 +253,6 @@ static void check_utf8_comparisons(void)
test_f(!utf8_strncasecmp(table, &s1, &s2), test_f(!utf8_strncasecmp(table, &s1, &s2),
"%s %s comparison mismatch\n", s1.name, s2.name); "%s %s comparison mismatch\n", s1.name, s2.name);
} }
utf8_unload(table);
} }
static void check_supported_versions(void) static void check_supported_versions(void)
...@@ -286,8 +264,7 @@ static void check_supported_versions(void) ...@@ -286,8 +264,7 @@ static void check_supported_versions(void)
test(utf8version_is_supported(UNICODE_AGE(9, 0, 0))); test(utf8version_is_supported(UNICODE_AGE(9, 0, 0)));
/* Unicode 1x.0.0 (the latest version) should be supported. */ /* Unicode 1x.0.0 (the latest version) should be supported. */
test(utf8version_is_supported( test(utf8version_is_supported(UTF8_LATEST));
UNICODE_AGE(latest_maj, latest_min, latest_rev)));
/* Next versions don't exist. */ /* Next versions don't exist. */
test(!utf8version_is_supported(UNICODE_AGE(13, 0, 0))); test(!utf8version_is_supported(UNICODE_AGE(13, 0, 0)));
...@@ -297,19 +274,28 @@ static void check_supported_versions(void) ...@@ -297,19 +274,28 @@ static void check_supported_versions(void)
static int __init init_test_ucd(void) static int __init init_test_ucd(void)
{ {
struct unicode_map *um;
failed_tests = 0; failed_tests = 0;
total_tests = 0; total_tests = 0;
um = utf8_load(UTF8_LATEST);
if (IS_ERR(um)) {
pr_err("%s: Unable to load utf8 table.\n", __func__);
return PTR_ERR(um);
}
check_supported_versions(); check_supported_versions();
check_utf8_nfdi(); check_utf8_nfdi(um);
check_utf8_nfdicf(); check_utf8_nfdicf(um);
check_utf8_comparisons(); check_utf8_comparisons(um);
if (!failed_tests) if (!failed_tests)
pr_info("All %u tests passed\n", total_tests); pr_info("All %u tests passed\n", total_tests);
else else
pr_err("%u out of %u tests failed\n", failed_tests, pr_err("%u out of %u tests failed\n", failed_tests,
total_tests); total_tests);
utf8_unload(um);
return 0; return 0;
} }
......
...@@ -39,7 +39,8 @@ extern const struct utf8data *utf8nfdicf(unsigned int maxage); ...@@ -39,7 +39,8 @@ extern const struct utf8data *utf8nfdicf(unsigned int maxage);
* Returns 0 if only ignorable code points are present. * Returns 0 if only ignorable code points are present.
* Returns -1 if the input is not valid UTF-8. * Returns -1 if the input is not valid UTF-8.
*/ */
extern ssize_t utf8nlen(const struct utf8data *data, const char *s, size_t len); ssize_t utf8nlen(const struct unicode_map *um, enum utf8_normalization n,
const char *s, size_t len);
/* Needed in struct utf8cursor below. */ /* Needed in struct utf8cursor below. */
#define UTF8HANGULLEAF (12) #define UTF8HANGULLEAF (12)
...@@ -48,7 +49,8 @@ extern ssize_t utf8nlen(const struct utf8data *data, const char *s, size_t len); ...@@ -48,7 +49,8 @@ extern ssize_t utf8nlen(const struct utf8data *data, const char *s, size_t len);
* Cursor structure used by the normalizer. * Cursor structure used by the normalizer.
*/ */
struct utf8cursor { struct utf8cursor {
const struct utf8data *data; const struct unicode_map *um;
enum utf8_normalization n;
const char *s; const char *s;
const char *p; const char *p;
const char *ss; const char *ss;
...@@ -65,8 +67,8 @@ struct utf8cursor { ...@@ -65,8 +67,8 @@ struct utf8cursor {
* Returns 0 on success. * Returns 0 on success.
* Returns -1 on failure. * Returns -1 on failure.
*/ */
extern int utf8ncursor(struct utf8cursor *u8c, const struct utf8data *data, int utf8ncursor(struct utf8cursor *u8c, const struct unicode_map *um,
const char *s, size_t len); enum utf8_normalization n, const char *s, size_t len);
/* /*
* Get the next byte in the normalization. * Get the next byte in the normalization.
......
...@@ -5,6 +5,8 @@ ...@@ -5,6 +5,8 @@
#include <linux/init.h> #include <linux/init.h>
#include <linux/dcache.h> #include <linux/dcache.h>
struct utf8data;
#define UNICODE_MAJ_SHIFT 16 #define UNICODE_MAJ_SHIFT 16
#define UNICODE_MIN_SHIFT 8 #define UNICODE_MIN_SHIFT 8
...@@ -28,8 +30,25 @@ static inline u8 unicode_rev(unsigned int age) ...@@ -28,8 +30,25 @@ static inline u8 unicode_rev(unsigned int age)
return age & 0xff; return age & 0xff;
} }
/*
* Two normalization forms are supported:
* 1) NFDI
* - Apply unicode normalization form NFD.
* - Remove any Default_Ignorable_Code_Point.
* 2) NFDICF
* - Apply unicode normalization form NFD.
* - Remove any Default_Ignorable_Code_Point.
* - Apply a full casefold (C + F).
*/
enum utf8_normalization {
UTF8_NFDI = 0,
UTF8_NFDICF,
UTF8_NMAX,
};
struct unicode_map { struct unicode_map {
unsigned int version; unsigned int version;
const struct utf8data *ntab[UTF8_NMAX];
}; };
int utf8_validate(const struct unicode_map *um, const struct qstr *str); int utf8_validate(const struct unicode_map *um, const struct qstr *str);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment