Commit b3cedf63 authored by Vladislav Vaintroub

MDEV-4742 - address review comments.

- Remove the optional second parameter of natural_sort_key(), and all fraction
handling.

- Rename natsort_num2str() to natsort_encode_length() to make the intent clear:
it encodes string *lengths* only, not whitespace or anything else.

It handles lengths for which log10(len) >= 10, even though these cannot occur
for MariaDB Strings (whose length is limited to 32 bits, so log10(len) <= 9).

- Do not let natural sort key grow past max_packet_length.


- Split Item_func_natural_sort_key::val_str() further and add
natsort_encode_numeric_string(), which contains a comment on how
whitespace is handled.

- Simplify and speed up to_natsort_key() in the common case by removing the
handling of the unusual charsets utf16/utf32, which encode digits in several
bytes. In the rare cases where utf16/utf32 is used, we convert to utf8 before
creating keys, and back to the original charset afterwards.
parent 5b29d407
......@@ -64,106 +64,45 @@ SHOW CREATE TABLE t2;
Table Create Table
t2 CREATE TABLE `t2` (
`c` varchar(30) CHARACTER SET latin1 COLLATE latin1_bin DEFAULT NULL,
`NATURAL_SORT_KEY(c)` varchar(45) CHARACTER SET latin1 COLLATE latin1_bin DEFAULT NULL
`NATURAL_SORT_KEY(c)` varchar(60) CHARACTER SET latin1 COLLATE latin1_bin DEFAULT NULL
) ENGINE=MyISAM DEFAULT CHARSET=latin1
DROP TABLE t1,t2;
SELECT RPAD(val,21,' ') value , RPAD(NATURAL_SORT_KEY(val),26,' ') sortkey , LENGTH(NATURAL_SORT_KEY(val)) - LENGTH(val) encoding_overhead
SELECT RPAD(val,28,' ') value , RPAD(NATURAL_SORT_KEY(val),35,' ') sortkey , LENGTH(NATURAL_SORT_KEY(val)) - LENGTH(val) encoding_overhead
FROM
(
SELECT 0 val
UNION VALUES ('0.0'),('0.1'), ('0,15'),('1.001'),('1.002'),('1.010'),('1.02'),('1.1'),('1.3'),('1'),('01'),('0001')
UNION SELECT CONCAT('1',repeat('0',seq)) FROM seq_1_to_20
UNION VALUES ('1'),('01'),('0001')
UNION SELECT CONCAT('1',repeat('0',seq)) FROM seq_1_to_27
) AS numbers ORDER BY sortkey;
value sortkey encoding_overhead
0 00 1
0,15 00,115 2
0.0 00.00 2
0.1 00.01 2
1 01 1
1.1 01.01 2
1.001 01.011 1
1.02 01.020 2
1.002 01.021 1
1.3 01.03 2
1.010 01.1100 2
01 010 1
0001 012 -1
10 110 1
100 2100 1
1000 31000 1
10000 410000 1
100000 5100000 1
1000000 61000000 1
10000000 710000000 1
100000000 8100000000 1
1000000000 901000000000 2
10000000000 9110000000000 2
100000000000 92100000000000 2
1000000000000 931000000000000 2
10000000000000 9410000000000000 2
100000000000000 95100000000000000 2
1000000000000000 961000000000000000 2
10000000000000000 9710000000000000000 2
100000000000000000 98100000000000000000 2
1000000000000000000 9901000000000000000000 3
10000000000000000000 9912010000000000000000000 5
100000000000000000000 99121100000000000000000000 5
# Disable fractions handling by passing NULL as second parameter to NATURAL_SORT_KEY
SELECT val value
FROM
(
SELECT 0 val
UNION VALUES ('0.1'),('1.001'),('1.002'),('1.010'),('1.02'),('1.1'),('1.3'),('1'), ('0,1'),('1,001'),('1,002'),('1,010'),('1,02'),('1,1'),('1,3'),('1'))
AS numbers ORDER BY NATURAL_SORT_KEY(val, NULL);
value
0
0,1
0.1
1
1,1
1,001
1,02
1,002
1,3
1,010
1.1
1.001
1.02
1.002
1.3
1.010
# Use ',' as decimal separator for NATURAL_SORT_KEY
SELECT val value, NATURAL_SORT_KEY(val,',') sortkey
FROM
(
SELECT 0 val
UNION VALUES ('0,1'),('1,001'),('1,002'),('1,010'),('1,02'),('1,1'),('1,3'),('1'))
AS numbers ORDER BY sortkey;
value sortkey
0 00
0,1 00,1
1 01
1,001 01,001
1,002 01,002
1,010 01,010
1,02 01,02
1,1 01,1
1,3 01,3
# Use '.' as decimal separator for NATURAL_SORT_KEY
SELECT val value,NATURAL_SORT_KEY(val,'.') sortkey
FROM
(
SELECT 0 val
UNION VALUES ('0.1'),('1.001'),('1.002'),('1.010'),('1.02'),('1.1'),('1.3'),('1'))
AS numbers ORDER BY sortkey;
value sortkey
0 00
0.1 00.1
1 01
1.001 01.001
1.002 01.002
1.010 01.010
1.02 01.02
1.1 01.1
1.3 01.3
SET NAMES DEFAULT;
0 000 2
1 010 2
01 011 1
0001 013 -1
10 1100 2
100 21000 2
1000 310000 2
10000 4100000 2
100000 51000000 2
1000000 610000000 2
10000000 7100000000 2
100000000 81000000000 2
1000000000 9010000000000 3
10000000000 91100000000000 3
100000000000 921000000000000 3
1000000000000 9310000000000000 3
10000000000000 94100000000000000 3
100000000000000 951000000000000000 3
1000000000000000 9610000000000000000 3
10000000000000000 97100000000000000000 3
100000000000000000 981000000000000000000 3
1000000000000000000 99010000000000000000000 4
10000000000000000000 991100000000000000000000 4
100000000000000000000 9921000000000000000000000 4
1000000000000000000000 99310000000000000000000000 4
10000000000000000000000 994100000000000000000000000 4
100000000000000000000000 9951000000000000000000000000 4
1000000000000000000000000 99610000000000000000000000000 4
10000000000000000000000000 997100000000000000000000000000 4
100000000000000000000000000 9981000000000000000000000000000 4
1000000000000000000000000000 99901271000000000000000000000000000 8
......@@ -29,36 +29,11 @@ CREATE TABLE t2 AS SELECT c, NATURAL_SORT_KEY(c) FROM t1 WHERE 0;
SHOW CREATE TABLE t2;
DROP TABLE t1,t2;
#Show encoding of numbers, including fractions, and leading whitespace.
SELECT RPAD(val,21,' ') value , RPAD(NATURAL_SORT_KEY(val),26,' ') sortkey , LENGTH(NATURAL_SORT_KEY(val)) - LENGTH(val) encoding_overhead
#Show encoding of numbers, with some leading whitespace.
SELECT RPAD(val,28,' ') value , RPAD(NATURAL_SORT_KEY(val),35,' ') sortkey , LENGTH(NATURAL_SORT_KEY(val)) - LENGTH(val) encoding_overhead
FROM
(
SELECT 0 val
UNION VALUES ('0.0'),('0.1'), ('0,15'),('1.001'),('1.002'),('1.010'),('1.02'),('1.1'),('1.3'),('1'),('01'),('0001')
UNION SELECT CONCAT('1',repeat('0',seq)) FROM seq_1_to_20
UNION VALUES ('1'),('01'),('0001')
UNION SELECT CONCAT('1',repeat('0',seq)) FROM seq_1_to_27
) AS numbers ORDER BY sortkey;
--echo # Disable fractions handling by passing NULL as second parameter to NATURAL_SORT_KEY
SELECT val value
FROM
(
SELECT 0 val
UNION VALUES ('0.1'),('1.001'),('1.002'),('1.010'),('1.02'),('1.1'),('1.3'),('1'), ('0,1'),('1,001'),('1,002'),('1,010'),('1,02'),('1,1'),('1,3'),('1'))
AS numbers ORDER BY NATURAL_SORT_KEY(val, NULL);
--echo # Use ',' as decimal separator for NATURAL_SORT_KEY
SELECT val value, NATURAL_SORT_KEY(val,',') sortkey
FROM
(
SELECT 0 val
UNION VALUES ('0,1'),('1,001'),('1,002'),('1,010'),('1,02'),('1,1'),('1,3'),('1'))
AS numbers ORDER BY sortkey;
--echo # Use '.' as decimal separator for NATURAL_SORT_KEY
SELECT val value,NATURAL_SORT_KEY(val,'.') sortkey
FROM
(
SELECT 0 val
UNION VALUES ('0.1'),('1.001'),('1.002'),('1.010'),('1.02'),('1.1'),('1.3'),('1'))
AS numbers ORDER BY sortkey;
SET NAMES DEFAULT;
......@@ -1622,11 +1622,10 @@ class Create_func_name_const : public Create_func_arg2
virtual ~Create_func_name_const() {}
};
class Create_func_natural_sort_key : public Create_native_func
class Create_func_natural_sort_key : public Create_func_arg1
{
public:
virtual Item *create_native(THD *thd, LEX_CSTRING *name,
List<Item> *item_list) override;
virtual Item *create_1_arg(THD *thd, Item *arg1) override;
static Create_func_natural_sort_key s_singleton;
protected:
Create_func_natural_sort_key() {}
......@@ -4664,33 +4663,9 @@ Create_func_md5::create_1_arg(THD *thd, Item *arg1)
Create_func_natural_sort_key Create_func_natural_sort_key::s_singleton;
Item *Create_func_natural_sort_key::create_native(THD *thd, LEX_CSTRING *name,
List<Item> *item_list)
Item *Create_func_natural_sort_key::create_1_arg(THD *thd, Item* arg1)
{
Item *func= NULL;
int arg_count= 0;
if (item_list != NULL)
arg_count= item_list->elements;
Item *param_1, *param_2;
switch (arg_count)
{
case 1:
param_1= item_list->pop();
func= new (thd->mem_root) Item_func_natural_sort_key(thd, param_1);
break;
case 2:
param_1= item_list->pop();
param_2= item_list->pop();
func= new (thd->mem_root) Item_func_natural_sort_key(thd, param_1, param_2);
break;
default:
my_error(ER_WRONG_PARAMCOUNT_TO_NATIVE_FCT, MYF(0), name->str);
break;
}
return func;
return new (thd->mem_root) Item_func_natural_sort_key(thd, arg1);
}
Create_func_monthname Create_func_monthname::s_singleton;
......
This diff is collapsed.
......@@ -273,17 +273,9 @@ class Item_func_aes_decrypt :public Item_aes_crypt
class Item_func_natural_sort_key : public Item_str_func
{
my_wc_t m_decimal_separator;
static constexpr my_wc_t DECIMAL_SEP_UNDEFINED=ULONG_MAX-1;
static constexpr my_wc_t DECIMAL_SEP_NONE= ULONG_MAX;
public:
Item_func_natural_sort_key(THD *thd, Item *a)
: Item_str_func(thd, a), m_decimal_separator(DECIMAL_SEP_NONE){}
Item_func_natural_sort_key(THD *thd, Item *a, Item *b)
: Item_str_func(thd, a, b), m_decimal_separator(DECIMAL_SEP_UNDEFINED){}
my_wc_t decimal_separator();
: Item_str_func(thd, a){};
String *val_str(String *) override;
LEX_CSTRING func_name_cstring() const override
{
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment