Commit b28118ab authored by unknown's avatar unknown

Fix for BUG#5117:

 * Renamed handler::estimate_number_of_rows to handler::estimate_rows_upper_bound function, which can also return "unknown"
 * made filesort to use full sort buffer if number of rows to sort is not known.


sql/examples/ha_tina.h:
  Renamed estimate_number_of_rows -> estimate_rows_upper_bound
sql/filesort.cc:
   * Fix for BUG#5117: made filesort() to use full sort buffer if number of rows to sort is not known.
   * Comments and assertion added
sql/ha_berkeley.cc:
  Renamed estimate_number_of_rows -> estimate_rows_upper_bound
sql/ha_berkeley.h:
  Renamed estimate_number_of_rows -> estimate_rows_upper_bound
sql/ha_innodb.cc:
  Renamed estimate_number_of_rows -> estimate_rows_upper_bound
sql/ha_innodb.h:
  Renamed estimate_number_of_rows -> estimate_rows_upper_bound
sql/handler.h:
  Renamed estimate_number_of_rows -> estimate_rows_upper_bound
parent 3f70b62c
......@@ -90,6 +90,12 @@ class ha_tina: public handler
/* The next method will never be called */
virtual double read_time(ha_rows rows) { DBUG_ASSERT(0); return((double) rows / 20.0+1); }
virtual bool fast_key_read() { return 1;}
/*
TODO: return actual upper bound of number of records in the table.
(e.g. save number of records seen on full table scan and/or use file size
as upper bound)
*/
ha_rows estimate_rows_upper_bound() { return HA_POS_ERROR; }
int open(const char *name, int mode, uint test_if_locked);
int close(void);
......
......@@ -169,7 +169,13 @@ ha_rows filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder, uint s_length,
else
#endif
{
records=table->file->estimate_number_of_rows();
records= table->file->estimate_rows_upper_bound();
/*
If number of records is not known, use as much of sort buffer
as possible.
*/
if (records == HA_POS_ERROR)
records--; // we use 'records+1' below.
selected_records_file= 0;
}
......@@ -315,7 +321,7 @@ static char **make_char_array(register uint fields, uint length, myf my_flag)
} /* make_char_array */
/* Read all buffer pointers into memory */
/* Read 'count' number of buffer pointers into memory */
static BUFFPEK *read_buffpek_from_file(IO_CACHE *buffpek_pointers, uint count)
{
......@@ -336,8 +342,40 @@ static BUFFPEK *read_buffpek_from_file(IO_CACHE *buffpek_pointers, uint count)
}
/* Search after sort_keys and place them in a temp. file */
/*
Search after sort_keys and write them into tempfile.
SYNOPSIS
find_all_keys()
param Sorting parameter
select Use this to get source data
sort_keys Array of pointers to sort key + addon buffers.
buffpek_pointers File to write BUFFPEKs describing sorted segments
in tempfile.
tempfile File to write sorted sequences of sortkeys to.
indexfile If !NULL, use it for source data (contains rowids)
NOTE
Basic idea:
while (get_next_sortkey())
{
if (no free space in sort_keys buffers)
{
sort sort_keys buffer;
dump sorted sequence to 'tempfile';
dump BUFFPEK describing sequence location into 'buffpek_pointers';
}
put sort key into 'sort_keys';
}
if (sort_keys has some elements && dumped at least once)
sort-dump-dump as above;
else
don't sort, leave sort_keys array to be sorted by caller.
All produced sequences are guaranteed to be non-empty.
RETURN
Number of records written on success.
HA_POS_ERROR on error.
*/
static ha_rows find_all_keys(SORTPARAM *param, SQL_SELECT *select,
uchar **sort_keys,
......@@ -452,7 +490,25 @@ static ha_rows find_all_keys(SORTPARAM *param, SQL_SELECT *select,
} /* find_all_keys */
/* Skriver en buffert med nycklar till filen */
/*
Sort the buffer and write:
1) the sorted sequence to tempfile
2) a BUFFPEK describing the sorted sequence position to buffpek_pointers
(was: Skriver en buffert med nycklar till filen)
SYNOPSIS
write_keys()
param Sort parameters
sort_keys Array of pointers to keys to sort
count Number of elements in sort_keys array
buffpek_pointers One 'BUFFPEK' struct will be written into this file.
The BUFFPEK::{file_pos, count} will indicate where
the sorted data was stored.
tempfile The sorted sequence will be written into this file.
RETURN
0 OK
1 Error
*/
static int
write_keys(SORTPARAM *param, register uchar **sort_keys, uint count,
......@@ -784,7 +840,21 @@ uint read_to_buffer(IO_CACHE *fromfile, BUFFPEK *buffpek,
/*
Merge buffers to one buffer
Merge buffers to one buffer
SYNOPSIS
merge_buffers()
param Sort parameter
from_file File with source data (BUFFPEKs point to this file)
to_file File to write the sorted result data.
sort_buffer Buffer for data to store up to MERGEBUFF2 sort keys.
lastbuff OUT Store here BUFFPEK describing data written to to_file
Fb First element in source BUFFPEKs array
Tb Last element in source BUFFPEKs array
flag
RETURN
0 - OK
other - error
*/
int merge_buffers(SORTPARAM *param, IO_CACHE *from_file,
......@@ -822,6 +892,9 @@ int merge_buffers(SORTPARAM *param, IO_CACHE *from_file,
strpos= (uchar*) sort_buffer;
org_max_rows=max_rows= param->max_rows;
/* The following will fire if there is not enough space in sort_buffer */
DBUG_ASSERT(maxcount!=0);
if (init_queue(&queue, (uint) (Tb-Fb)+1, offsetof(BUFFPEK,key), 0,
(queue_compare) (cmp= get_ptr_compare(sort_length)),
(void*) &sort_length))
......
......@@ -25,7 +25,7 @@
We will need an updated Berkeley DB version for this.
- Killing threads that has got a 'deadlock'
- SHOW TABLE STATUS should give more information about the table.
- Get a more accurate count of the number of rows (estimate_number_of_rows()).
- Get a more accurate count of the number of rows (estimate_rows_upper_bound()).
We could store the found number of rows when the table is scanned and
then increment the counter for each attempted write.
- We will need to extend the manager thread to makes checkpoints at
......@@ -63,7 +63,7 @@
#define HA_BERKELEY_ROWS_IN_TABLE 10000 /* to get optimization right */
#define HA_BERKELEY_RANGE_COUNT 100
#define HA_BERKELEY_MAX_ROWS 10000000 /* Max rows in table */
/* extra rows for estimate_number_of_rows() */
/* extra rows for estimate_rows_upper_bound() */
#define HA_BERKELEY_EXTRA_ROWS 100
/* Bits for share->status */
......@@ -2556,7 +2556,7 @@ end:
Used when sorting to allocate buffers and by the optimizer.
*/
ha_rows ha_berkeley::estimate_number_of_rows()
ha_rows ha_berkeley::estimate_rows_upper_bound()
{
return share->rows + HA_BERKELEY_EXTRA_ROWS;
}
......
......@@ -100,7 +100,7 @@ class ha_berkeley: public handler
ulong table_flags(void) const { return int_table_flags; }
uint max_supported_keys() const { return MAX_KEY-1; }
uint extra_rec_buf_length() { return BDB_HIDDEN_PRIMARY_KEY_LENGTH; }
ha_rows estimate_number_of_rows();
ha_rows estimate_rows_upper_bound();
const key_map *keys_to_use_for_scanning() { return &key_map_full; }
bool has_transactions() { return 1;}
......
......@@ -4115,7 +4115,7 @@ Gives an UPPER BOUND to the number of rows in a table. This is used in
filesort.cc. */
ha_rows
ha_innobase::estimate_number_of_rows(void)
ha_innobase::estimate_rows_upper_bound(void)
/*======================================*/
/* out: upper bound of rows */
{
......@@ -4124,7 +4124,7 @@ ha_innobase::estimate_number_of_rows(void)
ulonglong estimate;
ulonglong local_data_file_length;
DBUG_ENTER("estimate_number_of_rows");
DBUG_ENTER("estimate_rows_upper_bound");
/* We do not know if MySQL can call this function before calling
external_lock(). To be safe, update the thd of the current table
......@@ -4204,7 +4204,7 @@ ha_innobase::read_time(
time_for_scan = scan_time();
if ((total_rows = estimate_number_of_rows()) < rows)
if ((total_rows = estimate_rows_upper_bound()) < rows)
return time_for_scan;
return (ranges + (double) rows / (double) total_rows * time_for_scan);
......
......@@ -150,7 +150,7 @@ class ha_innobase: public handler
void position(byte *record);
ha_rows records_in_range(uint inx, key_range *min_key, key_range *max_key);
ha_rows estimate_number_of_rows();
ha_rows estimate_rows_upper_bound();
int create(const char *name, register TABLE *form,
HA_CREATE_INFO *create_info);
......
......@@ -300,7 +300,15 @@ public:
virtual const key_map *keys_to_use_for_scanning() { return &key_map_empty; }
virtual bool has_transactions(){ return 0;}
virtual uint extra_rec_buf_length() { return 0; }
virtual ha_rows estimate_number_of_rows() { return records+EXTRA_RECORDS; }
/*
Return upper bound of current number of records in the table
(max. of how many records one will retrieve when doing a full table scan)
If upper bound is not known, HA_POS_ERROR should be returned as a max
possible upper bound.
*/
virtual ha_rows estimate_rows_upper_bound()
{ return records+EXTRA_RECORDS; }
virtual const char *index_type(uint key_number) { DBUG_ASSERT(0); return "";}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment