multi_range_read.h 22 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
/*
   Copyright (c) 2009, 2011, Monty Program Ab

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */

17 18 19
/**
  @defgroup DS-MRR declarations
  @{
20
*/
21 22

/**
23
  A Disk-Sweep implementation of MRR Interface (DS-MRR for short)
24

25 26 27 28 29 30 31 32
  This is a "plugin"(*) for storage engines that makes it possible to
    1. When doing index scans, read table rows in rowid order;
    2. when making many index lookups, do them in key order and don't
       lookup the same key value multiple times;
    3. Do both #1 and #2, when applicable.
  These changes are expected to speed up query execution for disk-based 
  storage engines running io-bound loads and "big" queries (ie. queries that
  do joins and enumerate lots of records).
33 34 35 36 37 38 39 40

  (*) - only conceptually. No dynamic loading or binary compatibility of any
        kind.

  General scheme of things:
   
      SQL Layer code
       |   |   |
41 42
       v   v   v 
      -|---|---|---- handler->multi_range_read_XXX() function calls
43
       |   |   |
44 45 46 47 48 49 50 51
      _____________________________________
     / DS-MRR module                       \
     | (order/de-duplicate lookup keys,    |
     | scan indexes in key order,          |
     | order/de-duplicate rowids,          |
     | retrieve full record reads in rowid |
     | order)                              |
     \_____________________________________/
52 53 54 55 56 57
       |   |   |
      -|---|---|----- handler->read_range_first()/read_range_next(), 
       |   |   |      handler->index_read(), handler->rnd_pos() calls.
       |   |   |
       v   v   v
      Storage engine internals
58 59


60 61 62 63 64
  Currently DS-MRR is used by MyISAM, InnoDB/XtraDB and Maria storage engines.
  Potentially it can be used with any table handler that has disk-based data
  storage and has better performance when reading data in rowid order.
*/

65
#include "sql_lifo_buffer.h"
66

Sergey Petrunya's avatar
Sergey Petrunya committed
67
class DsMrr_impl;
Sergey Petrunya's avatar
Sergey Petrunya committed
68 69
class Mrr_ordered_index_reader;

Sergey Petrunya's avatar
Sergey Petrunya committed
70

Sergey Petrunya's avatar
Sergey Petrunya committed
71
/* A structure with key parameters that's shared among several classes */
72 73 74 75 76 77 78 79 80 81 82 83 84 85 86
/*
  Plain parameter bundle describing the index lookup tuples of an MRR scan.
  All members are public data; the class declares no member functions.
*/
class Key_parameters
{
public:
  uint         key_tuple_length; /* Length of index lookup tuple, in bytes */
  key_part_map key_tuple_map;    /* keyparts used in index lookup tuples */

  /*
    This is
      = key_tuple_length   if we copy keys to buffer
      = sizeof(void*)      if we're using pointers to materialized keys.
  */
  uint key_size_in_keybuf;

  /* TRUE <=> don't copy key values, use pointers to them instead. */
  bool use_key_pointers;

  /* TRUE <=> We can get at most one index tuple for a lookup key */
  bool index_ranges_unique;
};

Sergey Petrunya's avatar
Sergey Petrunya committed
92

Sergey Petrunya's avatar
Sergey Petrunya committed
93
/**
Sergey Petrunya's avatar
Sergey Petrunya committed
94
  A class to enumerate (record, range_id) pairs that match given key value.
Sergey Petrunya's avatar
Sergey Petrunya committed
95
  
Sergey Petrunya's avatar
Sergey Petrunya committed
96
  @note
Sergey Petrunya's avatar
Sergey Petrunya committed
97

Sergey Petrunya's avatar
Sergey Petrunya committed
98 99
  The idea is that we have a Lifo_buffer which holds (key, range_id) pairs
  ordered by key value. From the front of the buffer we see
Sergey Petrunya's avatar
Sergey Petrunya committed
100

Sergey Petrunya's avatar
Sergey Petrunya committed
101
    (key_val1, range_id1), (key_val1, range_id2) ... (key_val2, range_idN)
Sergey Petrunya's avatar
Sergey Petrunya committed
102

Sergey Petrunya's avatar
Sergey Petrunya committed
103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129
  we take the first elements that have the same key value (key_val1 in the
  example above), and make lookup into the table.  The table will have 
  multiple matches for key_val1:
 
                  == Table Index ==
                   ...
     key_val1 ->  key_val1, index_tuple1
                  key_val1, index_tuple2
                   ...
                  key_val1, index_tupleN
                   ...
  
  Our goal is to produce all possible combinations, i.e. we need:
  
    {(key_val1, index_tuple1), range_id1}
    {(key_val1, index_tuple1), range_id2}
       ...           ...               |
    {(key_val1, index_tuple1), range_idN},
                  
    {(key_val1, index_tuple2), range_id1}
    {(key_val1, index_tuple2), range_id2}
        ...          ...               |
    {(key_val1, index_tuple2), range_idN},

        ...          ...          ...                          

    {(key_val1, index_tupleK), range_idN}
Sergey Petrunya's avatar
Sergey Petrunya committed
130
*/
Sergey Petrunya's avatar
Sergey Petrunya committed
131

Sergey Petrunya's avatar
Sergey Petrunya committed
132 133
class Key_value_records_iterator
{
Sergey Petrunya's avatar
Sergey Petrunya committed
134 135 136
  /* Use this to get table handler, key buffer and other parameters */
  Mrr_ordered_index_reader *owner;

Sergey Petrunya's avatar
Sergey Petrunya committed
137 138 139 140 141 142 143
  /* Iterator to get (key, range_id) pairs from */
  Lifo_buffer_iterator identical_key_it;
  
  /* 
    Last of the identical key values (when we get this pointer from
    identical_key_it, it will be time to stop).
  */
Sergey Petrunya's avatar
Sergey Petrunya committed
144
  uchar *last_identical_key_ptr;
Sergey Petrunya's avatar
Sergey Petrunya committed
145 146 147

  /*
    FALSE <=> we're right after the init() call, the record has been already
148
    read with owner->file->index_read_map() call
Sergey Petrunya's avatar
Sergey Petrunya committed
149
  */
Sergey Petrunya's avatar
Sergey Petrunya committed
150
  bool get_next_row;
151
  
Sergey Petrunya's avatar
Sergey Petrunya committed
152
public:
Sergey Petrunya's avatar
Sergey Petrunya committed
153
  int init(Mrr_ordered_index_reader *owner_arg);
154
  int get_next(range_id_t *range_info);
155
  void move_to_next_key_value();
Sergey Petrunya's avatar
Sergey Petrunya committed
156 157 158
};


159
/*
Sergey Petrunya's avatar
Sergey Petrunya committed
160 161
  Buffer manager interface. Mrr_reader objects use it to ask DsMrr_impl
  to manage buffer space for them.
162
*/
163
/*
  A pair of callbacks plus the opaque argument to hand to them. The callbacks
  are plain function pointers taking 'arg' as their only parameter.
*/
typedef struct st_buffer_manager
{
public:
  /* Opaque value to be passed as the first argument to all member functions */
  void *arg;

  /*
    This is called when we've freed more space from the rowid buffer. The
    callee will get the unused space from the rowid buffer and give it to the
    key buffer.
  */
  void (*redistribute_buffer_space)(void *arg);

  /*
    This is called when both key and rowid buffers are empty, and so it's time
    to reset them to their original size (They've lost their original size,
    because we were dynamically growing rowid buffer and shrinking key buffer).
  */
  void (*reset_buffer_sizes)(void *arg);

} Buffer_manager;
184 185 186


/* 
Sergey Petrunya's avatar
Sergey Petrunya committed
187
  Mrr_reader - DS-MRR execution strategy abstraction
188

Sergey Petrunya's avatar
Sergey Petrunya committed
189 190
  A reader produces ([index]_record, range_info) pairs, and requires periodic
  refill operations.
191

Sergey Petrunya's avatar
Sergey Petrunya committed
192 193 194 195 196
  - one starts using the reader by calling reader->get_next(),
  - when a get_next() call returns HA_ERR_END_OF_FILE, one must call 
    refill_buffer() before they can make more get_next() calls.
  - when refill_buffer() returns HA_ERR_END_OF_FILE, this means the real
    end of stream and get_next() should not be called anymore.
197

Sergey Petrunya's avatar
Sergey Petrunya committed
198 199
  Both functions can return other error codes, these mean unrecoverable errors
  after which one cannot continue.
200 201
*/

Sergey Petrunya's avatar
Sergey Petrunya committed
202
class Mrr_reader 
203 204
{
public:
205
  virtual int get_next(range_id_t *range_info) = 0;
206
  virtual int refill_buffer(bool initial) = 0;
Sergey Petrunya's avatar
Sergey Petrunya committed
207
  virtual ~Mrr_reader() {}; /* just to remove compiler warning */
208 209 210
};


Sergey Petrunya's avatar
Sergey Petrunya committed
211 212 213 214
/* 
  A common base for readers that do index scans and produce index tuples 
*/

Sergey Petrunya's avatar
Sergey Petrunya committed
215
class Mrr_index_reader : public Mrr_reader
216
{
Sergey Petrunya's avatar
Sergey Petrunya committed
217
protected:
218
  handler *file; /* Handler object to use */
219 220 221
public:
  virtual int init(handler *h_arg, RANGE_SEQ_IF *seq_funcs, 
                   void *seq_init_param, uint n_ranges,
222 223 224
                   uint mode, Key_parameters *key_par, 
                   Lifo_buffer *key_buffer, 
                   Buffer_manager *buf_manager_arg) = 0;
Sergey Petrunya's avatar
Sergey Petrunya committed
225 226

  /* Get pointer to place where every get_next() call will put rowid */
Sergey Petrunya's avatar
Sergey Petrunya committed
227
  virtual uchar *get_rowid_ptr() = 0;
Sergey Petrunya's avatar
Sergey Petrunya committed
228
  /* Get the rowid (call this after get_next() call) */
229
  virtual void position();
230
  virtual bool skip_record(range_id_t range_id, uchar *rowid) = 0;
231 232 233

  virtual void interrupt_read() {}
  virtual void resume_read() {}
234 235 236 237
};


/*
Sergey Petrunya's avatar
Sergey Petrunya committed
238 239 240
  A "bypass" index reader that just does an index scan. The index scan is done 
  by calling default MRR implementation (i.e.  handler::multi_range_read_XXX())
  functions.
241 242 243 244 245
*/

class Mrr_simple_index_reader : public Mrr_index_reader
{
public:
246
  int init(handler *h_arg, RANGE_SEQ_IF *seq_funcs,
247
           void *seq_init_param, uint n_ranges,
248 249 250
           uint mode, Key_parameters *key_par,
           Lifo_buffer *key_buffer,
           Buffer_manager *buf_manager_arg);
251
  int get_next(range_id_t *range_info);
252
  int refill_buffer(bool initial) { return initial? 0: HA_ERR_END_OF_FILE; }
253
  uchar *get_rowid_ptr() { return file->ref; }
254
  bool skip_record(range_id_t range_id, uchar *rowid)
255
  {
256 257
    return (file->mrr_funcs.skip_record &&
            file->mrr_funcs.skip_record(file->mrr_iter, range_id, rowid));
258 259 260 261 262
  }
};


/* 
Sergey Petrunya's avatar
Sergey Petrunya committed
263
  A reader that sorts the key values before it makes the index lookups.
264 265 266 267 268 269 270
*/

class Mrr_ordered_index_reader : public Mrr_index_reader
{
public:
  int init(handler *h_arg, RANGE_SEQ_IF *seq_funcs, 
           void *seq_init_param, uint n_ranges,
271 272 273
           uint mode, Key_parameters *key_par,
           Lifo_buffer *key_buffer,
           Buffer_manager *buf_manager_arg);
274
  int get_next(range_id_t *range_info);
275
  int refill_buffer(bool initial);
276
  uchar *get_rowid_ptr() { return file->ref; }
277
  
278
  bool skip_record(range_id_t range_info, uchar *rowid)
279 280 281 282
  {
    return (mrr_funcs.skip_record &&
            mrr_funcs.skip_record(mrr_iter, range_info, rowid));
  }
Sergey Petrunya's avatar
Sergey Petrunya committed
283

284
  bool skip_index_tuple(range_id_t range_info)
Sergey Petrunya's avatar
Sergey Petrunya committed
285 286 287 288
  {
    return (mrr_funcs.skip_index_tuple &&
            mrr_funcs.skip_index_tuple(mrr_iter, range_info));
  }
289
  
290 291
  bool set_interruption_temp_buffer(uint rowid_length, uint key_len, 
                                    uint saved_pk_len,
292 293
                                    uchar **space_start, uchar *space_end);
  void set_no_interruption_temp_buffer();
Sergey Petrunya's avatar
Sergey Petrunya committed
294

295 296 297
  void interrupt_read();
  void resume_read();
  void position();
298 299 300 301 302 303 304
private:
  Key_value_records_iterator kv_it;

  bool scanning_key_val_iter;
  
  /* Buffer to store (key, range_id) pairs */
  Lifo_buffer *key_buffer;
Sergey Petrunya's avatar
Sergey Petrunya committed
305 306
  
  /* This manages key buffer allocation and sizing for us */
307 308
  Buffer_manager *buf_manager;

Sergey Petrunya's avatar
Sergey Petrunya committed
309
  Key_parameters  keypar; /* index scan and lookup tuple parameters */
Sergey Petrunya's avatar
Sergey Petrunya committed
310

311 312
  /* TRUE <=> need range association, buffers hold {rowid, range_id} pairs */
  bool is_mrr_assoc;
Sergey Petrunya's avatar
Sergey Petrunya committed
313 314
  
  /* Range sequence iteration members */
315 316
  RANGE_SEQ_IF mrr_funcs;
  range_seq_t mrr_iter;
317 318 319
  
  /* TRUE == reached eof when enumerating ranges */
  bool source_exhausted;
320
   
Sergey Petrunya's avatar
Sergey Petrunya committed
321
  /* 
322 323 324 325 326 327 328 329 330 331 332 333 334
    Following members are for interrupt_read()/resume_read(). The idea is that 
    in some cases index scan that is done by this object is interrupted by
    rnd_pos() calls made by Mrr_ordered_rndpos_reader. The problem is that
    we're sharing handler->record[0] with that object, and it destroys its
    contents.
    We need to save/restore our current
    - index tuple (for pushed index condition checks)
    - clustered primary key values (again, for pushed index condition checks)
    - rowid of the last record we've retrieved (in case this rowid matches
      multiple ranges and we'll need to return it again)
  */ 
  bool support_scan_interruptions;
  /* Space where we save the rowid of the last record we've returned */
335
  uchar *saved_rowid;
336
  
Sergey Petrunya's avatar
Sergey Petrunya committed
337
  /* TRUE <=> saved_rowid has the last saved rowid */
338
  bool have_saved_rowid;
339 340 341
  
  uchar *saved_key_tuple; /* Saved current key tuple */
  uchar *saved_primary_key; /* Saved current primary key tuple */
342

343 344
  static int compare_keys(void* arg, uchar* key1, uchar* key2);
  static int compare_keys_reverse(void* arg, uchar* key1, uchar* key2);
345 346 347 348 349 350 351
  
  friend class Key_value_records_iterator; 
  friend class DsMrr_impl;
  friend class Mrr_ordered_rndpos_reader;
};


Sergey Petrunya's avatar
Sergey Petrunya committed
352 353 354 355
/* 
  A reader that gets rowids from an Mrr_index_reader, and then sorts them 
  before getting full records with handler->rnd_pos() calls.
*/
356

Sergey Petrunya's avatar
Sergey Petrunya committed
357
class Mrr_ordered_rndpos_reader : public Mrr_reader 
358 359
{
public:
360
  int init(handler *file, Mrr_index_reader *index_reader, uint mode,
361
           Lifo_buffer *buf);
362
  int get_next(range_id_t *range_info);
363
  int refill_buffer(bool initial);
364
private:
365
  handler *file; /* Handler to use */
366 367 368
  
  /* This what we get (rowid, range_info) pairs from */
  Mrr_index_reader *index_reader;
Sergey Petrunya's avatar
Sergey Petrunya committed
369 370

  /* index_reader->get_next() puts rowid here */
371
  uchar *index_rowid;
Sergey Petrunya's avatar
Sergey Petrunya committed
372 373
  
  /* TRUE <=> index_reader->refill_buffer() call has returned EOF */
Sergey Petrunya's avatar
Sergey Petrunya committed
374
  bool index_reader_exhausted;
375
  
Sergey Petrunya's avatar
Sergey Petrunya committed
376 377 378 379 380 381
  /* 
    TRUE <=> We should call index_reader->refill_buffer(). This happens if
    1. we've made index_reader->get_next() call which returned EOF
    2. we haven't made any index_reader calls (and our first call should 
       be index_reader->refill_buffer(initial=TRUE)
  */
382
  bool index_reader_needs_refill;
Sergey Petrunya's avatar
Sergey Petrunya committed
383

384 385
  /* TRUE <=> need range association, buffers hold {rowid, range_id} pairs */
  bool is_mrr_assoc;
Sergey Petrunya's avatar
Sergey Petrunya committed
386 387 388 389 390
  
  /* 
    When reading from ordered rowid buffer: the rowid element of the last
    buffer element that has rowid identical to this one.
  */
391
  uchar *last_identical_rowid;
Sergey Petrunya's avatar
Sergey Petrunya committed
392 393

  /* Buffer to store (rowid, range_id) pairs */
394 395
  Lifo_buffer *rowid_buffer;
  
396
  int refill_from_index_reader();
397 398
};

Sergey Petrunya's avatar
Sergey Petrunya committed
399

Sergey Petrunya's avatar
Sergey Petrunya committed
400 401 402 403 404
/*
  A primitive "factory" of various Mrr_*_reader classes (the point is to 
  get various kinds of readers without having to allocate them on the heap)
*/

Sergey Petrunya's avatar
Sergey Petrunya committed
405
class Mrr_reader_factory
406 407 408 409 410 411 412
{
public:
  Mrr_ordered_rndpos_reader ordered_rndpos_reader;
  Mrr_ordered_index_reader  ordered_index_reader;
  Mrr_simple_index_reader   simple_index_reader;
};

Sergey Petrunya's avatar
Sergey Petrunya committed
413

414 415 416
/*
  DS-MRR's own names for the two implementation-specific MRR flag bits
  (HA_MRR_IMPLEMENTATION_FLAG1 / HA_MRR_IMPLEMENTATION_FLAG2).
  NOTE(review): judging by the names, they mark that lookup keys / rowids
  are to be sorted -- confirm against the code that sets and tests them.
*/
#define DSMRR_IMPL_SORT_KEYS   HA_MRR_IMPLEMENTATION_FLAG1
#define DSMRR_IMPL_SORT_ROWIDS HA_MRR_IMPLEMENTATION_FLAG2

417 418 419 420 421
/*
  DS-MRR implementation for one table. Create/use one object of this class for
  each ha_{myisam/innobase/etc} object. That object will be further referred to
  as "the handler"

Sergey Petrunya's avatar
Sergey Petrunya committed
422 423 424 425 426 427 428
  DsMrr_impl has the following execution strategies:

  - Bypass DS-MRR, pass all calls to default MRR implementation, which is 
    an MRR-to-non-MRR call converter.
  - Key-Ordered Retrieval
  - Rowid-Ordered Retrieval

429
  DsMrr_impl will use one of the above strategies, or a combination of them, 
Sergey Petrunya's avatar
Sergey Petrunya committed
430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458
  according to the following diagram:

         (mrr function calls)
                |
                +----------------->-----------------+
                |                                   |
     ___________v______________      _______________v________________
    / default: use lookup keys \    / KEY-ORDERED RETRIEVAL:         \
    | (or ranges) in whatever  |    | sort lookup keys and then make | 
    | order they are supplied  |    | index lookups in index order   |
    \__________________________/    \________________________________/
              | |  |                           |    |
      +---<---+ |  +--------------->-----------|----+
      |         |                              |    |
      |         |              +---------------+    |
      |   ______v___ ______    |     _______________v_______________
      |  / default: read   \   |    / ROWID-ORDERED RETRIEVAL:      \
      |  | table records   |   |    | Before reading table records, |
      v  | in random order |   v    | sort their rowids and then    |
      |  \_________________/   |    | read them in rowid order      |
      |         |              |    \_______________________________/
      |         |              |                    |
      |         |              |                    |
      +-->---+  |  +----<------+-----------<--------+
             |  |  |                                
             v  v  v
      (table records and range_ids)

  The choice of strategy depends on MRR scan properties, table properties
459
  (whether we're scanning clustered primary key), and @@optimizer_switch
Sergey Petrunya's avatar
Sergey Petrunya committed
460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485
  settings.
  
  Key-Ordered Retrieval
  ---------------------
  The idea is: if MRR scan is essentially a series of lookups on 
   
    tbl.key=value1 OR tbl.key=value2 OR ... OR tbl.key=valueN
  
  then it makes sense to collect and order the set of lookup values, i.e.
   
     sort(value1, value2, .. valueN)

  and then do index lookups in index order. This results in fewer index page
  fetch operations, and we also can avoid making multiple index lookups for the
  same value. That is, if value1=valueN we can easily discover that after
  sorting and make one index lookup for them instead of two.

  Rowid-Ordered Retrieval
  -----------------------
  If we do a regular index scan or a series of index lookups, we'll be hitting
  table records at random. For disk-based engines, this is much slower than 
  reading the same records in disk order. We assume that disk ordering of
  rows is the same as ordering of their rowids (which is provided by 
  handler::cmp_ref())
  In order to retrieve records in different order, we must separate index
  scanning and record fetching, that is, MRR scan uses the following steps:
486 487

    1. Scan the index (and only index, that is, with HA_EXTRA_KEYREAD on) and 
Sergey Petrunya's avatar
Sergey Petrunya committed
488 489
        fill a buffer with {rowid, range_id} pairs
    2. Sort the buffer by rowid value
490 491 492 493
    3. for each {rowid, range_id} pair in the buffer
         get record by rowid and return the {record, range_id} pair
    4. Repeat the above steps until we've exhausted the list of ranges we're
       scanning.
494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542

  Buffer space management considerations
  --------------------------------------
  With regards to buffer/memory management, MRR interface specifies that 
   - SQL layer provides multi_range_read_init() with buffer of certain size.
   - MRR implementation may use (i.e. have at its disposal till the end of 
     the MRR scan) all of the buffer, or return the unused end of the buffer 
     to SQL layer.

  DS-MRR needs buffer in order to accumulate and sort rowids and/or keys. When
  we need to accumulate/sort only keys (or only rowids), it is fairly trivial.

  When we need to accumulate/sort both keys and rowids, efficient buffer use
  gets complicated. We need to:
   - First, accumulate keys and sort them
   - Then use the keys (smaller values go first) to obtain rowids. A key is not
     needed after we've got matching rowids for it.
   - Make sure that rowids are accumulated at the front of the buffer, so that we
     can return the end part of the buffer to SQL layer, should there be too
     few rowid values to occupy the buffer.

  All of these goals are achieved by using the following scheme:

     |                    |   We get an empty buffer from SQL layer.   

     |                  *-|    
     |               *----|   First, we fill the buffer with keys. Key_buffer
     |            *-------|   part grows from end of the buffer space to start
     |         *----------|   (In this picture, the buffer is big enough to
     |      *-------------|    accommodate all keys and even have some space left)

     |      *=============|   We want to do key-ordered index scan, so we sort
                              the keys

     |-x      *===========|   Then we use the keys to get rowids. Rowids are 
     |----x      *========|   stored from start of buffer space towards the end.
     |--------x     *=====|   The part of the buffer occupied with keys
     |------------x   *===|   gradually frees up space for rowids. In this
     |--------------x   *=|   picture we run out of keys before we've run out
     |----------------x   |   of buffer space (it can be other way as well).

     |================x   |   Then we sort the rowids.
                     
     |                |~~~|   The unused part of the buffer is at the end, so
                              we can return it to the SQL layer.

     |================*       Sorted rowids are then used to read table records 
                              in disk order

543 544
*/

545
class DsMrr_impl
546 547 548 549 550
{
public:
  typedef void (handler::*range_check_toggle_func_t)(bool on);

  DsMrr_impl()
551
    : secondary_file(NULL) {};
552
  
553 554
  void init(handler *h_arg, TABLE *table_arg)
  {
555
    primary_file= h_arg; 
556 557
    table= table_arg;
  }
558 559 560
  int dsmrr_init(handler *h_arg, RANGE_SEQ_IF *seq_funcs, 
                 void *seq_init_param, uint n_ranges, uint mode, 
                 HANDLER_BUFFER *buf);
561
  void dsmrr_close();
562
  int dsmrr_next(range_id_t *range_info);
563 564 565 566 567 568 569

  ha_rows dsmrr_info(uint keyno, uint n_ranges, uint keys, uint key_parts, 
                     uint *bufsz, uint *flags, COST_VECT *cost);

  ha_rows dsmrr_info_const(uint keyno, RANGE_SEQ_IF *seq, 
                            void *seq_init_param, uint n_ranges, uint *bufsz,
                            uint *flags, COST_VECT *cost);
570 571

  int dsmrr_explain_info(uint mrr_mode, char *str, size_t size);
572
private:
573 574 575
  /* Buffer to store (key, range_id) pairs */
  Lifo_buffer *key_buffer;

576
  /*
577 578
    The "owner" handler object (the one that is expected to "own" this object
    and call its functions).
579
  */
580 581
  handler *primary_file;
  TABLE *table; /* Always equal to primary_file->table */
582

583
  /*
584
    Secondary handler object. (created when needed, we need it when we need 
585
    to run both index scan and rnd_pos() scan at the same time)
586
  */
587
  handler *secondary_file;
588
  
589 590 591 592
  uint keyno; /* index we're running the scan on */
  /* TRUE <=> need range association, buffers hold {rowid, range_id} pairs */
  bool is_mrr_assoc;

Sergey Petrunya's avatar
Sergey Petrunya committed
593
  Mrr_reader_factory reader_factory;
Sergey Petrunya's avatar
Sergey Petrunya committed
594

Sergey Petrunya's avatar
Sergey Petrunya committed
595
  Mrr_reader *strategy;
Sergey Petrunya's avatar
Sergey Petrunya committed
596 597
  bool strategy_exhausted;

598
  Mrr_index_reader *index_strategy;
599 600

  /* The whole buffer space that we're using */
601 602
  uchar *full_buf;
  uchar *full_buf_end;
603
  
604
  /* 
605
    When using both rowid and key buffers: the boundary between key and rowid
606 607 608 609
    parts of the buffer. This is the "original" value, actual memory ranges 
    used by key and rowid parts may be different because of dynamic space 
    reallocation between them.
  */
610
  uchar *rowid_buffer_end;
611
 
612 613 614 615 616 617 618 619
  /*
    One of the following two is used for key buffer: forward is used when 
    we only need key buffer, backward is used when we need both key and rowid
    buffers.
  */
  Forward_lifo_buffer forward_key_buf;
  Backward_lifo_buffer backward_key_buf;

620 621 622 623
  /*
    Buffer to store (rowid, range_id) pairs, or just rowids if 
    is_mrr_assoc==FALSE
  */
Sergey Petrunya's avatar
Sergey Petrunya committed
624
  Forward_lifo_buffer rowid_buffer;
625
  
626 627 628 629
  bool choose_mrr_impl(uint keyno, ha_rows rows, uint *flags, uint *bufsz, 
                       COST_VECT *cost);
  bool get_disk_sweep_mrr_cost(uint keynr, ha_rows rows, uint flags, 
                               uint *buffer_size, COST_VECT *cost);
630
  bool check_cpk_scan(THD *thd, uint keyno, uint mrr_flags);
631

632 633 634 635 636 637 638 639
  bool setup_buffer_sharing(uint key_size_in_keybuf, key_part_map key_tuple_map);

  /* Buffer_manager and its member functions */
  Buffer_manager buf_manager;
  static void redistribute_buffer_space(void *dsmrr_arg);
  static void reset_buffer_sizes(void *dsmrr_arg);
  static void do_nothing(void *dsmrr_arg);

640 641
  Lifo_buffer* get_key_buffer() { return key_buffer; }

Sergey Petrunya's avatar
Sergey Petrunya committed
642
  friend class Key_value_records_iterator;
643 644 645 646 647
  friend class Mrr_ordered_index_reader;
  friend class Mrr_ordered_rndpos_reader;

  int  setup_two_handlers();
  void close_second_handler();
648 649
};

650 651 652
/**
  @} (end of group DS-MRR declarations)
*/
653