page0page.c 50.8 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
/******************************************************
Index page routines

(c) 1994-1996 Innobase Oy

Created 2/2/1994 Heikki Tuuri
*******************************************************/

#define THIS_MODULE
#include "page0page.h"
#ifdef UNIV_NONINL
#include "page0page.ic"
#endif
#undef THIS_MODULE

#include "page0cur.h"
#include "lock0lock.h"
#include "fut0lst.h"
#include "btr0sea.h"
unknown's avatar
unknown committed
20
#include "buf0buf.h"
unknown's avatar
unknown committed
21 22
#include "srv0srv.h"
#include "btr0btr.h"
23 24 25

/*			THE INDEX PAGE
			==============

The index page consists of a page header which contains the page's
id and other information. On top of it are the index records
in a heap, linked into a one-way linear list according to alphabetic order.

Just below the page end is an array of pointers which we call the page
directory, pointing to about every sixth record in the list. The pointers
are placed in the directory in the alphabetical order of the records
pointed to, enabling us to make a binary search using the array. Each slot
number I in the directory points to a record, where a 4-bit field contains
a count of those records which are in the linear list between pointer I and
the pointer I - 1 in the directory, including the record
pointed to by pointer I and not including the record pointed to by I - 1.
We say that the record pointed to by slot I, or that slot I, owns
these records. The count is always kept in the range 4 to 8, with
the exception that it is 1 for the first slot, and 1--8 for the last slot
(the one pointing to the supremum record).

An essentially binary search can be performed in the list of index
records, like we could do if we had a pointer to every record in the
page directory. The data structure is, however, more efficient when
we are doing inserts, because most inserts are just pushed on a heap.
Only every 8th insert requires a block move in the directory pointer
table, which itself is quite small. A record is deleted from the page
by just taking it off the linear list and updating the number-of-owned-
records field of the record which owns it, and updating the page directory,
if necessary. A special case is the one when the record owns itself.
Because the overhead of inserts is so small, we may also increase the
page size from the projected default of 8 kB to 64 kB without too
much loss of efficiency in inserts. A bigger page becomes relevant
when the disk transfer rate rises compared to the seek and latency time.
On the present system, the page size is set so that the page transfer
time (3 ms) is 20 % of the disk random access time (15 ms).

When the page is split, merged, or becomes full but contains deleted
records, we have to reorganize the page.

Assuming a page size of 8 kB, a typical index page of a secondary
index contains 300 index entries, and the size of the page directory
is 50 x 4 bytes = 200 bytes. */

unknown's avatar
unknown committed
66 67 68 69 70 71 72 73 74
/*******************************************************************
Finds the page directory slot that owns a record. Starting from rec, the
record list is followed until a record with a nonzero n_owned field is
reached. The directory is then scanned from the last slot towards slot 0
for an entry equal to the mach_encode_2() form of that record's page
offset. If slot 0 is reached without a match, diagnostics are printed to
stderr, the page is dumped and ut_error is raised. */

ulint
page_dir_find_owner_slot(
/*=====================*/
			/* out: the directory slot number */
	rec_t*	rec)	/* in: the physical record */
{
	page_t*				page;
	register uint16			owner_offs;
	register page_dir_slot_t*	cur_slot;
	register const page_dir_slot_t*	slot0;
	register rec_t*			owner = rec;

	ut_ad(page_rec_check(rec));

	page = buf_frame_align(rec);
	slot0 = page_dir_get_nth_slot(page, 0);
	cur_slot = page_dir_get_nth_slot(page,
					 page_dir_get_n_slots(page) - 1);

	/* Step 1: walk forward to the record that carries the owner count.
	The two page formats are handled by separate loops so that the
	format flag passed to the accessors is a constant. */

	if (page_is_comp(page)) {
		for (;;) {
			if (rec_get_n_owned(owner, TRUE) != 0) {
				break;
			}

			owner = page + rec_get_next_offs(owner, TRUE);
			ut_ad(owner >= page + PAGE_NEW_SUPREMUM);
			ut_ad(owner < page + (UNIV_PAGE_SIZE - PAGE_DIR));
		}
	} else {
		for (;;) {
			if (rec_get_n_owned(owner, FALSE) != 0) {
				break;
			}

			owner = page + rec_get_next_offs(owner, FALSE);
			ut_ad(owner >= page + PAGE_OLD_SUPREMUM);
			ut_ad(owner < page + (UNIV_PAGE_SIZE - PAGE_DIR));
		}
	}

	/* Step 2: look for the slot whose raw 2 bytes equal the encoded
	offset of the owner record. */

	owner_offs = mach_encode_2(owner - page);

	for (; UNIV_LIKELY(*(uint16*) cur_slot != owner_offs);
	     cur_slot += PAGE_DIR_SLOT_SIZE) {

		if (UNIV_LIKELY(cur_slot != slot0)) {

			continue;
		}

		/* Slot 0 did not match either: no slot points to the
		owner record. Report and abort. */

		fprintf(stderr,
			"InnoDB: Probable data corruption on"
			" page %lu\n"
			"InnoDB: Original record ",
			(ulong) buf_frame_get_page_no(page));

		if (page_is_comp(page)) {
			fputs("(compact record)", stderr);
		} else {
			rec_print_old(stderr, rec);
		}

		fputs("\n"
		      "InnoDB: on that page.\n"
		      "InnoDB: Cannot find the dir slot for record ",
		      stderr);

		if (page_is_comp(page)) {
			fputs("(compact record)", stderr);
		} else {
			rec_print_old(stderr, page
				      + mach_decode_2(owner_offs));
		}

		fputs("\n"
		      "InnoDB: on that page!\n", stderr);

		buf_page_print(page);

		ut_error;
	}

	/* The distance from slot 0, in bytes, divided by the slot size
	gives the slot number. */

	return(((ulint) (slot0 - cur_slot)) / PAGE_DIR_SLOT_SIZE);
}

unknown's avatar
Merge  
unknown committed
142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158
/******************************************************************
Checks the consistency of a directory slot: the slot must lie within the
directory of its page, must point to a record accepted by
page_rec_check(), and the n_owned field of that record must be in the
range allowed for the position of the slot. Every violation is a fatal
assertion failure (ut_a). */
static
ibool
page_dir_slot_check(
/*================*/
					/* out: TRUE if succeed */
	page_dir_slot_t*	slot)	/* in: slot */
{
	page_t*			page;
	page_dir_slot_t*	first;
	page_dir_slot_t*	last;
	rec_t*			owner;
	ulint			n_slots;
	ulint			n_owned;

	ut_a(slot);

	page = buf_frame_align(slot);
	n_slots = page_dir_get_n_slots(page);

	/* The slot with number 0 has the highest address in the
	directory, the last slot the lowest. */
	first = page_dir_get_nth_slot(page, 0);
	last = page_dir_get_nth_slot(page, n_slots - 1);

	ut_a(slot <= first);
	ut_a(slot >= last);

	owner = page_dir_slot_get_rec(slot);

	ut_a(page_rec_check(owner));

	n_owned = rec_get_n_owned(owner, page_is_comp(page));

	if (slot == first) {
		/* The first slot owns exactly one record */
		ut_a(n_owned == 1);

		return(TRUE);
	}

	/* The last slot may own fewer records than the minimum required
	of the slots in between; the upper bound is common to both. */

	if (slot == last) {
		ut_a(n_owned >= 1);
	} else {
		ut_a(n_owned >= PAGE_DIR_SLOT_MIN_N_OWNED);
	}

	ut_a(n_owned <= PAGE_DIR_SLOT_MAX_N_OWNED);

	return(TRUE);
}

182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203
/*****************************************************************
Writes trx_id to the PAGE_MAX_TRX_ID field of the page header. If the
buffer block of the page is flagged is_hashed, btr_search_latch is held
in x-mode around the write. */

void
page_set_max_trx_id(
/*================*/
	page_t*	page,	/* in: page */
	dulint	trx_id)	/* in: transaction id */
{
	buf_block_t*	frame_block;
	byte*		field;

	ut_ad(page);

	frame_block = buf_block_align(page);
	field = page + PAGE_HEADER + PAGE_MAX_TRX_ID;

	if (frame_block->is_hashed) {
		rw_lock_x_lock(&btr_search_latch);
	}

	/* This change does not need to be written to the redo log: in
	a database recovery the max trx id of every page is assumed to
	be the maximum trx id assigned before the crash. */

	mach_write_to_8(field, trx_id);

	if (frame_block->is_hashed) {
		rw_lock_x_unlock(&btr_search_latch);
	}
}

/****************************************************************
Allocates a block of memory from an index page. */

byte*
page_mem_alloc(
/*===========*/
unknown's avatar
unknown committed
218 219 220 221 222 223 224 225
				/* out: pointer to start of allocated
				buffer, or NULL if allocation fails */
	page_t*		page,	/* in: index page */
	ulint		need,	/* in: number of bytes needed */
	dict_index_t*	index,	/* in: record descriptor */
	ulint*		heap_no)/* out: this contains the heap number
				of the allocated record
				if allocation succeeds */
226 227 228 229 230
{
	rec_t*	rec;
	byte*	block;
	ulint	avl_space;
	ulint	garbage;
231

232 233 234 235 236 237 238
	ut_ad(page && heap_no);

	/* If there are records in the free list, look if the first is
	big enough */

	rec = page_header_get_ptr(page, PAGE_FREE);

unknown's avatar
unknown committed
239
	if (rec) {
240
		mem_heap_t*	heap		= NULL;
241
		ulint		offsets_[REC_OFFS_NORMAL_SIZE];
242
		ulint*		offsets		= offsets_;
243
		*offsets_ = (sizeof offsets_) / sizeof *offsets_;
244 245

		offsets = rec_get_offsets(rec, index, offsets,
unknown's avatar
unknown committed
246
					  ULINT_UNDEFINED, &heap);
unknown's avatar
unknown committed
247 248 249

		if (rec_offs_size(offsets) >= need) {
			page_header_set_ptr(page, PAGE_FREE,
unknown's avatar
unknown committed
250
					    page_rec_get_next(rec));
251

unknown's avatar
unknown committed
252 253
			garbage = page_header_get_field(page, PAGE_GARBAGE);
			ut_ad(garbage >= need);
254

unknown's avatar
unknown committed
255
			page_header_set_field(page, PAGE_GARBAGE,
unknown's avatar
unknown committed
256
					      garbage - need);
257

unknown's avatar
unknown committed
258
			*heap_no = rec_get_heap_no(rec, page_is_comp(page));
259

unknown's avatar
unknown committed
260
			block = rec_get_start(rec, offsets);
261
			if (UNIV_LIKELY_NULL(heap)) {
262 263
				mem_heap_free(heap);
			}
unknown's avatar
unknown committed
264 265
			return(block);
		}
266

267
		if (UNIV_LIKELY_NULL(heap)) {
268 269
			mem_heap_free(heap);
		}
270 271 272
	}

	/* Could not find space from the free list, try top of heap */
273

274
	avl_space = page_get_max_insert_size(page, 1);
275

276 277 278 279
	if (avl_space >= need) {
		block = page_header_get_ptr(page, PAGE_HEAP_TOP);

		page_header_set_ptr(page, PAGE_HEAP_TOP, block + need);
unknown's avatar
unknown committed
280
		*heap_no = page_dir_get_n_heap(page);
281

unknown's avatar
unknown committed
282
		page_dir_set_n_heap(page, 1 + *heap_no);
283 284 285 286 287 288 289 290 291 292 293 294 295 296 297

		return(block);
	}

	return(NULL);
}

/**************************************************************
Writes a log record of page creation. */
UNIV_INLINE
void
page_create_write_log(
/*==================*/
	buf_frame_t*	frame,	/* in: a buffer frame where the page is
				created */
unknown's avatar
unknown committed
298
	mtr_t*		mtr,	/* in: mini-transaction handle */
299
	ulint		comp)	/* in: nonzero=compact page format */
300
{
unknown's avatar
unknown committed
301 302 303
	mlog_write_initial_log_record(frame, comp
				      ? MLOG_COMP_PAGE_CREATE
				      : MLOG_PAGE_CREATE, mtr);
304 305 306 307 308 309 310 311 312 313
}

/***************************************************************
Parses a redo log record of creating a page. */

byte*
page_parse_create(
/*==============*/
			/* out: end of log record or NULL */
	byte*	ptr,	/* in: buffer */
unknown's avatar
unknown committed
314
	byte*	end_ptr __attribute__((unused)), /* in: buffer end */
315
	ulint	comp,	/* in: nonzero=compact page format */
316 317 318 319 320 321 322 323
	page_t*	page,	/* in: page or NULL */
	mtr_t*	mtr)	/* in: mtr or NULL */
{
	ut_ad(ptr && end_ptr);

	/* The record is empty, except for the record initial part */

	if (page) {
unknown's avatar
unknown committed
324
		page_create(page, mtr, comp);
325 326 327 328 329 330 331 332
	}

	return(ptr);
}

/**************************************************************
The index page creation function. */

333
page_t*
334 335 336 337 338
page_create(
/*========*/
				/* out: pointer to the page */
	buf_frame_t*	frame,	/* in: a buffer frame where the page is
				created */
unknown's avatar
unknown committed
339
	mtr_t*		mtr,	/* in: mini-transaction handle */
340
	ulint		comp)	/* in: nonzero=compact page format */
341 342 343
{
	page_dir_slot_t* slot;
	mem_heap_t*	heap;
344
	dtuple_t*	tuple;
345 346 347 348 349
	dfield_t*	field;
	byte*		heap_top;
	rec_t*		infimum_rec;
	rec_t*		supremum_rec;
	page_t*		page;
unknown's avatar
unknown committed
350 351 352 353
	dict_index_t*	index;
	ulint*		offsets;

	index = comp ? srv_sys->dummy_ind2 : srv_sys->dummy_ind1;
354

355
	ut_ad(frame && mtr);
356 357 358 359 360 361
#if PAGE_BTR_IBUF_FREE_LIST + FLST_BASE_NODE_SIZE > PAGE_DATA
# error "PAGE_BTR_IBUF_FREE_LIST + FLST_BASE_NODE_SIZE > PAGE_DATA"
#endif
#if PAGE_BTR_IBUF_FREE_LIST_NODE + FLST_NODE_SIZE > PAGE_DATA
# error "PAGE_BTR_IBUF_FREE_LIST_NODE + FLST_NODE_SIZE > PAGE_DATA"
#endif
362 363 364 365 366

	/* 1. INCREMENT MODIFY CLOCK */
	buf_frame_modify_clock_inc(frame);

	/* 2. WRITE LOG INFORMATION */
unknown's avatar
unknown committed
367
	page_create_write_log(frame, mtr, comp);
368

369 370 371 372 373
	page = frame;

	fil_page_set_type(page, FIL_PAGE_INDEX);

	heap = mem_heap_create(200);
374

375 376 377 378
	/* 3. CREATE THE INFIMUM AND SUPREMUM RECORDS */

	/* Create first a data tuple for infimum record */
	tuple = dtuple_create(heap, 1);
unknown's avatar
unknown committed
379
	dtuple_set_info_bits(tuple, REC_STATUS_INFIMUM);
380 381
	field = dtuple_get_nth_field(tuple, 0);

unknown's avatar
unknown committed
382 383
	dfield_set_data(field, "infimum", 8);
	dtype_set(dfield_get_type(field),
unknown's avatar
unknown committed
384
		  DATA_VARCHAR, DATA_ENGLISH | DATA_NOT_NULL, 8, 0);
385 386 387 388
	/* Set the corresponding physical record to its place in the page
	record heap */

	heap_top = page + PAGE_DATA;
389

unknown's avatar
unknown committed
390 391
	infimum_rec = rec_convert_dtuple_to_rec(heap_top, index, tuple);

unknown's avatar
unknown committed
392 393
	ut_a(infimum_rec == page
	     + (comp ? PAGE_NEW_INFIMUM : PAGE_OLD_INFIMUM));
unknown's avatar
unknown committed
394 395 396

	rec_set_n_owned(infimum_rec, comp, 1);
	rec_set_heap_no(infimum_rec, comp, 0);
397
	offsets = rec_get_offsets(infimum_rec, index, NULL,
unknown's avatar
unknown committed
398
				  ULINT_UNDEFINED, &heap);
unknown's avatar
unknown committed
399 400

	heap_top = rec_get_end(infimum_rec, offsets);
401 402 403 404

	/* Create then a tuple for supremum */

	tuple = dtuple_create(heap, 1);
unknown's avatar
unknown committed
405
	dtuple_set_info_bits(tuple, REC_STATUS_SUPREMUM);
406 407
	field = dtuple_get_nth_field(tuple, 0);

408
	dfield_set_data(field, "supremum", comp ? 8 : 9);
unknown's avatar
unknown committed
409
	dtype_set(dfield_get_type(field),
unknown's avatar
unknown committed
410
		  DATA_VARCHAR, DATA_ENGLISH | DATA_NOT_NULL, comp ? 8 : 9, 0);
411

unknown's avatar
unknown committed
412
	supremum_rec = rec_convert_dtuple_to_rec(heap_top, index, tuple);
413

unknown's avatar
unknown committed
414 415
	ut_a(supremum_rec == page
	     + (comp ? PAGE_NEW_SUPREMUM : PAGE_OLD_SUPREMUM));
416

unknown's avatar
unknown committed
417 418
	rec_set_n_owned(supremum_rec, comp, 1);
	rec_set_heap_no(supremum_rec, comp, 1);
419

420
	offsets = rec_get_offsets(supremum_rec, index, offsets,
unknown's avatar
unknown committed
421
				  ULINT_UNDEFINED, &heap);
unknown's avatar
unknown committed
422 423
	heap_top = rec_get_end(supremum_rec, offsets);

unknown's avatar
unknown committed
424 425
	ut_ad(heap_top == page
	      + (comp ? PAGE_NEW_SUPREMUM_END : PAGE_OLD_SUPREMUM_END));
426 427 428

	mem_heap_free(heap);

429
	/* 4. INITIALIZE THE PAGE */
430 431 432

	page_header_set_field(page, PAGE_N_DIR_SLOTS, 2);
	page_header_set_ptr(page, PAGE_HEAP_TOP, heap_top);
unknown's avatar
unknown committed
433
	page_header_set_field(page, PAGE_N_HEAP, comp ? 0x8002 : 2);
434 435 436
	page_header_set_ptr(page, PAGE_FREE, NULL);
	page_header_set_field(page, PAGE_GARBAGE, 0);
	page_header_set_ptr(page, PAGE_LAST_INSERT, NULL);
unknown's avatar
unknown committed
437 438
	page_header_set_field(page, PAGE_DIRECTION, PAGE_NO_DIRECTION);
	page_header_set_field(page, PAGE_N_DIRECTION, 0);
439 440
	page_header_set_field(page, PAGE_N_RECS, 0);
	page_set_max_trx_id(page, ut_dulint_zero);
441
	memset(heap_top, 0, UNIV_PAGE_SIZE - PAGE_EMPTY_DIR_START
unknown's avatar
unknown committed
442
	       - (heap_top - page));
443

444 445 446 447 448 449 450 451 452 453
	/* 5. SET POINTERS IN RECORDS AND DIR SLOTS */

	/* Set the slots to point to infimum and supremum. */

	slot = page_dir_get_nth_slot(page, 0);
	page_dir_slot_set_rec(slot, infimum_rec);

	slot = page_dir_get_nth_slot(page, 1);
	page_dir_slot_set_rec(slot, supremum_rec);

unknown's avatar
unknown committed
454
	/* Set the next pointers in infimum and supremum */
455

unknown's avatar
unknown committed
456 457
	rec_set_next_offs(infimum_rec, comp, (ulint)(supremum_rec - page));
	rec_set_next_offs(supremum_rec, comp, 0);
458 459 460 461 462 463 464 465 466 467 468

	return(page);
}

/*****************************************************************
Copies the records from rec (inclusive; if rec is the infimum, from its
successor) up to but excluding the supremum of page to the start of the
record list of new_page. Differs from page_copy_rec_list_end in that
this function does not touch the lock table and the max trx id on the
page. A failed insert is treated as fatal: both pages are printed and
ut_error is raised. */

void
page_copy_rec_list_end_no_locks(
/*============================*/
	page_t*		new_page,	/* in: index page to copy to */
	page_t*		page,		/* in: index page */
	rec_t*		rec,		/* in: record on page */
	dict_index_t*	index,		/* in: record descriptor */
	mtr_t*		mtr)		/* in: mtr */
{
	page_cur_t	src_cur;
	page_cur_t	dst_cur;
	rec_t*		src_rec;
	rec_t*		src_end;
	mem_heap_t*	heap		= NULL;
	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
	ulint*		offsets		= offsets_;
	*offsets_ = (sizeof offsets_) / sizeof *offsets_;

	page_cur_position(rec, &src_cur);

	if (page_cur_is_before_first(&src_cur)) {
		/* The infimum itself is never copied */

		page_cur_move_to_next(&src_cur);
	}

	/* Both pages and the index must agree on the row format */
	ut_a((ibool)!!page_is_comp(new_page)
	     == dict_table_is_comp(index->table));
	ut_a(page_is_comp(new_page) == page_is_comp(page));

	/* The 2 bytes at UNIV_PAGE_SIZE - 10 of the target must hold the
	infimum offset (NOTE(review): presumably this is directory slot 0
	-- confirm against the PAGE_DIR layout) */
	ut_a(mach_read_from_2(new_page + UNIV_PAGE_SIZE - 10) == (ulint)
	     (page_is_comp(new_page)
	      ? PAGE_NEW_INFIMUM : PAGE_OLD_INFIMUM));

	page_cur_set_before_first(new_page, &dst_cur);

	/* Copy records from the original page to the new page */

	src_end = page_get_supremum_rec(page);
	src_rec = page_cur_get_rec(&src_cur);

	while (src_rec != src_end) {
		offsets = rec_get_offsets(src_rec, index, offsets,
					  ULINT_UNDEFINED, &heap);

		if (UNIV_UNLIKELY(!page_cur_rec_insert(&dst_cur, src_rec,
						       index, offsets,
						       mtr))) {
			/* Track an assertion failure reported on the mailing
			list on June 18th, 2003 */

			buf_page_print(new_page);
			buf_page_print(page);
			ut_print_timestamp(stderr);

			fprintf(stderr,
				"InnoDB: rec offset %lu, cur1 offset %lu,"
				" cur2 offset %lu\n",
				(ulong)(rec - page),
				(ulong)(page_cur_get_rec(&src_cur) - page),
				(ulong)(page_cur_get_rec(&dst_cur)
					- new_page));

			ut_error;
		}

		/* Advance both cursors; the target cursor then stands
		on the record just inserted */
		page_cur_move_to_next(&src_cur);
		page_cur_move_to_next(&dst_cur);

		src_rec = page_cur_get_rec(&src_cur);
	}

	if (UNIV_LIKELY_NULL(heap)) {
		mem_heap_free(heap);
	}
}
537 538 539 540 541 542 543 544 545

/*****************************************************************
Copies records from page to new_page, from a given record onward,
including that record. Infimum and supremum records are not copied.
The records are copied to the start of the record list on new_page.
Afterwards the lock table, the max trx id of new_page and the adaptive
hash index entries are updated. */

void
page_copy_rec_list_end(
/*===================*/
	page_t*		new_page,	/* in: index page to copy to */
	page_t*		page,		/* in: index page */
	rec_t*		rec,		/* in: record on page */
	dict_index_t*	index,		/* in: record descriptor */
	mtr_t*		mtr)		/* in: mtr */
{
	/* A heap count of 2 means that new_page holds only the infimum
	and supremum; the dedicated routine for created pages is used
	then, the generic record-by-record copy otherwise. */

	if (page_dir_get_n_heap(new_page) != 2) {
		page_copy_rec_list_end_no_locks(new_page, page, rec,
						index, mtr);
	} else {
		page_copy_rec_list_end_to_created_page(new_page, page, rec,
						       index, mtr);
	}

	/* Update the lock table, MAX_TRX_ID, and possible hash index */

	lock_move_rec_list_end(new_page, page, rec);

	page_update_max_trx_id(new_page, page_get_max_trx_id(page));

	btr_search_move_or_delete_hash_entries(new_page, page, index);
}
568 569 570 571 572 573 574 575 576

/*****************************************************************
Copies records from page to new_page, up to the given record,
NOT including that record. Infimum and supremum records are not copied.
The records are copied to the end of the record list on new_page.
Nothing at all is done if rec is the infimum of page. */

void
page_copy_rec_list_start(
/*=====================*/
	page_t*		new_page,	/* in: index page to copy to */
	page_t*		page,		/* in: index page */
	rec_t*		rec,		/* in: record on page */
	dict_index_t*	index,		/* in: record descriptor */
	mtr_t*		mtr)		/* in: mtr */
{
	page_cur_t	src_cur;
	page_cur_t	dst_cur;
	rec_t*		dst_old_last;
	mem_heap_t*	heap		= NULL;
	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
	ulint*		offsets		= offsets_;
	*offsets_ = (sizeof offsets_) / sizeof *offsets_;

	page_cur_set_before_first(page, &src_cur);

	if (page_cur_get_rec(&src_cur) == rec) {
		/* rec is the infimum: there is nothing before it */

		return;
	}

	page_cur_move_to_next(&src_cur);

	/* Position the target cursor on the record preceding the
	supremum of new_page, and remember that record for the lock
	table update below. */
	page_cur_set_after_last(new_page, &dst_cur);
	page_cur_move_to_prev(&dst_cur);
	dst_old_last = page_cur_get_rec(&dst_cur);

	/* Copy records from the original page to the new page */

	for (;;) {
		rec_t*	src_rec = page_cur_get_rec(&src_cur);
		rec_t*	copied;

		if (src_rec == rec) {

			break;
		}

		offsets = rec_get_offsets(src_rec, index, offsets,
					  ULINT_UNDEFINED, &heap);
		copied = page_cur_rec_insert(&dst_cur, src_rec, index,
					     offsets, mtr);
		ut_a(copied);

		page_cur_move_to_next(&src_cur);
		page_cur_move_to_next(&dst_cur);
	}

	/* Update the lock table, MAX_TRX_ID, and possible hash index */

	lock_move_rec_list_start(new_page, page, rec, dst_old_last);

	page_update_max_trx_id(new_page, page_get_max_trx_id(page));

	btr_search_move_or_delete_hash_entries(new_page, page, index);

	if (UNIV_LIKELY_NULL(heap)) {
		mem_heap_free(heap);
	}
}
631 632 633 634 635 636 637

/**************************************************************
Writes a log record of a record list end or start deletion. */
UNIV_INLINE
void
page_delete_rec_list_write_log(
/*===========================*/
unknown's avatar
unknown committed
638 639 640 641 642
	rec_t*		rec,	/* in: record on page */
	dict_index_t*	index,	/* in: record descriptor */
	byte		type,	/* in: operation type:
				MLOG_LIST_END_DELETE, ... */
	mtr_t*		mtr)	/* in: mtr */
643
{
unknown's avatar
unknown committed
644 645
	byte*	log_ptr;
	ut_ad(type == MLOG_LIST_END_DELETE
unknown's avatar
unknown committed
646 647 648
	      || type == MLOG_LIST_START_DELETE
	      || type == MLOG_COMP_LIST_END_DELETE
	      || type == MLOG_COMP_LIST_START_DELETE);
unknown's avatar
unknown committed
649

650
	log_ptr = mlog_open_and_write_index(mtr, rec, index, type, 2);
unknown's avatar
unknown committed
651 652
	if (log_ptr) {
		/* Write the parameter as a 2-byte ulint */
653
		mach_write_to_2(log_ptr, ut_align_offset(rec, UNIV_PAGE_SIZE));
unknown's avatar
unknown committed
654 655
		mlog_close(mtr, log_ptr + 2);
	}
656 657 658 659 660 661 662 663
}

/**************************************************************
Parses a log record of a record list end or start deletion. The body of
the record is a 2-byte record offset. If a page is given, the deletion
is also applied to it. */

byte*
page_parse_delete_rec_list(
/*=======================*/
				/* out: end of log record or NULL */
	byte		type,	/* in: MLOG_LIST_END_DELETE,
				MLOG_LIST_START_DELETE,
				MLOG_COMP_LIST_END_DELETE or
				MLOG_COMP_LIST_START_DELETE */
	byte*		ptr,	/* in: buffer */
	byte*		end_ptr,/* in: buffer end */
	dict_index_t*	index,	/* in: record descriptor */
	page_t*		page,	/* in: page or NULL */
	mtr_t*		mtr)	/* in: mtr or NULL */
{
	ulint	rec_offs;

	ut_ad(type == MLOG_LIST_END_DELETE
	      || type == MLOG_LIST_START_DELETE
	      || type == MLOG_COMP_LIST_END_DELETE
	      || type == MLOG_COMP_LIST_START_DELETE);

	/* Read the record offset as a 2-byte ulint */

	if (end_ptr < ptr + 2) {
		/* The buffer does not contain the whole record */

		return(NULL);
	}

	rec_offs = mach_read_from_2(ptr);
	ptr += 2;

	if (page != NULL) {
		ut_ad(!!page_is_comp(page)
		      == dict_table_is_comp(index->table));

		if (type != MLOG_LIST_END_DELETE
		    && type != MLOG_COMP_LIST_END_DELETE) {
			page_delete_rec_list_start(page, page + rec_offs,
						   index, mtr);
		} else {
			/* The record count and size are not logged;
			they are recomputed by the callee */
			page_delete_rec_list_end(page, page + rec_offs,
						 index,
						 ULINT_UNDEFINED,
						 ULINT_UNDEFINED, mtr);
		}
	}

	return(ptr);
}

/*****************************************************************
Deletes records from a page from a given record onward, including that record.
The infimum and supremum records are not deleted. */

void
page_delete_rec_list_end(
/*=====================*/
unknown's avatar
unknown committed
718 719 720 721 722 723 724 725 726
	page_t*		page,	/* in: index page */
	rec_t*		rec,	/* in: record on page */
	dict_index_t*	index,	/* in: record descriptor */
	ulint		n_recs,	/* in: number of records to delete,
				or ULINT_UNDEFINED if not known */
	ulint		size,	/* in: the sum of the sizes of the
				records in the end of the chain to
				delete, or ULINT_UNDEFINED if not known */
	mtr_t*		mtr)	/* in: mtr */
727 728 729 730 731 732 733 734 735 736
{
	page_dir_slot_t* slot;
	ulint	slot_index;
	rec_t*	last_rec;
	rec_t*	prev_rec;
	rec_t*	free;
	rec_t*	rec2;
	ulint	count;
	ulint	n_owned;
	rec_t*	sup;
737
	ulint	comp;
738 739 740 741

	/* Reset the last insert info in the page header and increment
	the modify clock for the frame */

unknown's avatar
unknown committed
742
	ut_ad(size == ULINT_UNDEFINED || size < UNIV_PAGE_SIZE);
743 744 745 746 747 748
	page_header_set_ptr(page, PAGE_LAST_INSERT, NULL);

	/* The page gets invalid for optimistic searches: increment the
	frame modify clock */

	buf_frame_modify_clock_inc(page);
749

750
	sup = page_get_supremum_rec(page);
751

752 753
	comp = page_is_comp(page);
	if (page_rec_is_infimum_low(rec - page)) {
754 755 756
		rec = page_rec_get_next(rec);
	}

unknown's avatar
unknown committed
757 758 759
	page_delete_rec_list_write_log(rec, index, comp
				       ? MLOG_COMP_LIST_END_DELETE
				       : MLOG_LIST_END_DELETE, mtr);
760 761 762 763 764

	if (rec == sup) {

		return;
	}
765

766 767 768 769 770
	prev_rec = page_rec_get_prev(rec);

	last_rec = page_rec_get_prev(sup);

	if ((size == ULINT_UNDEFINED) || (n_recs == ULINT_UNDEFINED)) {
771
		mem_heap_t*	heap		= NULL;
772
		ulint		offsets_[REC_OFFS_NORMAL_SIZE];
773
		ulint*		offsets		= offsets_;
774
		*offsets_ = (sizeof offsets_) / sizeof *offsets_;
775 776 777 778 779 780
		/* Calculate the sum of sizes and the number of records */
		size = 0;
		n_recs = 0;
		rec2 = rec;

		while (rec2 != sup) {
unknown's avatar
unknown committed
781
			ulint	s;
782
			offsets = rec_get_offsets(rec2, index, offsets,
unknown's avatar
unknown committed
783
						  ULINT_UNDEFINED, &heap);
unknown's avatar
unknown committed
784 785
			s = rec_offs_size(offsets);
			ut_ad(rec2 - page + s - rec_offs_extra_size(offsets)
unknown's avatar
unknown committed
786
			      < UNIV_PAGE_SIZE);
unknown's avatar
unknown committed
787 788
			ut_ad(size + s < UNIV_PAGE_SIZE);
			size += s;
789 790 791 792
			n_recs++;

			rec2 = page_rec_get_next(rec2);
		}
unknown's avatar
unknown committed
793

794
		if (UNIV_LIKELY_NULL(heap)) {
795 796
			mem_heap_free(heap);
		}
797 798
	}

unknown's avatar
unknown committed
799 800
	ut_ad(size < UNIV_PAGE_SIZE);

801 802 803
	/* Update the page directory; there is no need to balance the number
	of the records owned by the supremum record, as it is allowed to be
	less than PAGE_DIR_SLOT_MIN_N_OWNED */
804

805 806
	rec2 = rec;
	count = 0;
807

unknown's avatar
unknown committed
808
	while (rec_get_n_owned(rec2, comp) == 0) {
809 810 811 812 813
		count++;

		rec2 = page_rec_get_next(rec2);
	}

unknown's avatar
unknown committed
814
	ut_ad(rec_get_n_owned(rec2, comp) - count > 0);
815

unknown's avatar
unknown committed
816
	n_owned = rec_get_n_owned(rec2, comp) - count;
817

818 819
	slot_index = page_dir_find_owner_slot(rec2);
	slot = page_dir_get_nth_slot(page, slot_index);
820

821 822 823
	page_dir_slot_set_rec(slot, sup);
	page_dir_slot_set_n_owned(slot, n_owned);

unknown's avatar
unknown committed
824
	page_dir_set_n_slots(page, slot_index + 1);
825

826 827 828 829 830 831 832 833 834 835
	/* Remove the record chain segment from the record chain */
	page_rec_set_next(prev_rec, page_get_supremum_rec(page));

	/* Catenate the deleted chain segment to the page free list */

	free = page_header_get_ptr(page, PAGE_FREE);

	page_rec_set_next(last_rec, free);
	page_header_set_ptr(page, PAGE_FREE, rec);

unknown's avatar
unknown committed
836 837
	page_header_set_field(page, PAGE_GARBAGE, size
			      + page_header_get_field(page, PAGE_GARBAGE));
838 839

	page_header_set_field(page, PAGE_N_RECS,
unknown's avatar
unknown committed
840
			      (ulint)(page_get_n_recs(page) - n_recs));
841
}
842 843 844 845 846 847 848 849

/*****************************************************************
Deletes records from page, up to the given record, NOT including
that record. Infimum and supremum records are not deleted. */

void
page_delete_rec_list_start(
/*=======================*/
unknown's avatar
unknown committed
850 851 852 853
	page_t*		page,	/* in: index page */
	rec_t*		rec,	/* in: record on page */
	dict_index_t*	index,	/* in: record descriptor */
	mtr_t*		mtr)	/* in: mtr */
854 855 856
{
	page_cur_t	cur1;
	ulint		log_mode;
857
	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
858 859 860
	ulint*		offsets		= offsets_;
	mem_heap_t*	heap		= NULL;
	byte		type;
861
	*offsets_ = (sizeof offsets_) / sizeof *offsets_;
862

863
	ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table));
864 865

	if (page_is_comp(page)) {
866 867 868 869 870
		type = MLOG_COMP_LIST_START_DELETE;
	} else {
		type = MLOG_LIST_START_DELETE;
	}

871
	page_delete_rec_list_write_log(rec, index, type, mtr);
872 873 874 875 876 877 878 879 880

	page_cur_set_before_first(page, &cur1);

	if (rec == page_cur_get_rec(&cur1)) {

		return;
	}

	page_cur_move_to_next(&cur1);
881

882 883 884 885 886
	/* Individual deletes are not logged */

	log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE);

	while (page_cur_get_rec(&cur1) != rec) {
887
		offsets = rec_get_offsets(page_cur_get_rec(&cur1), index,
unknown's avatar
unknown committed
888
					  offsets, ULINT_UNDEFINED, &heap);
889 890
		page_cur_delete_rec(&cur1, index, offsets, mtr);
	}
891

892
	if (UNIV_LIKELY_NULL(heap)) {
893
		mem_heap_free(heap);
894 895 896 897 898
	}

	/* Restore log mode */

	mtr_set_log_mode(mtr, log_mode);
899
}
900 901 902 903 904 905 906 907

/*****************************************************************
Moves record list end to another page. Moved records include
split_rec. The records are first copied to new_page and then deleted
from page; the number and data size of the deleted records are taken
from the growth of new_page caused by the copy. */

void
page_move_rec_list_end(
/*===================*/
	page_t*		new_page,	/* in: index page where to move */
	page_t*		page,		/* in: index page */
	rec_t*		split_rec,	/* in: first record to move */
	dict_index_t*	index,		/* in: record descriptor */
	mtr_t*		mtr)		/* in: mtr */
{
	ulint	size_before	= page_get_data_size(new_page);
	ulint	recs_before	= page_get_n_recs(new_page);
	ulint	size_after;
	ulint	recs_after;

	page_copy_rec_list_end(new_page, page, split_rec, index, mtr);

	size_after = page_get_data_size(new_page);
	recs_after = page_get_n_recs(new_page);

	ut_ad(size_after >= size_before);

	/* Pass what the copy added to new_page as the amounts to delete */

	page_delete_rec_list_end(page, split_rec, index,
				 recs_after - recs_before,
				 size_after - size_before, mtr);
}

/*****************************************************************
Moves record list start to another page. Moved records do not include
split_rec. Implemented as a copy of the list start to new_page followed
by a delete of the same list start from page. */

void
page_move_rec_list_start(
/*=====================*/
	page_t*		new_page,	/* in: index page where to move */
	page_t*		page,		/* in: index page */
	rec_t*		split_rec,	/* in: first record not to move */
	dict_index_t*	index,		/* in: record descriptor */
	mtr_t*		mtr)		/* in: mtr */
{
	/* Copy first ... */
	page_copy_rec_list_start(new_page, page, split_rec, index, mtr);

	/* ... then remove the copied records from the source page */
	page_delete_rec_list_start(page, split_rec, index, mtr);
}

/***************************************************************************
This is a low-level operation which is used in a database index creation
to update the page number of a created B-tree to a data dictionary record.
The value is written as a 4-byte field through the mini-transaction. */

void
page_rec_write_index_page_no(
/*=========================*/
	rec_t*	rec,	/* in: record to update */
	ulint	i,	/* in: index of the field to update */
	ulint	page_no,/* in: value to write */
	mtr_t*	mtr)	/* in: mtr */
{
	ulint	field_len;
	byte*	field	= rec_get_nth_field_old(rec, i, &field_len);

	/* The field must be exactly 4 bytes wide */
	ut_ad(field_len == 4);

	mlog_write_ulint(field, page_no, MLOG_4BYTES, mtr);
}

/******************************************************************
Used to delete n slots from the directory. This function updates
also n_owned fields in the records, so that the first slot after
the deleted ones inherits the records of the deleted slots. */
UNIV_INLINE
void
page_dir_delete_slots(
/*==================*/
	page_t*	page,	/* in: the index page */
	ulint	start,	/* in: first slot to be deleted */
	ulint	n)	/* in: number of slots to delete (currently
			only n == 1 allowed) */
{
	page_dir_slot_t*	src_slot;
	page_dir_slot_t*	dst_slot;
	page_dir_slot_t*	heir_slot;
	ulint			n_slots	= page_dir_get_n_slots(page);
	ulint			inherited = 0;
	ulint			k;

	ut_ad(n == 1);
	ut_ad(start > 0);
	ut_ad(start + n < n_slots);

	/* 1. Collect the n_owned counts of the slots to be deleted and
	reset them to zero */
	for (k = 0; k < n; k++) {
		dst_slot = page_dir_get_nth_slot(page, start + k);
		inherited += page_dir_slot_get_n_owned(dst_slot);
		page_dir_slot_set_n_owned(dst_slot, 0);
	}

	/* 2. The first surviving slot takes over the collected records */

	heir_slot = page_dir_get_nth_slot(page, start + n);
	page_dir_slot_set_n_owned(heir_slot,
				  inherited
				  + page_dir_slot_get_n_owned(heir_slot));

	/* 3. Close the gap: copy the record pointer of every slot from
	start + n onwards into the slot n positions below it */
	for (k = start; k + n < n_slots; k++) {
		src_slot = page_dir_get_nth_slot(page, k + n);
		dst_slot = page_dir_get_nth_slot(page, k);

		page_dir_slot_set_rec(dst_slot,
				      page_dir_slot_get_rec(src_slot));
	}

	/* 4. Record the reduced slot count in the page header */
	page_header_set_field(page, PAGE_N_DIR_SLOTS, n_slots - n);
}

/******************************************************************
Used to add n slots to the directory. Does not set the record pointers
in the added slots or update n_owned values: this is the responsibility
of the caller. */
UNIV_INLINE
void
page_dir_add_slots(
/*===============*/
	page_t*	page,	/* in: the index page */
	ulint	start,	/* in: the slot above which the new slots are added */
	ulint	n)	/* in: number of slots to add (currently only n == 1
			allowed) */
{
	page_dir_slot_t*	from;
	page_dir_slot_t*	to;
	ulint			old_n_slots;
	ulint			pos;

	ut_ad(n == 1);

	old_n_slots = page_dir_get_n_slots(page);

	ut_ad(start < old_n_slots - 1);

	/* Grow the slot count in the page header before touching the
	slots */
	page_dir_set_n_slots(page, old_n_slots + n);

	/* Shift the record pointers of all slots above start up by n
	positions, beginning from the topmost slot so that no pointer is
	overwritten before it has been copied */

	pos = old_n_slots - 1;

	while (pos > start) {
		from = page_dir_get_nth_slot(page, pos);
		to = page_dir_get_nth_slot(page, pos + n);

		page_dir_slot_set_rec(to, page_dir_slot_get_rec(from));

		pos--;
	}
}

/********************************************************************
Splits a directory slot which owns too many records. The slot must own
exactly PAGE_DIR_SLOT_MAX_N_OWNED + 1 records; a new slot is inserted
immediately below it and receives the lower half of the records. */

void
page_dir_split_slot(
/*================*/
	page_t*	page,		/* in: the index page in question */
	ulint	slot_no)	/* in: the directory slot */
{
	page_dir_slot_t*	lower_slot;
	page_dir_slot_t*	upper_slot;
	rec_t*			split_rec;
	ulint			total_owned;
	ulint			lower_owned;
	ulint			step;

	ut_ad(page);
	ut_ad(slot_no > 0);

	total_owned = page_dir_slot_get_n_owned(
		page_dir_get_nth_slot(page, slot_no));
	ut_ad(total_owned == PAGE_DIR_SLOT_MAX_N_OWNED + 1);

	lower_owned = total_owned / 2;

	/* 1. Starting from the record of the previous slot, step forward
	lower_owned records to reach a record approximately in the middle
	of the records owned by the slot. */

	split_rec = page_dir_slot_get_rec(
		page_dir_get_nth_slot(page, slot_no - 1));

	for (step = 0; step < lower_owned; step++) {
		split_rec = page_rec_get_next(split_rec);
	}

	ut_ad(lower_owned >= PAGE_DIR_SLOT_MIN_N_OWNED);

	/* 2. Add one directory slot immediately below the slot to be
	split. */

	page_dir_add_slots(page, slot_no - 1, 1);

	/* The added slot is now number slot_no, and the old slot is
	now number slot_no + 1 */

	lower_slot = page_dir_get_nth_slot(page, slot_no);
	upper_slot = page_dir_get_nth_slot(page, slot_no + 1);

	/* 3. The new slot points to the middle record and owns the lower
	half. */

	page_dir_slot_set_rec(lower_slot, split_rec);
	page_dir_slot_set_n_owned(lower_slot, lower_owned);

	/* 4. The original slot keeps the remaining records. */

	page_dir_slot_set_n_owned(upper_slot, total_owned - lower_owned);
}

/*****************************************************************
Tries to balance the given directory slot with too few records with the upper
neighbor, so that there are at least the minimum number of records owned by
the slot; this may result in the merging of two slots. */

void
page_dir_balance_slot(
/*==================*/
	page_t*	page,		/* in: index page */
1132
	ulint	slot_no)	/* in: the directory slot */
1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144
{
	page_dir_slot_t*	slot;
	page_dir_slot_t*	up_slot;
	ulint			n_owned;
	ulint			up_n_owned;
	rec_t*			old_rec;
	rec_t*			new_rec;

	ut_ad(page);
	ut_ad(slot_no > 0);

	slot = page_dir_get_nth_slot(page, slot_no);
1145

1146 1147 1148 1149 1150 1151 1152
	/* The last directory slot cannot be balanced with the upper
	neighbor, as there is none. */

	if (slot_no == page_dir_get_n_slots(page) - 1) {

		return;
	}
1153

1154
	up_slot = page_dir_get_nth_slot(page, slot_no + 1);
1155

1156 1157
	n_owned = page_dir_slot_get_n_owned(slot);
	up_n_owned = page_dir_slot_get_n_owned(up_slot);
1158

1159 1160 1161
	ut_ad(n_owned == PAGE_DIR_SLOT_MIN_N_OWNED - 1);

	/* If the upper slot has the minimum value of n_owned, we will merge
1162
	the two slots, therefore we assert: */
1163
	ut_ad(2 * PAGE_DIR_SLOT_MIN_N_OWNED - 1 <= PAGE_DIR_SLOT_MAX_N_OWNED);
1164

1165 1166 1167 1168 1169 1170
	if (up_n_owned > PAGE_DIR_SLOT_MIN_N_OWNED) {

		/* In this case we can just transfer one record owned
		by the upper slot to the property of the lower slot */
		old_rec = page_dir_slot_get_rec(slot);
		new_rec = page_rec_get_next(old_rec);
1171

unknown's avatar
unknown committed
1172 1173
		rec_set_n_owned(old_rec, page_is_comp(page), 0);
		rec_set_n_owned(new_rec, page_is_comp(page), n_owned + 1);
1174

1175
		page_dir_slot_set_rec(slot, new_rec);
1176

1177 1178 1179 1180
		page_dir_slot_set_n_owned(up_slot, up_n_owned -1);
	} else {
		/* In this case we may merge the two slots */
		page_dir_delete_slots(page, slot_no, 1);
1181
	}
1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230
}

/****************************************************************
Returns the middle record of the record list. If there are an even number
of records in the list, returns the first record of the upper half-list. */

rec_t*
page_get_middle_rec(
/*================*/
			/* out: middle record */
	page_t*	page)	/* in: page */
{
	page_dir_slot_t*	dir_slot;
	rec_t*			result;
	ulint			n_leave;
	ulint			n_passed	= 0;
	ulint			slot_owned;
	ulint			slot_no		= 0;
	ulint			n_steps;

	/* This many records we must leave behind */
	n_leave = (page_get_n_recs(page) + 2) / 2;

	/* Skip whole directory slots for as long as the records they own
	still fit within n_leave */

	for (;;) {
		dir_slot = page_dir_get_nth_slot(page, slot_no);
		slot_owned = page_dir_slot_get_n_owned(dir_slot);

		if (n_passed + slot_owned > n_leave) {
			break;
		}

		n_passed += slot_owned;
		slot_no++;
	}

	ut_ad(slot_no > 0);

	/* Start from the record following the owner record of the last
	slot that was skipped */

	dir_slot = page_dir_get_nth_slot(page, slot_no - 1);
	result = page_rec_get_next(page_dir_slot_get_rec(dir_slot));

	/* There are now n_passed records behind result */

	n_steps = n_leave - n_passed;

	while (n_steps > 0) {
		result = page_rec_get_next(result);
		n_steps--;
	}

	return(result);
}
1231

1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245
/*******************************************************************
Returns the number of records before the given record in chain.
The number includes infimum and supremum records. */

ulint
page_rec_get_n_recs_before(
/*=======================*/
			/* out: number of records */
	rec_t*	rec)	/* in: the physical record */
{
	page_dir_slot_t*	dir_slot;
	rec_t*			owner_rec;
	page_t*			page;
	ulint			slot_no;
	ulint			comp;
	lint			n_before	= 0;

	ut_ad(page_rec_check(rec));

	page = buf_frame_align(rec);
	comp = page_is_comp(page);

	/* Walk forward to the first record with a nonzero n_owned,
	counting each step as negative: those records lie after the
	given one */

	while (rec_get_n_owned(rec, comp) == 0) {

		rec = page_rec_get_next(rec);
		n_before--;
	}

	/* Add up the n_owned counts of the directory slots from slot 0
	up to and including the slot whose record is the one found
	above */

	slot_no = 0;

	do {
		dir_slot = page_dir_get_nth_slot(page, slot_no);
		owner_rec = page_dir_slot_get_rec(dir_slot);

		n_before += rec_get_n_owned(owner_rec, comp);

		slot_no++;
	} while (owner_rec != rec);

	/* Subtract one from the total */

	n_before--;

	ut_ad(n_before >= 0);

	return((ulint) n_before);
}

/****************************************************************
Prints record contents including the data relevant only in
the index page context. */
1282

1283 1284 1285
void
page_rec_print(
/*===========*/
unknown's avatar
unknown committed
1286 1287
	rec_t*		rec,	/* in: physical record */
	const ulint*	offsets)/* in: record descriptor */
1288
{
1289
	ulint	comp	= page_is_comp(buf_frame_align(rec));
unknown's avatar
unknown committed
1290

1291
	ut_a(!comp == !rec_offs_comp(offsets));
1292
	rec_print_new(stderr, rec, offsets);
1293
	fprintf(stderr,
1294
		"            n_owned: %lu; heap_no: %lu; next rec: %lu\n",
unknown's avatar
unknown committed
1295 1296 1297
		(ulong) rec_get_n_owned(rec, comp),
		(ulong) rec_get_heap_no(rec, comp),
		(ulong) rec_get_next_offs(rec, comp));
1298 1299

	page_rec_check(rec);
unknown's avatar
unknown committed
1300
	rec_validate(rec, offsets);
1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317
}

/*******************************************************************
This is used to print the contents of the directory for
debugging purposes. Output goes to stderr. */

void
page_dir_print(
/*===========*/
	page_t*	page,	/* in: index page */
	ulint	pr_n)	/* in: print n first and n last entries */
{
	page_dir_slot_t*	dir_slot;
	ulint			n_slots	= page_dir_get_n_slots(page);
	ulint			slot_no;

	fprintf(stderr, "--------------------------------\n"
		"PAGE DIRECTORY\n"
		"Page address %p\n"
		"Directory stack top at offs: %lu; number of slots: %lu\n",
		page, (ulong)(page_dir_get_nth_slot(page, n_slots - 1) - page),
		(ulong) n_slots);

	for (slot_no = 0; slot_no < n_slots; slot_no++) {
		dir_slot = page_dir_get_nth_slot(page, slot_no);

		/* Mark the gap between the first pr_n and the last pr_n
		entries, if there is one */
		if (slot_no == pr_n && slot_no < n_slots - pr_n) {
			fputs("    ...   \n", stderr);
		}

		if (slot_no < pr_n || slot_no >= n_slots - pr_n) {
			fprintf(stderr,
				"Contents of slot: %lu: n_owned: %lu,"
				" rec offs: %lu\n",
				(ulong) slot_no,
				(ulong) page_dir_slot_get_n_owned(dir_slot),
				(ulong)(page_dir_slot_get_rec(dir_slot)
					- page));
		}
	}

	/* The printed total is the header record count plus two */
	fprintf(stderr, "Total of %lu records\n"
		"--------------------------------\n",
		(ulong) (2 + page_get_n_recs(page)));
}

1344 1345 1346 1347 1348 1349 1350
/*******************************************************************
This is used to print the contents of the page record list for
debugging purposes. */

void
page_print_list(
/*============*/
unknown's avatar
unknown committed
1351 1352 1353
	page_t*		page,	/* in: index page */
	dict_index_t*	index,	/* in: dictionary index of the page */
	ulint		pr_n)	/* in: print n first and n last entries */
1354 1355 1356 1357
{
	page_cur_t	cur;
	ulint		count;
	ulint		n_recs;
1358
	mem_heap_t*	heap		= NULL;
1359
	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
1360
	ulint*		offsets		= offsets_;
1361
	*offsets_ = (sizeof offsets_) / sizeof *offsets_;
unknown's avatar
unknown committed
1362

1363
	ut_a((ibool)!!page_is_comp(page) == dict_table_is_comp(index->table));
1364

1365 1366 1367 1368
	fprintf(stderr,
		"--------------------------------\n"
		"PAGE RECORD LIST\n"
		"Page address %p\n", page);
1369 1370 1371 1372 1373 1374

	n_recs = page_get_n_recs(page);

	page_cur_set_before_first(page, &cur);
	count = 0;
	for (;;) {
1375
		offsets = rec_get_offsets(cur.rec, index, offsets,
unknown's avatar
unknown committed
1376
					  ULINT_UNDEFINED, &heap);
unknown's avatar
unknown committed
1377
		page_rec_print(cur.rec, offsets);
1378 1379 1380

		if (count == pr_n) {
			break;
1381
		}
1382 1383
		if (page_cur_is_after_last(&cur)) {
			break;
1384
		}
1385
		page_cur_move_to_next(&cur);
1386
		count++;
1387
	}
1388

1389
	if (n_recs > 2 * pr_n) {
1390
		fputs(" ... \n", stderr);
1391
	}
1392

1393
	while (!page_cur_is_after_last(&cur)) {
1394 1395
		page_cur_move_to_next(&cur);

1396
		if (count + pr_n >= n_recs) {
1397
			offsets = rec_get_offsets(cur.rec, index, offsets,
unknown's avatar
unknown committed
1398
						  ULINT_UNDEFINED, &heap);
unknown's avatar
unknown committed
1399
			page_rec_print(cur.rec, offsets);
1400
		}
1401
		count++;
1402 1403
	}

1404 1405 1406
	fprintf(stderr,
		"Total of %lu records \n"
		"--------------------------------\n",
unknown's avatar
unknown committed
1407
		(ulong) (count + 1));
unknown's avatar
unknown committed
1408

1409
	if (UNIV_LIKELY_NULL(heap)) {
1410 1411
		mem_heap_free(heap);
	}
1412
}
1413 1414 1415 1416 1417 1418 1419 1420 1421

/*******************************************************************
Prints the info in a page header. Output goes to stderr and consists of
the page address, the record format and the page header fields read
below. */

void
page_header_print(
/*==============*/
	page_t*	page)	/* in: index page */
{
	const char*	format_name;

	if (page_is_comp(page)) {
		format_name = "compact format";
	} else {
		format_name = "original format";
	}

	fprintf(stderr,
		"--------------------------------\n"
		"PAGE HEADER INFO\n"
		"Page address %p, n records %lu (%s)\n"
		"n dir slots %lu, heap top %lu\n"
		"Page n heap %lu, free %lu, garbage %lu\n"
		"Page last insert %lu, direction %lu, n direction %lu\n",
		page, (ulong) page_header_get_field(page, PAGE_N_RECS),
		format_name,
		(ulong) page_header_get_field(page, PAGE_N_DIR_SLOTS),
		(ulong) page_header_get_field(page, PAGE_HEAP_TOP),
		(ulong) page_dir_get_n_heap(page),
		(ulong) page_header_get_field(page, PAGE_FREE),
		(ulong) page_header_get_field(page, PAGE_GARBAGE),
		(ulong) page_header_get_field(page, PAGE_LAST_INSERT),
		(ulong) page_header_get_field(page, PAGE_DIRECTION),
		(ulong) page_header_get_field(page, PAGE_N_DIRECTION));
}

/*******************************************************************
This is used to print the contents of the page for
debugging purposes. */

void
page_print(
1447
/*=======*/
unknown's avatar
unknown committed
1448 1449 1450 1451 1452 1453
	page_t*		page,	/* in: index page */
	dict_index_t*	index,	/* in: dictionary index of the page */
	ulint		dn,	/* in: print dn first and last entries
				in directory */
	ulint		rn)	/* in: print rn first and last records
				in directory */
1454 1455 1456
{
	page_header_print(page);
	page_dir_print(page, dn);
unknown's avatar
unknown committed
1457
	page_print_list(page, index, rn);
1458
}
1459 1460 1461 1462 1463 1464 1465 1466 1467

/*******************************************************************
The following is used to validate a record on a page. This function
differs from rec_validate as it can also check the n_owned field and
the heap_no field. */

ibool
page_rec_validate(
/*==============*/
unknown's avatar
unknown committed
1468 1469 1470
				/* out: TRUE if ok */
	rec_t*		rec,	/* in: physical record */
	const ulint*	offsets)/* in: array returned by rec_get_offsets() */
1471 1472 1473
{
	ulint	n_owned;
	ulint	heap_no;
unknown's avatar
unknown committed
1474
	page_t*	page;
1475
	ulint	comp;
1476 1477

	page = buf_frame_align(rec);
unknown's avatar
unknown committed
1478
	comp = page_is_comp(page);
1479
	ut_a(!comp == !rec_offs_comp(offsets));
1480 1481

	page_rec_check(rec);
unknown's avatar
unknown committed
1482
	rec_validate(rec, offsets);
1483

unknown's avatar
unknown committed
1484 1485
	n_owned = rec_get_n_owned(rec, comp);
	heap_no = rec_get_heap_no(rec, comp);
1486

unknown's avatar
unknown committed
1487
	if (!(n_owned <= PAGE_DIR_SLOT_MAX_N_OWNED)) {
unknown's avatar
unknown committed
1488 1489
		fprintf(stderr,
			"InnoDB: Dir slot of rec %lu, n owned too big %lu\n",
unknown's avatar
unknown committed
1490
			(ulong)(rec - page), (ulong) n_owned);
unknown's avatar
unknown committed
1491 1492 1493
		return(FALSE);
	}

unknown's avatar
unknown committed
1494
	if (!(heap_no < page_dir_get_n_heap(page))) {
unknown's avatar
unknown committed
1495
		fprintf(stderr,
unknown's avatar
unknown committed
1496 1497 1498
			"InnoDB: Heap no of rec %lu too big %lu %lu\n",
			(ulong)(rec - page), (ulong) heap_no,
			(ulong) page_dir_get_n_heap(page));
unknown's avatar
unknown committed
1499 1500
		return(FALSE);
	}
1501

1502 1503
	return(TRUE);
}
unknown's avatar
unknown committed
1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519

/*******************************************************************
Checks that the first directory slot points to the infimum record and
the last to the supremum. This function is intended to track if the
bug fixed in 4.0.14 has caused corruption to users' databases. On a
mismatch a message is written to stderr and the page is printed; nothing
is returned to the caller. */

void
page_check_dir(
/*===========*/
	page_t*	page)	/* in: index page */
{
	page_dir_slot_t*	first_slot;
	page_dir_slot_t*	last_slot;
	ulint			n_slots;

	n_slots = page_dir_get_n_slots(page);

	first_slot = page_dir_get_nth_slot(page, 0);

	if (page_dir_slot_get_rec(first_slot)
	    != page_get_infimum_rec(page)) {

		fprintf(stderr,
			"InnoDB: Page directory corruption:"
			" infimum not pointed to\n");
		buf_page_print(page);
	}

	last_slot = page_dir_get_nth_slot(page, n_slots - 1);

	if (page_dir_slot_get_rec(last_slot)
	    != page_get_supremum_rec(page)) {

		fprintf(stderr,
			"InnoDB: Page directory corruption:"
			" supremum not pointed to\n");
		buf_page_print(page);
	}
}
1537

unknown's avatar
unknown committed
1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548
/*******************************************************************
This function checks the consistency of an index page when we do not
know the index. This is also resilient so that this should never crash
even if the page is total garbage. The first failed check prints a
diagnostic to stderr and ends the validation. */

ibool
page_simple_validate(
/*=================*/
			/* out: TRUE if ok */
	page_t*	page)	/* in: index page */
{
	page_cur_t		cursor;
	page_dir_slot_t*	dir_slot;
	page_dir_slot_t*	top_slot;
	ulint			cur_slot_no;
	ulint			n_slots;
	rec_t*			rec;
	byte*			heap_top;
	ulint			n_walked;
	ulint			n_since_owner;
	ulint			rec_owned;
	ulint			next_offs;
	ulint			comp	= page_is_comp(page);

	/* Check first that the record heap and the directory do not
	overlap. */

	n_slots = page_dir_get_n_slots(page);

	if (n_slots > UNIV_PAGE_SIZE / 4) {
		fprintf(stderr,
			"InnoDB: Nonsensical number %lu of page dir slots\n",
			(ulong) n_slots);

		return(FALSE);
	}

	heap_top = page_header_get_ptr(page, PAGE_HEAP_TOP);
	top_slot = page_dir_get_nth_slot(page, n_slots - 1);

	if (heap_top > top_slot) {

		fprintf(stderr,
			"InnoDB: Record heap and dir overlap on a page,"
			" heap top %lu, dir %lu\n",
			(ulong) (heap_top - page),
			(ulong) (top_slot - page));

		return(FALSE);
	}

	/* Validate the record list in a loop checking also that it is
	consistent with the page record directory. */

	n_walked = 0;
	n_since_owner = 1;
	cur_slot_no = 0;
	dir_slot = page_dir_get_nth_slot(page, cur_slot_no);

	page_cur_set_before_first(page, &cursor);

	for (;;) {
		rec = cursor.rec;

		if (rec > heap_top) {
			fprintf(stderr,
				"InnoDB: Record %lu is above"
				" rec heap top %lu\n",
				(ulong)(rec - page),
				(ulong)(heap_top - page));

			return(FALSE);
		}

		rec_owned = rec_get_n_owned(rec, comp);

		if (rec_owned != 0) {
			/* This is a record pointed to by a dir slot: its
			n_owned must equal the number of records walked
			since the previous such record */
			if (rec_owned != n_since_owner) {

				fprintf(stderr,
					"InnoDB: Wrong owned count %lu, %lu,"
					" rec %lu\n",
					(ulong) rec_owned,
					(ulong) n_since_owner,
					(ulong)(rec - page));

				return(FALSE);
			}

			if (page_dir_slot_get_rec(dir_slot) != rec) {
				fprintf(stderr,
					"InnoDB: Dir slot does not point"
					" to right rec %lu\n",
					(ulong)(rec - page));

				return(FALSE);
			}

			n_since_owner = 0;

			/* Advance to the next slot unless the cursor is
			after the last record */
			if (!page_cur_is_after_last(&cursor)) {
				cur_slot_no++;
				dir_slot = page_dir_get_nth_slot(
					page, cur_slot_no);
			}
		}

		if (page_cur_is_after_last(&cursor)) {

			break;
		}

		next_offs = rec_get_next_offs(rec, comp);

		if (next_offs < FIL_PAGE_DATA
		    || next_offs >= UNIV_PAGE_SIZE) {
			fprintf(stderr,
				"InnoDB: Next record offset"
				" nonsensical %lu for rec %lu\n",
				(ulong) next_offs,
				(ulong)(rec - page));

			return(FALSE);
		}

		n_walked++;

		/* More steps than UNIV_PAGE_SIZE is reported as a
		circular list */
		if (n_walked > UNIV_PAGE_SIZE) {
			fprintf(stderr,
				"InnoDB: Page record list appears"
				" to be circular %lu\n",
				(ulong) n_walked);
			return(FALSE);
		}

		page_cur_move_to_next(&cursor);
		n_since_owner++;
	}

	/* rec is now the record on which the walk ended */

	if (rec_get_n_owned(rec, comp) == 0) {
		fprintf(stderr, "InnoDB: n owned is zero in a supremum rec\n");

		return(FALSE);
	}

	if (cur_slot_no != n_slots - 1) {
		fprintf(stderr, "InnoDB: n slots wrong %lu, %lu\n",
			(ulong) cur_slot_no, (ulong) (n_slots - 1));
		return(FALSE);
	}

	if (page_header_get_field(page, PAGE_N_RECS) + 2 != n_walked + 1) {
		fprintf(stderr, "InnoDB: n recs wrong %lu %lu\n",
			(ulong) page_header_get_field(page, PAGE_N_RECS) + 2,
			(ulong) (n_walked + 1));

		return(FALSE);
	}

	/* Check then the free list; n_walked keeps accumulating so that
	it can be compared against the heap count afterwards */

	for (rec = page_header_get_ptr(page, PAGE_FREE);
	     rec != NULL;
	     rec = page_rec_get_next(rec)) {

		if (rec < page + FIL_PAGE_DATA
		    || rec >= page + UNIV_PAGE_SIZE) {
			fprintf(stderr,
				"InnoDB: Free list record has"
				" a nonsensical offset %lu\n",
				(ulong) (rec - page));

			return(FALSE);
		}

		if (rec > heap_top) {
			fprintf(stderr,
				"InnoDB: Free list record %lu"
				" is above rec heap top %lu\n",
				(ulong) (rec - page),
				(ulong) (heap_top - page));

			return(FALSE);
		}

		n_walked++;

		if (n_walked > UNIV_PAGE_SIZE) {
			fprintf(stderr,
				"InnoDB: Page free list appears"
				" to be circular %lu\n",
				(ulong) n_walked);
			return(FALSE);
		}
	}

	if (page_dir_get_n_heap(page) != n_walked + 1) {

		fprintf(stderr, "InnoDB: N heap is wrong %lu, %lu\n",
			(ulong) page_dir_get_n_heap(page),
			(ulong) (n_walked + 1));

		return(FALSE);
	}

	return(TRUE);
}

1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754
/*******************************************************************
This function checks the consistency of an index page. */

ibool
page_validate(
/*==========*/
				/* out: TRUE if ok */
	page_t*		page,	/* in: index page */
	dict_index_t*	index)	/* in: data dictionary index containing
				the page record type definition */
{
unknown's avatar
unknown committed
1755
	page_dir_slot_t* slot;
1756
	mem_heap_t*	heap;
1757
	page_cur_t	cur;
1758 1759 1760 1761 1762 1763
	byte*		buf;
	ulint		count;
	ulint		own_count;
	ulint		slot_no;
	ulint		data_size;
	rec_t*		rec;
unknown's avatar
unknown committed
1764
	rec_t*		old_rec		= NULL;
1765 1766
	ulint		offs;
	ulint		n_slots;
unknown's avatar
unknown committed
1767
	ibool		ret		= FALSE;
unknown's avatar
Merge  
unknown committed
1768
	ulint		i;
1769
	ulint		comp		= page_is_comp(page);
unknown's avatar
unknown committed
1770 1771 1772
	ulint*		offsets		= NULL;
	ulint*		old_offsets	= NULL;

1773
	if ((ibool)!!comp != dict_table_is_comp(index->table)) {
unknown's avatar
unknown committed
1774 1775 1776
		fputs("InnoDB: 'compact format' flag mismatch\n", stderr);
		goto func_exit2;
	}
unknown's avatar
unknown committed
1777
	if (!page_simple_validate(page)) {
1778
		goto func_exit2;
unknown's avatar
unknown committed
1779 1780
	}

1781
	heap = mem_heap_create(UNIV_PAGE_SIZE + 200);
1782

1783 1784 1785 1786
	/* The following buffer is used to check that the
	records in the page record heap do not overlap */

	buf = mem_heap_alloc(heap, UNIV_PAGE_SIZE);
1787
	memset(buf, 0, UNIV_PAGE_SIZE);
1788 1789 1790 1791 1792

	/* Check first that the record heap and the directory do not
	overlap. */

	n_slots = page_dir_get_n_slots(page);
unknown's avatar
unknown committed
1793

unknown's avatar
unknown committed
1794 1795
	if (!(page_header_get_ptr(page, PAGE_HEAP_TOP)
	      <= page_dir_get_nth_slot(page, n_slots - 1))) {
1796 1797

		fputs("InnoDB: Record heap and dir overlap on a page ",
unknown's avatar
unknown committed
1798
		      stderr);
1799
		dict_index_name_print(stderr, NULL, index);
1800
		fprintf(stderr, ", %p, %p\n",
unknown's avatar
unknown committed
1801
			page_header_get_ptr(page, PAGE_HEAP_TOP),
1802
			page_dir_get_nth_slot(page, n_slots - 1));
unknown's avatar
unknown committed
1803

1804 1805
		goto func_exit;
	}
1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817

	/* Validate the record list in a loop checking also that
	it is consistent with the directory. */
	count = 0;
	data_size = 0;
	own_count = 1;
	slot_no = 0;
	slot = page_dir_get_nth_slot(page, slot_no);

	page_cur_set_before_first(page, &cur);

	for (;;) {
1818
		rec = cur.rec;
1819
		offsets = rec_get_offsets(rec, index, offsets,
unknown's avatar
unknown committed
1820
					  ULINT_UNDEFINED, &heap);
unknown's avatar
unknown committed
1821

unknown's avatar
unknown committed
1822
		if (comp && page_rec_is_user_rec(rec)
unknown's avatar
unknown committed
1823 1824 1825
		    && rec_get_node_ptr_flag(rec)
		    != (ibool)
		    (btr_page_get_level_low(page) != 0)) {
unknown's avatar
unknown committed
1826 1827 1828 1829 1830
			fputs("InnoDB: node_ptr flag mismatch\n", stderr);
			goto func_exit;
		}

		if (!page_rec_validate(rec, offsets)) {
unknown's avatar
unknown committed
1831 1832
			goto func_exit;
		}
1833

1834 1835
		/* Check that the records are in ascending order */
		if ((count >= 2) && (!page_cur_is_after_last(&cur))) {
unknown's avatar
unknown committed
1836
			if (!(1 == cmp_rec_rec(rec, old_rec,
unknown's avatar
unknown committed
1837
					       offsets, old_offsets, index))) {
unknown's avatar
unknown committed
1838
				fprintf(stderr,
unknown's avatar
unknown committed
1839 1840
					"InnoDB: Records in wrong order"
					" on page %lu ",
unknown's avatar
unknown committed
1841
					(ulong) buf_frame_get_page_no(page));
1842
				dict_index_name_print(stderr, NULL, index);
1843
				fputs("\nInnoDB: previous record ", stderr);
1844
				rec_print_new(stderr, old_rec, old_offsets);
1845
				fputs("\nInnoDB: record ", stderr);
1846
				rec_print_new(stderr, rec, offsets);
1847
				putc('\n', stderr);
1848

unknown's avatar
unknown committed
1849 1850
				goto func_exit;
			}
1851 1852
		}

1853
		if (page_rec_is_user_rec(rec)) {
1854

unknown's avatar
unknown committed
1855
			data_size += rec_offs_size(offsets);
1856
		}
1857

unknown's avatar
unknown committed
1858
		offs = rec_get_start(rec, offsets) - page;
1859

unknown's avatar
unknown committed
1860
		for (i = 0; i < rec_offs_size(offsets); i++) {
unknown's avatar
unknown committed
1861 1862 1863
			if (!buf[offs + i] == 0) {
				/* No other record may overlap this */

1864
				fputs("InnoDB: Record overlaps another\n",
unknown's avatar
unknown committed
1865
				      stderr);
unknown's avatar
unknown committed
1866 1867
				goto func_exit;
			}
1868

1869 1870
			buf[offs + i] = 1;
		}
1871

unknown's avatar
unknown committed
1872
		if (rec_get_n_owned(rec, comp) != 0) {
1873
			/* This is a record pointed to by a dir slot */
unknown's avatar
unknown committed
1874
			if (rec_get_n_owned(rec, comp) != own_count) {
unknown's avatar
unknown committed
1875
				fprintf(stderr,
unknown's avatar
unknown committed
1876 1877 1878
					"InnoDB: Wrong owned count %lu, %lu\n",
					(ulong) rec_get_n_owned(rec, comp),
					(ulong) own_count);
unknown's avatar
unknown committed
1879 1880 1881 1882
				goto func_exit;
			}

			if (page_dir_slot_get_rec(slot) != rec) {
unknown's avatar
unknown committed
1883 1884 1885
				fputs("InnoDB: Dir slot does not"
				      " point to right rec\n",
				      stderr);
unknown's avatar
unknown committed
1886 1887
				goto func_exit;
			}
1888

1889
			page_dir_slot_check(slot);
1890

1891 1892 1893 1894 1895 1896 1897 1898 1899 1900
			own_count = 0;
			if (!page_cur_is_after_last(&cur)) {
				slot_no++;
				slot = page_dir_get_nth_slot(page, slot_no);
			}
		}

		if (page_cur_is_after_last(&cur)) {
			break;
		}
unknown's avatar
unknown committed
1901

unknown's avatar
unknown committed
1902
		if (rec_get_next_offs(rec, comp) < FIL_PAGE_DATA
unknown's avatar
unknown committed
1903
		    || rec_get_next_offs(rec, comp) >= UNIV_PAGE_SIZE) {
unknown's avatar
unknown committed
1904
			fprintf(stderr,
1905
				"InnoDB: Next record offset wrong %lu\n",
unknown's avatar
unknown committed
1906
				(ulong) rec_get_next_offs(rec, comp));
unknown's avatar
unknown committed
1907 1908 1909
			goto func_exit;
		}

1910
		count++;
1911 1912 1913
		page_cur_move_to_next(&cur);
		own_count++;
		old_rec = rec;
unknown's avatar
unknown committed
1914 1915 1916 1917 1918 1919
		/* set old_offsets to offsets; recycle offsets */
		{
			ulint* offs = old_offsets;
			old_offsets = offsets;
			offsets = offs;
		}
1920
	}
1921

unknown's avatar
unknown committed
1922
	if (rec_get_n_owned(rec, comp) == 0) {
1923
		fputs("InnoDB: n owned is zero\n", stderr);
unknown's avatar
unknown committed
1924 1925
		goto func_exit;
	}
1926

unknown's avatar
unknown committed
1927
	if (slot_no != n_slots - 1) {
1928
		fprintf(stderr, "InnoDB: n slots wrong %lu %lu\n",
unknown's avatar
unknown committed
1929
			(ulong) slot_no, (ulong) (n_slots - 1));
unknown's avatar
unknown committed
1930
		goto func_exit;
1931
	}
unknown's avatar
unknown committed
1932 1933

	if (page_header_get_field(page, PAGE_N_RECS) + 2 != count + 1) {
1934
		fprintf(stderr, "InnoDB: n recs wrong %lu %lu\n",
unknown's avatar
unknown committed
1935 1936
			(ulong) page_header_get_field(page, PAGE_N_RECS) + 2,
			(ulong) (count + 1));
unknown's avatar
unknown committed
1937 1938
		goto func_exit;
	}
1939 1940

	if (data_size != page_get_data_size(page)) {
unknown's avatar
unknown committed
1941
		fprintf(stderr,
unknown's avatar
unknown committed
1942
			"InnoDB: Summed data size %lu, returned by func %lu\n",
1943
			(ulong) data_size, (ulong) page_get_data_size(page));
unknown's avatar
unknown committed
1944
		goto func_exit;
1945 1946 1947 1948 1949 1950
	}

	/* Then check the free list */
	rec = page_header_get_ptr(page, PAGE_FREE);

	while (rec != NULL) {
1951
		offsets = rec_get_offsets(rec, index, offsets,
unknown's avatar
unknown committed
1952
					  ULINT_UNDEFINED, &heap);
unknown's avatar
unknown committed
1953
		if (!page_rec_validate(rec, offsets)) {
unknown's avatar
unknown committed
1954 1955 1956

			goto func_exit;
		}
1957 1958

		count++;
unknown's avatar
unknown committed
1959
		offs = rec_get_start(rec, offsets) - page;
1960

unknown's avatar
unknown committed
1961
		for (i = 0; i < rec_offs_size(offsets); i++) {
unknown's avatar
unknown committed
1962 1963

			if (buf[offs + i] != 0) {
unknown's avatar
unknown committed
1964 1965
				fputs("InnoDB: Record overlaps another"
				      " in free list\n", stderr);
unknown's avatar
unknown committed
1966 1967
				goto func_exit;
			}
1968

1969 1970
			buf[offs + i] = 1;
		}
1971

1972 1973
		rec = page_rec_get_next(rec);
	}
1974

unknown's avatar
unknown committed
1975
	if (page_dir_get_n_heap(page) != count + 1) {
1976
		fprintf(stderr, "InnoDB: N heap is wrong %lu %lu\n",
unknown's avatar
unknown committed
1977
			(ulong) page_dir_get_n_heap(page),
unknown's avatar
unknown committed
1978
			(ulong) count + 1);
unknown's avatar
unknown committed
1979
		goto func_exit;
unknown's avatar
unknown committed
1980 1981
	}

1982
	ret = TRUE;
unknown's avatar
unknown committed
1983 1984

func_exit:
1985 1986
	mem_heap_free(heap);

unknown's avatar
unknown committed
1987
	if (ret == FALSE) {
unknown's avatar
unknown committed
1988
func_exit2:
1989
		fprintf(stderr, "InnoDB: Apparent corruption in page %lu in ",
unknown's avatar
unknown committed
1990
			(ulong) buf_frame_get_page_no(page));
1991
		dict_index_name_print(stderr, NULL, index);
1992
		putc('\n', stderr);
unknown's avatar
unknown committed
1993 1994
		buf_page_print(page);
	}
1995 1996

	return(ret);
1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013
}

/*******************************************************************
Scans the record list of a page with a page cursor, starting from the
position set by page_cur_set_before_first(), and returns the first record
whose heap number equals the requested one. The record at every cursor
position is compared, including the starting position and the position
for which page_cur_is_after_last() holds. */

rec_t*
page_find_rec_with_heap_no(
/*=======================*/
			/* out: matching record, or NULL if no record
			in the list carries the given heap number */
	page_t*	page,	/* in: index page */
	ulint	heap_no)/* in: heap number to look for */
{
	page_cur_t	cursor;
	ulint		comp;

	page_cur_set_before_first(page, &cursor);

	/* The page is not modified during the scan, so the format flag
	can be read once and reused for every record. */
	comp = page_is_comp(page);

	while (rec_get_heap_no(cursor.rec, comp) != heap_no) {

		if (page_cur_is_after_last(&cursor)) {
			/* The whole list was scanned without a match */

			return(NULL);
		}

		page_cur_move_to_next(&cursor);
	}

	return(cursor.rec);
}