Commit 2ed4d634 authored by Vadim Tkachenko's avatar Vadim Tkachenko
Browse files

1.0.2 version imported

parent 84f733af
......@@ -17,6 +17,14 @@ SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DSAFEMALLOC -DSAFE_MUTEX")
# Append -DSAFEMALLOC and -DSAFE_MUTEX to the flags used for Debug C builds,
# keeping whatever CMAKE_C_FLAGS_DEBUG already contained.
SET(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -DSAFEMALLOC -DSAFE_MUTEX")
# Preprocessor definitions passed on the compiler command line:
# MYSQL_SERVER, _WIN32 and _LIB.
ADD_DEFINITIONS(-DMYSQL_SERVER -D_WIN32 -D_LIB)
# Workaround for Bug 19424 - InnoDB: Possibly a memory overrun of the buffer
# being freed (64-bit Visual C).
# On a Visual Studio generator with 8-byte pointers, compile the two
# innodb/mem/* sources listed below with -Od, i.e. without optimizations.
IF(CMAKE_GENERATOR MATCHES "Visual Studio")
  IF(CMAKE_SIZEOF_VOID_P MATCHES 8)
    SET_SOURCE_FILES_PROPERTIES(
      ${CMAKE_SOURCE_DIR}/storage/innobase/mem/mem0mem.c
      PROPERTIES COMPILE_FLAGS -Od)
    SET_SOURCE_FILES_PROPERTIES(
      ${CMAKE_SOURCE_DIR}/storage/innobase/mem/mem0pool.c
      PROPERTIES COMPILE_FLAGS -Od)
  ENDIF(CMAKE_SIZEOF_VOID_P MATCHES 8)
ENDIF(CMAKE_GENERATOR MATCHES "Visual Studio")
INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR}/zlib
${CMAKE_SOURCE_DIR}/storage/innobase/include
${CMAKE_SOURCE_DIR}/storage/innobase/handler
......@@ -61,4 +69,28 @@ SET(INNOBASE_SOURCES btr/btr0btr.c btr/btr0cur.c btr/btr0pcur.c btr/btr0sea.c
IF(NOT SOURCE_SUBLIBS)
  # Static InnoDB library; GenError must be built first.
  ADD_LIBRARY(innobase ${INNOBASE_SOURCES})
  ADD_DEPENDENCIES(innobase GenError)

  IF(INNODB_DYNAMIC_PLUGIN)
    # The dynamic plugin requires CMake 2.6.0 or later. Otherwise, the
    # /DELAYLOAD property will not be set
    CMAKE_MINIMUM_REQUIRED(VERSION 2.6.0 FATAL_ERROR)

    # ha_innodb.dll: the same sources as the static library plus the export
    # definition file and the delay-load helper.
    ADD_LIBRARY(ha_innodb SHARED ${INNOBASE_SOURCES} ha_innodb.def handler/win_delay_loader.cc)
    ADD_DEPENDENCIES(ha_innodb GenError mysqld)

    # If build type is not specified as Release, default to Debug
    # This is a workaround to a problem in CMake 2.6, which does not
    # set the path of mysqld.lib correctly
    IF(CMAKE_BUILD_TYPE MATCHES Release)
      SET(CMAKE_BUILD_TYPE "Release")
    ELSE(CMAKE_BUILD_TYPE MATCHES Release)
      SET(CMAKE_BUILD_TYPE "Debug")
    ENDIF(CMAKE_BUILD_TYPE MATCHES Release)

    TARGET_LINK_LIBRARIES(ha_innodb strings zlib)
    # mysqld.lib is referenced by path; CMAKE_BUILD_TYPE (normalized above)
    # selects the sql/Release or sql/Debug directory.
    TARGET_LINK_LIBRARIES(ha_innodb ${CMAKE_SOURCE_DIR}/sql/${CMAKE_BUILD_TYPE}/mysqld.lib)

    # LINK_FLAGS is a single property value: each SET_TARGET_PROPERTIES call
    # that sets it REPLACES the previous value instead of appending to it.
    # The linker options are therefore given once, as one string, so that
    # the map-file options are not lost when /DELAYLOAD is added.
    # NOTE(review): an earlier revision also set
    # /ENTRY:"_DllMainCRTStartup@12", but that value was always overwritten
    # by the later /DELAYLOAD assignment and never reached the linker. It is
    # left out here to keep the effective entry point unchanged; the "@12"
    # decorated name is presumably x86-only and would need confirming before
    # being re-enabled for 64-bit builds.
    SET_TARGET_PROPERTIES(ha_innodb PROPERTIES
      OUTPUT_NAME ha_innodb
      COMPILE_FLAGS "-DMYSQL_DYNAMIC_PLUGIN"
      LINK_FLAGS "/MAP /MAPINFO:EXPORTS /DELAYLOAD:mysqld.exe")
  ENDIF(INNODB_DYNAMIC_PLUGIN)
ENDIF(NOT SOURCE_SUBLIBS)
2008-10-31 The InnoDB Team
* dict/dict0mem.c, include/dict0mem.h, include/lock0lock.h,
include/row0mysql.h, include/trx0trx.h, include/univ.i,
include/ut0vec.h, include/ut0vec.ic, lock/lock0lock.c,
row/row0mysql.c, trx/trx0trx.c:
Fix Bug#26316 Triggers create duplicate entries on auto-increment
columns
2008-10-30 The InnoDB Team
* handler/ha_innodb.cc, handler/handler0vars.h,
handler/win_delay_loader.cc, mysql-test/innodb_bug40360.result,
mysql-test/innodb_bug40360.test:
Fix Bug#40360 Binlog related errors with binlog off
2008-10-29 The InnoDB Team
* include/data0type.ic:
Fix Bug#40369 dtype_get_sql_null_size() returns 0 or 1, not the size
2008-10-29 The InnoDB Team
* handler/ha_innodb.cc, include/srv0srv.h, srv/srv0srv.c:
Fix Bug#38189 innodb_stats_on_metadata missing
2008-10-28 The InnoDB Team
* CMakeLists.txt, ha_innodb.def, handler/ha_innodb.cc,
handler/handler0alter.cc, handler/handler0vars.h, handler/i_s.cc,
handler/win_delay_loader.cc, win-plugin/*:
Implemented the delayloading of externals for the plugin on Windows.
This makes it possible to build a dynamic plugin (ha_innodb.dll) on
Windows.
2008-10-27 The InnoDB Team
* CMakeLists.txt:
Fix Bug#19424 InnoDB: Possibly a memory overrun of the buffer being
freed (64-bit Visual C)
2008-10-23 The InnoDB Team
* ibuf/ibuf0ibuf.c:
ibuf_delete_rec(): When the cursor to the insert buffer record
cannot be restored, do not complain if the tablespace does not
exist, because the insert buffer record may have been discarded by
some other thread. This bug has existed in MySQL/InnoDB since
version 4.1, when innodb_file_per_table was implemented.
This may fix Bug#27276 InnoDB Error: ibuf cursor restoration fails.
2008-10-22 The InnoDB Team
* dict/dict0dict.c, dict/dict0mem.c, handler/ha_innodb.cc,
handler/ha_innodb.h, include/dict0dict.h, include/dict0mem.h,
row/row0mysql.c:
Fix Bug#39830 Table autoinc value not updated on first insert
Fix Bug#35498 Cannot get table test/table1 auto-inccounter value in
::info
Fix Bug#36411 "Failed to read auto-increment value from storage
engine" in 5.1.24 auto-inc
2008-10-22 The InnoDB Team
* handler/ha_innodb.cc, include/row0mysql.h, row/row0mysql.c:
Fix Bug#40224 New AUTOINC changes mask reporting of deadlock/timeout
errors
2008-10-16 The InnoDB Team
* dict/dict0dict.c, mysql-test/innodb-index.result,
mysql-test/innodb-index.test:
Skip the undo log size check when creating REDUNDANT and COMPACT
tables. In ROW_FORMAT=DYNAMIC and ROW_FORMAT=COMPRESSED, column
prefix indexes require that prefixes of externally stored columns
be written to the undo log. This may make the undo log record
bigger than the record on the B-tree page. The maximum size of an
undo log record is the page size. That must be checked for, in
dict_index_add_to_cache(). However, this restriction must not
be enforced on REDUNDANT or COMPACT tables.
2008-10-15 The InnoDB Team
* btr/btr0cur.c, include/btr0cur.h, row/row0ext.c, row/row0sel.c,
row/row0upd.c:
When the server crashes while freeing an externally stored column
of a compressed table, the BTR_EXTERN_LEN field in the BLOB
pointer will be written as 0. Tolerate this in the functions that
deal with externally stored columns. This fixes problems after
crash recovery, in the rollback of incomplete transactions, and in
the purge of delete-marked records.
2008-10-15 The InnoDB Team
* btr/btr0btr.c, include/page0zip.h, page/page0zip.c, include/univ.i:
When a B-tree node of a compressed table is split or merged, the
compression may fail. In this case, the entire compressed page
will be copied and the excess records will be deleted. However,
page_zip_copy(), now renamed to page_zip_copy_recs(), copied too
many fields in the page header, overwriting PAGE_BTR_SEG_LEAF and
PAGE_BTR_SEG_TOP when splitting the B-tree root. This caused
corruption of compressed tables. Furthermore, the lock table and
the adaptive hash index would be corrupted, because we forgot to
update them when invoking page_zip_copy_recs().
Introduce the symbol UNIV_ZIP_DEBUG for triggering the copying of
compressed pages more often, for debugging purposes.
2008-10-10 The InnoDB Team
* handler/handler0alter.cc, include/row0merge.h, row/row0merge.c,
row/row0mysql.c:
Fix some locking issues, mainly in fast index creation. The
InnoDB data dictionary cache should be latched whenever a
transaction is holding locks on any data dictionary tables.
Otherwise, lock waits or deadlocks could occur. Furthermore, the
data dictionary transaction must be committed (and the locks
released) before the data dictionary latch is released.
ha_innobase::add_index(): Lock the data dictionary before renaming
or dropping the created indexes, because neither operation will
commit the data dictionary transaction.
ha_innobase::final_drop_index(): Commit the transactions before
unlocking the data dictionary.
2008-10-09 The InnoDB Team
* buf/buf0lru.c:
Fix Bug#39939 DROP TABLE/DISCARD TABLESPACE takes long time in
buf_LRU_invalidate_tablespace()
2008-10-08 The InnoDB Team
* dict/dict0crea.c, trx/trx0roll.c, include/row0mysql.h,
row/row0merge.c, row/row0mysql.c:
When dropping a table, hold the data dictionary latch until the
transaction has been committed. The data dictionary latch is
supposed to prevent lock waits and deadlocks in the data
dictionary tables. Due to this bug, DROP TABLE could cause a
deadlock or hang. Note that because of Bug#33650 and Bug#39833,
MySQL may also drop a (temporary) table when executing CREATE INDEX
or ALTER TABLE ... ADD INDEX.
2008-10-04 The InnoDB Team
* handler/ha_innodb.cc, mysql-test/innodb_bug39438-master.opt,
mysql-test/innodb_bug39438.result, mysql-test/innodb_bug39438.test:
Fix Bug#39438 Testcase for Bug#39436 crashes on 5.1 in
fil_space_get_latch
2008-10-04 The InnoDB Team
* include/lock0lock.h, lock/lock0lock.c,
mysql-test/innodb_bug38231.result, mysql-test/innodb_bug38231.test,
row/row0mysql.c:
Fix Bug#38231 Innodb crash in lock_reset_all_on_table() on TRUNCATE +
LOCK / UNLOCK
2008-10-04 The InnoDB Team
* handler/ha_innodb.cc:
Fix Bug#35498 Cannot get table test/table1 auto-inccounter value in
::info
2008-10-04 The InnoDB Team
* handler/ha_innodb.cc, handler/ha_innodb.h:
Fix Bug#37788 InnoDB Plugin: AUTO_INCREMENT wrong for compressed
tables
2008-10-04 The InnoDB Team
* dict/dict0dict.c, handler/ha_innodb.cc, handler/ha_innodb.h,
include/dict0dict.h, include/dict0mem.h, row/row0mysql.c:
Fix Bug#39830 Table autoinc value not updated on first insert
2008-10-03 The InnoDB Team
* mysql-test/innodb-index.test, mysql-test/innodb-index.result,
mysql-test/innodb-timeout.test, mysql-test/innodb-timeout.result,
srv/srv0srv.c, include/srv0srv.h, handler/ha_innodb.cc,
include/ha_prototypes.h:
Fix Bug#36285 innodb_lock_wait_timeout is not dynamic, not per session
2008-09-19 The InnoDB Team
* os/os0proc.c:
Fix a memory leak on Windows. The memory leak was due to wrong
parameters passed into VirtualFree() call. As the result, the
call fails with Windows error 87.
2008-09-17 The InnoDB Team
* mysql-test/innodb.result, mysql-test/innodb-zip.result,
mysql-test/innodb-zip.test, mysql-test/innodb.test, ibuf/ibuf0ibuf.c,
dict/dict0crea.c, dict/dict0load.c, dict/dict0boot.c,
include/dict0dict.h, include/trx0trx.h, dict/dict0dict.c,
trx/trx0trx.c, include/ha_prototypes.h, handler/ha_innodb.cc:
When creating an index in innodb_strict_mode, check that the
maximum record size will never exceed the B-tree page size limit.
For uncompressed tables, there should always be enough space for
two records in an empty B-tree page. For compressed tables, there
should be enough space for storing two node pointer records or one
data record in an empty page in uncompressed format.
The purpose of this check is to guarantee that INSERT or UPDATE
will never fail due to too big record size.
2008-09-17 The InnoDB Team
* btr/btr0cur.c, data/data0data.c, include/page0zip.h,
include/page0zip.ic, page/page0zip.c, mysql-test/innodb_bug36172.test:
Prevent infinite B-tree page splits in compressed tables by
ensuring that there will always be enough space for two node
pointer records in an empty B-tree page. Also, require that at
least one data record will fit in an empty compressed page. This
will reduce the maximum size of records in compressed tables.
2008-09-09 The InnoDB Team
* mysql-test/innodb.result:
Fix the failing innodb test by merging changes that MySQL made to
that file (r2646.12.1 in MySQL BZR repository)
2008-09-09 The InnoDB Team
* handler/ha_innodb.cc, mysql-test/innodb-autoinc.result,
mysql-test/innodb-autoinc.test:
Fix Bug#38839 auto increment does not work properly with InnoDB after
update
2008-09-09 The InnoDB Team
* dict/dict0dict.c, handler/handler0alter.cc, include/dict0dict.h,
mysql-test/innodb-index.result, mysql-test/innodb-index.test:
Fix Bug#38786 InnoDB plugin crashes on drop table/create table with FK
2008-08-21 The InnoDB Team
* handler/ha_innodb.cc, include/ha_prototypes.h, row/row0sel.c:
Fix Bug#37885 row_search_for_mysql may gap lock unnecessarily with SQL
comments in query
2008-08-21 The InnoDB Team
* handler/ha_innodb.cc:
Fix Bug#38185 ha_innobase::info can hold locks even when called with
HA_STATUS_NO_LOCK
2008-08-18 The InnoDB Team
* buf/buf0buf.c, buf/buf0lru.c, include/buf0buf.ic, include/univ.i:
Introduce UNIV_LRU_DEBUG for debugging the LRU buffer pool cache
2008-08-08 The InnoDB Team
* buf/buf0lru.c, include/buf0buf.h:
Fix two recovery bugs that could lead to a crash in debug builds with
small buffer size
2008-08-07 The InnoDB Team
* btr/btr0cur.c, handler/ha_innodb.cc, include/srv0srv.h,
srv/srv0srv.c:
Add a parameter innodb_stats_sample_pages to allow users to control
the number of index dives when InnoDB estimates the cardinality of
an index (ANALYZE TABLE, SHOW TABLE STATUS etc)
2008-08-07 The InnoDB Team
* trx/trx0i_s.c:
Fix a bug that would lead to a crash if a SELECT was issued from the
INFORMATION_SCHEMA tables and there are rolling back transactions at
the same time
2008-08-06 The InnoDB Team
* btr/btr0btr.c, btr/btr0cur.c, ibuf/ibuf0ibuf.c, include/btr0cur.h,
include/trx0roll.h, include/trx0types.h, row/row0purge.c,
row/row0uins.c, row/row0umod.c, trx/trx0roll.c:
In the rollback of incomplete transactions after crash recovery,
tolerate clustered index records whose externally stored columns
have not been written.
2008-07-30 The InnoDB Team
* trx/trx0trx.c:
Fixes a race in recovery where the recovery thread recovering a
PREPARED trx and the background rollback thread can both try
to free the trx after its status is set to COMMITTED_IN_MEMORY.
2008-07-29 The InnoDB Team
* include/trx0rec.h, row/row0purge.c, row/row0vers.c, trx/trx0rec.c:
Fix a BLOB corruption bug
2008-07-15 The InnoDB Team
* btr/btr0sea.c, dict/dict0dict.c, include/btr0sea.h:
Fixed a timing hole where a thread dropping an index can free the
in-memory index struct while another thread is still using that
structure to remove entries from adaptive hash index belonging
to one of the pages that belongs to the index being dropped.
2008-07-04 The InnoDB Team
* mysql-test/innodb-index.result:
Fix the failing innodb-index test by adjusting the result to a new
MySQL behavior (the change occurred in BZR-r2667)
2008-07-03 The InnoDB Team
* mysql-test/innodb-zip.result, mysql-test/innodb-zip.test:
Remove the negative test cases that produce warnings
2008-07-02 The InnoDB Team
* mysql-test/innodb-replace.result, mysql-test/innodb-index.test:
Disable part of innodb-index test because MySQL changed its behavior
and is not calling ::add_index() anymore when adding primary index on
non-NULL column
2008-07-01 The InnoDB Team
* mysql-test/innodb-replace.result, mysql-test/innodb-replace.test:
Fix the failing innodb-replace test by merging changes that MySQL
made to that file (r2659 in MySQL BZR repository)
2008-07-01 The InnoDB Team
* lock/lock0lock.c:
Fix Bug#36942 Performance problem in lock_get_n_rec_locks (SHOW INNODB
STATUS)
2008-07-01 The InnoDB Team
* ha/ha0ha.c:
Fix Bug#36941 Performance problem in ha_print_info (SHOW INNODB
STATUS)
2008-07-01 The InnoDB Team
* handler/ha_innodb.cc, mysql-test/innodb-autoinc.result,
mysql-test/innodb-autoinc.test:
Fix Bug#37531 After truncate, auto_increment behaves incorrectly for
InnoDB
2008-06-19 The InnoDB Team
* handler/ha_innodb.cc:
Rewrite the function innodb_plugin_init() to support parameters in
different order (in static and dynamic InnoDB) and to support more
parameters in the static InnoDB
2008-06-19 The InnoDB Team
* handler/handler0alter.cc:
Fix a bug in ::add_index() which set the transaction state to "active"
but never restored it to the original value. This bug caused warnings
to be printed by the rpl.rpl_ddl mysql-test.
2008-06-19 The InnoDB Team
* mysql-test/patches:
Add a directory which contains patches, which need to be applied to
MySQL source in order to get some mysql-tests to succeed. The patches
cannot be committed in MySQL repository because they are specific to
the InnoDB plugin.
2008-06-19 The InnoDB Team
* mysql-test/innodb-zip.result, mysql-test/innodb-zip.test,
row/row0row.c:
Fix an anomaly when updating a record with BLOB prefix
2008-06-18 The InnoDB Team
* include/trx0sys.h, srv/srv0start.c, trx/trx0sys.c:
Fix a bug in recovery which was a side effect of the file_format_check
changes
2008-06-09 The InnoDB Team
* mysql-test/innodb.result:
Fix the failing innodb test by merging changes that MySQL made to that
file
2008-06-06 The InnoDB Team
* buf/buf0buf.c, handler/ha_innodb.cc, include/buf0buf.h,
include/srv0srv.h, srv/srv0srv.c:
Fix Bug#36600 SHOW STATUS takes a lot of CPU in
buf_get_latched_pages_number
* handler/ha_innodb.cc, os/os0file.c:
Fix Bug#11894 innodb_file_per_table crashes w/ Windows .sym symbolic
link hack
* include/ut0ut.h, srv/srv0srv.c, ut/ut0ut.c:
Fix Bug#36819 ut_usectime does not handle errors from gettimeofday
* handler/ha_innodb.cc:
Fix Bug#35602 Failed to read auto-increment value from storage engine
* srv/srv0start.c:
Fix Bug#36149 Read buffer overflow in srv0start.c found during "make
test"
2008-05-08 The InnoDB Team
* btr/btr0btr.c, mysql-test/innodb_bug36172.result,
mysql-test/innodb_bug36172.test:
Fix Bug#36172 insert into compressed innodb table crashes
2008-05-08 The InnoDB Team
InnoDB Plugin 1.0.1 released
2008-05-06 The InnoDB Team
* handler/ha_innodb.cc, include/srv0srv.h, include/sync0sync.h,
......@@ -51,7 +469,7 @@
2008-04-29 The InnoDB Team
* handler/i_s.cc, include/srv0start.h, srv/srv0start.c:
* handler/i_s.cc, include/srv0start.h, srv/srv0start.c:
Fix Bug#36310 InnoDB plugin crash
2008-04-23 The InnoDB Team
......
This source diff could not be displayed because it is too large. You can view the blob instead.
This is the source of the InnoDB Plugin 1.0.1 for MySQL 5.1
This is the source of the InnoDB Plugin 1.0.2 for MySQL 5.1
===========================================================
Instructions for compiling the plugin:
......@@ -23,4 +23,7 @@ http://www.innodb.com/doc/innodb_plugin-1.0/innodb-plugin-installation.html
For more information about InnoDB visit
http://www.innodb.com
Please report any problems or issues with the plugin in the InnoDB Forums
http://forums.innodb.com/ or in the MySQL Bugs database http://bugs.mysql.com
Thank you for using the InnoDB plugin!
......@@ -78,6 +78,26 @@ make them consecutive on disk if possible. From the other file segment
we allocate pages for the non-leaf levels of the tree.
*/
#ifdef UNIV_BTR_DEBUG
/******************************************************************
Validates a file segment header stored in a B-tree root page.
Asserts with ut_a() that the space id stored in the header equals the
given tablespace id, and that the stored byte offset lies in the range
[FIL_PAGE_DATA, UNIV_PAGE_SIZE - FIL_PAGE_DATA_END]. */
static
ibool
btr_root_fseg_validate(
/*===================*/
						/* out: TRUE (every check
						is made with ut_a()) */
	const fseg_header_t*	seg_header,	/* in: segment header */
	ulint			space)		/* in: tablespace identifier */
{
	/* Fetch both header fields up front, then assert on them. */
	const ulint	hdr_offset
		= mach_read_from_2(seg_header + FSEG_HDR_OFFSET);
	const ulint	hdr_space
		= mach_read_from_4(seg_header + FSEG_HDR_SPACE);

	ut_a(hdr_space == space);
	ut_a(FIL_PAGE_DATA <= hdr_offset);
	ut_a(UNIV_PAGE_SIZE - FIL_PAGE_DATA_END >= hdr_offset);

	return(TRUE);
}
#endif /* UNIV_BTR_DEBUG */
/******************************************************************
Gets the root node of a tree and x-latches it. */
static
......@@ -100,6 +120,16 @@ btr_root_block_get(
block = btr_block_get(space, zip_size, root_page_no, RW_X_LATCH, mtr);
ut_a((ibool)!!page_is_comp(buf_block_get_frame(block))
== dict_table_is_comp(index->table));
#ifdef UNIV_BTR_DEBUG
if (!dict_index_is_ibuf(index)) {
const page_t* root = buf_block_get_frame(block);
ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF
+ root, space));
ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP
+ root, space));
}
#endif /* UNIV_BTR_DEBUG */
return(block);
}
......@@ -287,9 +317,7 @@ btr_page_alloc_for_ibuf(
dict_table_zip_size(index->table),
node_addr.page, RW_X_LATCH, mtr);
new_page = buf_block_get_frame(new_block);
#ifdef UNIV_SYNC_DEBUG
buf_block_dbg_add_level(new_block, SYNC_TREE_NODE_NEW);
#endif /* UNIV_SYNC_DEBUG */
flst_remove(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
new_page + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE,
......@@ -349,9 +377,7 @@ btr_page_alloc(
new_block = buf_page_get(dict_index_get_space(index),
dict_table_zip_size(index->table),
new_page_no, RW_X_LATCH, mtr);
#ifdef UNIV_SYNC_DEBUG
buf_block_dbg_add_level(new_block, SYNC_TREE_NODE_NEW);
#endif /* UNIV_SYNC_DEBUG */
return(new_block);
}
......@@ -709,9 +735,8 @@ btr_create(
space, 0,
IBUF_HEADER + IBUF_TREE_SEG_HEADER, mtr);
#ifdef UNIV_SYNC_DEBUG
buf_block_dbg_add_level(ibuf_hdr_block, SYNC_TREE_NODE_NEW);
#endif /* UNIV_SYNC_DEBUG */
ut_ad(buf_block_get_page_no(ibuf_hdr_block)
== IBUF_HEADER_PAGE_NO);
/* Allocate then the next page to the segment: it will be the
......@@ -740,9 +765,7 @@ btr_create(
page_no = buf_block_get_page_no(block);
frame = buf_block_get_frame(block);
#ifdef UNIV_SYNC_DEBUG
buf_block_dbg_add_level(block, SYNC_TREE_NODE_NEW);
#endif /* UNIV_SYNC_DEBUG */
if (type & DICT_IBUF) {
/* It is an insert buffer tree: initialize the free list */
......@@ -757,9 +780,7 @@ btr_create(
PAGE_HEADER + PAGE_BTR_SEG_LEAF, mtr);
/* The fseg create acquires a second latch on the page,
therefore we must declare it: */
#ifdef UNIV_SYNC_DEBUG
buf_block_dbg_add_level(block, SYNC_TREE_NODE_NEW);
#endif /* UNIV_SYNC_DEBUG */
}
/* Create a new index page on the allocated segment page */
......@@ -820,6 +841,12 @@ btr_free_but_not_root(
mtr_start(&mtr);
root = btr_page_get(space, zip_size, root_page_no, RW_X_LATCH, &mtr);
#ifdef UNIV_BTR_DEBUG
ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF
+ root, space));
ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP
+ root, space));
#endif /* UNIV_BTR_DEBUG */
/* NOTE: page hash indexes are dropped when a page is freed inside
fsp0fsp. */
......@@ -836,6 +863,10 @@ btr_free_but_not_root(
mtr_start(&mtr);
root = btr_page_get(space, zip_size, root_page_no, RW_X_LATCH, &mtr);
#ifdef UNIV_BTR_DEBUG
ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP
+ root, space));
#endif /* UNIV_BTR_DEBUG */
finished = fseg_free_step_not_header(
root + PAGE_HEADER + PAGE_BTR_SEG_TOP, &mtr);
......@@ -868,6 +899,9 @@ btr_free_root(
btr_search_drop_page_hash_index(block);
header = buf_block_get_frame(block) + PAGE_HEADER + PAGE_BTR_SEG_TOP;
#ifdef UNIV_BTR_DEBUG
ut_a(btr_root_fseg_validate(header, space));
#endif /* UNIV_BTR_DEBUG */
while (!fseg_free_step(header, mtr));
}
......@@ -1104,8 +1138,18 @@ btr_root_raise_and_insert(
ut_a(!root_page_zip || page_zip_validate(root_page_zip, root));
#endif /* UNIV_ZIP_DEBUG */
index = btr_cur_get_index(cursor);
#ifdef UNIV_BTR_DEBUG
if (!dict_index_is_ibuf(index)) {
ulint space = dict_index_get_space(index);
ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF
+ root, space));
ut_a(btr_root_fseg_validate(FIL_PAGE_DATA + PAGE_BTR_SEG_TOP
+ root, space));
}
ut_ad(dict_index_get_page(index) == page_get_page_no(root));
ut_a(dict_index_get_page(index) == page_get_page_no(root));
#endif /* UNIV_BTR_DEBUG */
ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index),
MTR_MEMO_X_LOCK));
ut_ad(mtr_memo_contains(mtr, root_block, MTR_MEMO_PAGE_X_FIX));
......@@ -1133,15 +1177,27 @@ btr_root_raise_and_insert(
/* Copy the records from root to the new page one by one. */
if (UNIV_UNLIKELY
if (0
#ifdef UNIV_ZIP_COPY
|| new_page_zip
#endif /* UNIV_ZIP_COPY */
|| UNIV_UNLIKELY
(!page_copy_rec_list_end(new_block, root_block,
page_get_infimum_rec(root),
index, mtr))) {
ut_a(new_page_zip);
/* Copy the page byte for byte. */
page_zip_copy(new_page_zip, new_page,
root_page_zip, root, index, mtr);
page_zip_copy_recs(new_page_zip, new_page,
root_page_zip, root, index, mtr);
/* Update the lock table and possible hash index. */
lock_move_rec_list_end(new_block, root_block,
page_get_infimum_rec(root));
btr_search_move_or_delete_hash_entries(new_block, root_block,
index);
}
/* If this is a pessimistic insert which is actually done to
......@@ -1794,7 +1850,35 @@ btr_page_split_and_insert(
} else {
direction = FSP_UP;
hint_page_no = page_no + 1;
split_rec = page_get_middle_rec(page);
if (page_get_n_recs(page) == 1) {
page_cur_t pcur;
/* There is only one record in the index page
therefore we can't split the node in the middle
by default. We need to determine whether the
new record will be inserted to the left or right. */
/* Read the first (and only) record in the page. */
page_cur_set_before_first(block, &pcur);
page_cur_move_to_next(&pcur);
first_rec = page_cur_get_rec(&pcur);
offsets = rec_get_offsets(
first_rec, cursor->index, offsets,
n_uniq, &heap);
/* If the new record is less than the existing record
then the split in the middle will copy the existing
record to the new node. */
if (cmp_dtuple_rec(tuple, first_rec, offsets) < 0) {
split_rec = page_get_middle_rec(page);
} else {
split_rec = NULL;
}
} else {
split_rec = page_get_middle_rec(page);
}
}
/* 2. Allocate a new page to the index */
......@@ -1867,7 +1951,11 @@ btr_page_split_and_insert(
if (direction == FSP_DOWN) {
/* fputs("Split left\n", stderr); */
if (UNIV_UNLIKELY
if (0
#ifdef UNIV_ZIP_COPY
|| page_zip
#endif /* UNIV_ZIP_COPY */
|| UNIV_UNLIKELY
(!page_move_rec_list_start(new_block, block, move_limit,
cursor->index, mtr))) {
/* For some reason, compressing new_page failed,
......@@ -1877,12 +1965,24 @@ btr_page_split_and_insert(
as appropriate. Deleting will always succeed. */
ut_a(new_page_zip);
page_zip_copy(new_page_zip, new_page,
page_zip, page, cursor->index, mtr);
page_zip_copy_recs(new_page_zip, new_page,
page_zip, page, cursor->index, mtr);
page_delete_rec_list_end(move_limit - page + new_page,
new_block, cursor->index,
ULINT_UNDEFINED,
ULINT_UNDEFINED, mtr);
/* Update the lock table and possible hash index. */
lock_move_rec_list_start(
new_block, block, move_limit,
new_page + PAGE_NEW_INFIMUM);
btr_search_move_or_delete_hash_entries(
new_block, block, cursor->index);
/* Delete the records from the source page. */
page_delete_rec_list_start(move_limit, block,
cursor->index, mtr);
}
......@@ -1894,7 +1994,11 @@ btr_page_split_and_insert(
} else {
/* fputs("Split right\n", stderr); */
if (UNIV_UNLIKELY
if (0
#ifdef UNIV_ZIP_COPY
|| page_zip
#endif /* UNIV_ZIP_COPY */
|| UNIV_UNLIKELY
(!page_move_rec_list_end(new_block, block, move_limit,
cursor->index, mtr))) {
/* For some reason, compressing new_page failed,
......@@ -1904,11 +2008,21 @@ btr_page_split_and_insert(
as appropriate. Deleting will always succeed. */
ut_a(new_page_zip);
page_zip_copy(new_page_zip, new_page,
page_zip, page, cursor->index, mtr);
page_zip_copy_recs(new_page_zip, new_page,
page_zip, page, cursor->index, mtr);
page_delete_rec_list_start(move_limit - page
+ new_page, new_block,
cursor->index, mtr);
/* Update the lock table and possible hash index. */
lock_move_rec_list_end(new_block, block, move_limit);
btr_search_move_or_delete_hash_entries(
new_block, block, cursor->index);
/* Delete the records from the source page. */
page_delete_rec_list_end(move_limit, block,
cursor->index,
ULINT_UNDEFINED,
......@@ -2174,7 +2288,7 @@ btr_node_ptr_delete(
/* Delete node pointer on father page */
btr_page_get_father(index, block, mtr, &cursor);
compressed = btr_cur_pessimistic_delete(&err, TRUE, &cursor, FALSE,
compressed = btr_cur_pessimistic_delete(&err, TRUE, &cursor, RB_NONE,
mtr);
ut_a(err == DB_SUCCESS);
......@@ -2257,7 +2371,11 @@ btr_lift_page_up(
btr_page_set_level(father_page, father_page_zip, page_level, mtr);
/* Copy the records to the father page one by one. */
if (UNIV_UNLIKELY
if (0
#ifdef UNIV_ZIP_COPY
|| father_page_zip
#endif /* UNIV_ZIP_COPY */
|| UNIV_UNLIKELY
(!page_copy_rec_list_end(father_block, block,
page_get_infimum_rec(page),
index, mtr))) {
......@@ -2267,20 +2385,31 @@ btr_lift_page_up(
ut_a(page_zip);
/* Copy the page byte for byte. */
page_zip_copy(father_page_zip, father_page,
page_zip, page, index, mtr);
page_zip_copy_recs(father_page_zip, father_page,
page_zip, page, index, mtr);
/* Update the lock table and possible hash index. */
lock_move_rec_list_end(father_block, block,
page_get_infimum_rec(page));
btr_search_move_or_delete_hash_entries(father_block, block,
index);
}
lock_update_copy_and_discard(father_block, block);
/* Go upward to root page, decrementing levels by one. */
for (i = 0; i < n_blocks; i++, page_level++) {
page_t* page = buf_block_get_frame(blocks[i]);
page_t* page = buf_block_get_frame(blocks[i]);
page_zip_des_t* page_zip= buf_block_get_page_zip(blocks[i]);
ut_ad(btr_page_get_level(page, mtr) == page_level + 1);
btr_page_set_level(page, buf_block_get_page_zip(blocks[i]),
page_level, mtr);
btr_page_set_level(page, page_zip, page_level, mtr);
#ifdef UNIV_ZIP_DEBUG
ut_a(!page_zip || page_zip_validate(page_zip, page));
#endif /* UNIV_ZIP_DEBUG */
}
/* Free the file page */
......@@ -2575,6 +2704,9 @@ btr_compress(
}
ut_ad(page_validate(merge_page, index));
#ifdef UNIV_ZIP_DEBUG
ut_a(!merge_page_zip || page_zip_validate(merge_page_zip, merge_page));
#endif /* UNIV_ZIP_DEBUG */
/* Free the file page */
btr_page_free(index, block, mtr);
......@@ -2623,6 +2755,20 @@ btr_discard_only_page_on_level(
== dict_index_get_page(index))) {
/* The father is the root page */
#ifdef UNIV_BTR_DEBUG
if (!dict_index_is_ibuf(index)) {
const page_t* root
= buf_block_get_frame(father_block);
const ulint space
= dict_index_get_space(index);
ut_a(btr_root_fseg_validate(
FIL_PAGE_DATA + PAGE_BTR_SEG_LEAF
+ root, space));
ut_a(btr_root_fseg_validate(
FIL_PAGE_DATA + PAGE_BTR_SEG_TOP
+ root, space));
}
#endif /* UNIV_BTR_DEBUG */
btr_page_empty(father_block, father_page_zip, mtr, index);
/* We play safe and reset the free bits for the father */
......
......@@ -32,6 +32,7 @@ Created 10/16/1994 Heikki Tuuri
#include "btr0sea.h"
#include "row0upd.h"
#include "trx0rec.h"
#include "trx0roll.h" /* trx_is_recv() */
#include "que0que.h"
#include "row0row.h"
#include "srv0srv.h"
......@@ -55,10 +56,6 @@ can be released by page reorganize, then it is reorganized */
#define BTR_CUR_PAGE_REORGANIZE_LIMIT (UNIV_PAGE_SIZE / 32)
/* When estimating number of different key values in an index, sample
this many index pages */
#define BTR_KEY_VAL_ESTIMATE_N_PAGES 8
/* The structure of a BLOB part header */
/*--------------------------------------*/
#define BTR_BLOB_HDR_PART_LEN 0 /* BLOB part len on this
......@@ -112,6 +109,7 @@ btr_rec_free_updated_extern_fields(
part will be updated, or NULL */
const ulint* offsets,/* in: rec_get_offsets(rec, index) */
const upd_t* update, /* in: update vector */
enum trx_rb_ctx rb_ctx, /* in: rollback context */
mtr_t* mtr); /* in: mini-transaction handle which contains
an X-latch to record page and to the tree */
/***************************************************************
......@@ -126,9 +124,7 @@ btr_rec_free_externally_stored_fields(
const ulint* offsets,/* in: rec_get_offsets(rec, index) */
page_zip_des_t* page_zip,/* in: compressed page whose uncompressed
part will be updated, or NULL */
ibool do_not_free_inherited,/* in: TRUE if called in a
rollback and we do not want to free
inherited fields */
enum trx_rb_ctx rb_ctx, /* in: rollback context */
mtr_t* mtr); /* in: mini-transaction handle which contains
an X-latch to record page and to the index
tree */
......@@ -394,8 +390,8 @@ btr_cur_search_to_nth_level(
return;
}
#endif
#endif
#endif /* BTR_CUR_HASH_ADAPT */
#endif /* BTR_CUR_ADAPT */
btr_cur_n_non_sea++;
/* If the hash search did not succeed, do binary search down the
......@@ -468,8 +464,7 @@ btr_cur_search_to_nth_level(
block = buf_page_get_gen(space, zip_size, page_no,
rw_latch, guess, buf_mode,
__FILE__, __LINE__,
mtr);
__FILE__, __LINE__, mtr);
if (block == NULL) {
/* This must be a search to perform an insert;
try insert to the insert buffer */
......@@ -508,11 +503,10 @@ btr_cur_search_to_nth_level(
block->check_index_page_at_flush = TRUE;
#ifdef UNIV_SYNC_DEBUG
if (rw_latch != RW_NO_LATCH) {
buf_block_dbg_add_level(block, SYNC_TREE_NODE);
}
#endif
ut_ad(0 == ut_dulint_cmp(index->id,
btr_page_get_index_id(page)));
......@@ -690,8 +684,7 @@ btr_cur_open_at_index_side(
page_t* page;
block = buf_page_get_gen(space, zip_size, page_no,
RW_NO_LATCH, NULL, BUF_GET,
__FILE__, __LINE__,
mtr);
__FILE__, __LINE__, mtr);
page = buf_block_get_frame(block);
ut_ad(0 == ut_dulint_cmp(index->id,
btr_page_get_index_id(page)));
......@@ -810,8 +803,7 @@ btr_cur_open_at_rnd_pos(
block = buf_page_get_gen(space, zip_size, page_no,
RW_NO_LATCH, NULL, BUF_GET,
__FILE__, __LINE__,
mtr);
__FILE__, __LINE__, mtr);
page = buf_block_get_frame(block);
ut_ad(0 == ut_dulint_cmp(index->id,
btr_page_get_index_id(page)));
......@@ -854,7 +846,7 @@ btr_cur_open_at_rnd_pos(
/*****************************************************************
Inserts a record if there is enough space, or if enough space can
be freed by reorganizing. Differs from _optimistic_insert because
be freed by reorganizing. Differs from btr_cur_optimistic_insert because
no heuristics are applied to whether it pays to use CPU time for
reorganizing the page or not. */
static
......@@ -1058,7 +1050,8 @@ btr_cur_optimistic_insert(
/* Calculate the record size when entry is converted to a record */
rec_size = rec_get_converted_size(index, entry, n_ext);
if (page_zip_rec_needs_ext(rec_size, page_is_comp(page), zip_size)) {
if (page_zip_rec_needs_ext(rec_size, page_is_comp(page),
dtuple_get_n_fields(entry), zip_size)) {
/* The record is so big that we have to store some fields
externally on separate database pages */
......@@ -1072,6 +1065,45 @@ btr_cur_optimistic_insert(
rec_size = rec_get_converted_size(index, entry, n_ext);
}
if (UNIV_UNLIKELY(zip_size)) {
/* Estimate the free space of an empty compressed page.
Subtract one byte for the encoded heap_no in the
modification log. */
ulint free_space_zip = page_zip_empty_size(
cursor->index->n_fields, zip_size) - 1;
ulint n_uniq = dict_index_get_n_unique_in_tree(index);
ut_ad(dict_table_is_comp(index->table));
/* There should be enough room for two node pointer
records on an empty non-leaf page. This prevents
infinite page splits. */
if (UNIV_LIKELY(entry->n_fields >= n_uniq)
&& UNIV_UNLIKELY(REC_NODE_PTR_SIZE
+ rec_get_converted_size_comp_prefix(
index, entry->fields, n_uniq,
NULL)
/* On a compressed page, there is
a two-byte entry in the dense
page directory for every record.
But there is no record header. */
- (REC_N_NEW_EXTRA_BYTES - 2)
> free_space_zip / 2)) {
if (big_rec_vec) {
dtuple_convert_back_big_rec(
index, entry, big_rec_vec);
}
if (heap) {
mem_heap_free(heap);
}
return(DB_TOO_BIG_RECORD);
}
}
/* If there have been many consecutive inserts, and we are on the leaf
level, check if we have to split the page to reserve enough free space
for future updates of records. */
......@@ -1303,6 +1335,7 @@ btr_cur_pessimistic_insert(
if (page_zip_rec_needs_ext(rec_get_converted_size(index, entry, n_ext),
dict_table_is_comp(index->table),
dict_index_get_n_fields(index),
zip_size)) {
/* The record is so big that we have to store some fields
externally on separate database pages */
......@@ -1326,45 +1359,6 @@ btr_cur_pessimistic_insert(
}
}
if (UNIV_UNLIKELY(zip_size)) {
/* Estimate the free space of an empty compressed page. */
ulint free_space_zip = page_zip_empty_size(
cursor->index->n_fields, zip_size);
if (UNIV_UNLIKELY(rec_get_converted_size(index, entry, n_ext)
> free_space_zip)) {
/* Try to insert the record by itself on a new page.
If it fails, no amount of splitting will help. */
buf_block_t* temp_block
= buf_block_alloc(zip_size);
page_t* temp_page
= page_create_zip(temp_block, index, 0, NULL);
page_cur_t temp_cursor;
rec_t* temp_rec;
page_cur_position(temp_page + PAGE_NEW_INFIMUM,
temp_block, &temp_cursor);
temp_rec = page_cur_tuple_insert(&temp_cursor,
entry, index,
n_ext, NULL);
buf_block_free(temp_block);
if (UNIV_UNLIKELY(!temp_rec)) {
if (big_rec_vec) {
dtuple_convert_back_big_rec(
index, entry, big_rec_vec);
}
if (heap) {
mem_heap_free(heap);
}
return(DB_TOO_BIG_RECORD);
}
}
}
if (dict_index_get_page(index)
== buf_block_get_page_no(btr_cur_get_block(cursor))) {
......@@ -2162,8 +2156,9 @@ btr_cur_pessimistic_update(
ut_ad(big_rec_vec == NULL);
btr_rec_free_updated_extern_fields(index, rec, page_zip,
offsets, update, mtr);
btr_rec_free_updated_extern_fields(
index, rec, page_zip, offsets, update,
trx_is_recv(trx) ? RB_RECOVERY : RB_NORMAL, mtr);
}
/* We have to set appropriate extern storage bits in the new
......@@ -2173,10 +2168,20 @@ btr_cur_pessimistic_update(
offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, heap);
n_ext += btr_push_update_extern_fields(new_entry, update, *heap);
if (page_zip_rec_needs_ext(rec_get_converted_size(index, new_entry,
n_ext),
page_is_comp(page), page_zip
? page_zip_get_size(page_zip) : 0)) {
if (UNIV_LIKELY_NULL(page_zip)) {
ut_ad(page_is_comp(page));
if (page_zip_rec_needs_ext(
rec_get_converted_size(index, new_entry, n_ext),
TRUE,
dict_index_get_n_fields(index),
page_zip_get_size(page_zip))) {
goto make_external;
}
} else if (page_zip_rec_needs_ext(
rec_get_converted_size(index, new_entry, n_ext),
page_is_comp(page), 0, 0)) {
make_external:
big_rec_vec = dtuple_convert_big_rec(index, new_entry, &n_ext);
if (UNIV_UNLIKELY(big_rec_vec == NULL)) {
......@@ -2655,7 +2660,7 @@ btr_cur_del_mark_set_sec_rec(
}
/***************************************************************
Sets a secondary index record delete mark to FALSE. This function is only
Clear a secondary index record's delete mark. This function is only
used by the insert buffer insert merge mechanism. */
UNIV_INTERN
void
......@@ -2811,7 +2816,7 @@ btr_cur_pessimistic_delete(
if compression does not occur, the cursor
stays valid: it points to successor of
deleted record on function exit */
ibool in_rollback,/* in: TRUE if called in rollback */
enum trx_rb_ctx rb_ctx, /* in: rollback context */
mtr_t* mtr) /* in: mtr */
{
buf_block_t* block;
......@@ -2865,14 +2870,14 @@ btr_cur_pessimistic_delete(
if (rec_offs_any_extern(offsets)) {
btr_rec_free_externally_stored_fields(index,
rec, offsets, page_zip,
in_rollback, mtr);
rb_ctx, mtr);
#ifdef UNIV_ZIP_DEBUG
ut_a(!page_zip || page_zip_validate(page_zip, page));
#endif /* UNIV_ZIP_DEBUG */
}
if (UNIV_UNLIKELY(page_get_n_recs(page) < 2)
&& UNIV_UNLIKELY(dict_index_get_page(btr_cur_get_index(cursor))
&& UNIV_UNLIKELY(dict_index_get_page(index)
!= buf_block_get_page_no(block))) {
/* If there is only one record, drop the whole page in
......@@ -3154,11 +3159,12 @@ btr_estimate_number_of_different_key_vals(
ulint matched_fields;
ulint matched_bytes;
ib_int64_t* n_diff;
ullint n_sample_pages; /* number of pages to sample */
ulint not_empty_flag = 0;
ulint total_external_size = 0;
ulint i;
ulint j;
ulint add_on;
ullint add_on;
mtr_t mtr;
mem_heap_t* heap = NULL;
ulint offsets_rec_[REC_OFFS_NORMAL_SIZE];
......@@ -3172,9 +3178,21 @@ btr_estimate_number_of_different_key_vals(
n_diff = mem_zalloc((n_cols + 1) * sizeof(ib_int64_t));
/* It makes no sense to test more pages than are contained
in the index, thus we lower the number if it is too high */
if (srv_stats_sample_pages > index->stat_index_size) {
if (index->stat_index_size > 0) {
n_sample_pages = index->stat_index_size;
} else {
n_sample_pages = 1;
}
} else {
n_sample_pages = srv_stats_sample_pages;
}
/* We sample some pages in the index to get an estimate */
for (i = 0; i < BTR_KEY_VAL_ESTIMATE_N_PAGES; i++) {
for (i = 0; i < n_sample_pages; i++) {
rec_t* supremum;
mtr_start(&mtr);
......@@ -3182,7 +3200,7 @@ btr_estimate_number_of_different_key_vals(
/* Count the number of different key values for each prefix of
the key on this index page. If the prefix does not determine
the index record uniquely in te B-tree, then we subtract one
the index record uniquely in the B-tree, then we subtract one
because otherwise our algorithm would give a wrong estimate
for an index where there is just one key value. */
......@@ -3263,7 +3281,7 @@ btr_estimate_number_of_different_key_vals(
}
/* If we saw k borders between different key values on
BTR_KEY_VAL_ESTIMATE_N_PAGES leaf pages, we can estimate how many
n_sample_pages leaf pages, we can estimate how many
there will be in index->stat_n_leaf_pages */
/* We must take into account that our sample actually represents
......@@ -3274,26 +3292,26 @@ btr_estimate_number_of_different_key_vals(
index->stat_n_diff_key_vals[j]
= ((n_diff[j]
* (ib_int64_t)index->stat_n_leaf_pages
+ BTR_KEY_VAL_ESTIMATE_N_PAGES - 1
+ n_sample_pages - 1
+ total_external_size
+ not_empty_flag)
/ (BTR_KEY_VAL_ESTIMATE_N_PAGES
/ (n_sample_pages
+ total_external_size));
/* If the tree is small, smaller than
10 * BTR_KEY_VAL_ESTIMATE_N_PAGES + total_external_size, then
10 * n_sample_pages + total_external_size, then
the above estimate is ok. For bigger trees it is common that we
do not see any borders between key values in the few pages
we pick. But still there may be BTR_KEY_VAL_ESTIMATE_N_PAGES
we pick. But still there may be n_sample_pages
different key values, or even more. Let us try to approximate
that: */
add_on = index->stat_n_leaf_pages
/ (10 * (BTR_KEY_VAL_ESTIMATE_N_PAGES
/ (10 * (n_sample_pages
+ total_external_size));
if (add_on > BTR_KEY_VAL_ESTIMATE_N_PAGES) {
add_on = BTR_KEY_VAL_ESTIMATE_N_PAGES;
if (add_on > n_sample_pages) {
add_on = n_sample_pages;
}
index->stat_n_diff_key_vals[j] += add_on;
......@@ -3832,10 +3850,8 @@ btr_store_big_rec_extern_fields(
prev_block = buf_page_get(space_id, zip_size,
prev_page_no,
RW_X_LATCH, &mtr);
#ifdef UNIV_SYNC_DEBUG
buf_block_dbg_add_level(prev_block,
SYNC_EXTERN_STORAGE);
#endif /* UNIV_SYNC_DEBUG */
prev_page = buf_block_get_frame(prev_block);
if (UNIV_LIKELY_NULL(page_zip)) {
......@@ -3930,10 +3946,9 @@ btr_store_big_rec_extern_fields(
rec_block = buf_page_get(space_id, zip_size,
rec_page_no,
RW_X_LATCH, &mtr);
#ifdef UNIV_SYNC_DEBUG
buf_block_dbg_add_level(rec_block,
SYNC_NO_ORDER_CHECK);
#endif /* UNIV_SYNC_DEBUG */
if (err == Z_STREAM_END) {
mach_write_to_4(field_ref
+ BTR_EXTERN_LEN, 0);
......@@ -4009,10 +4024,8 @@ btr_store_big_rec_extern_fields(
rec_block = buf_page_get(space_id, zip_size,
rec_page_no,
RW_X_LATCH, &mtr);
#ifdef UNIV_SYNC_DEBUG
buf_block_dbg_add_level(rec_block,
SYNC_NO_ORDER_CHECK);
#endif /* UNIV_SYNC_DEBUG */
mlog_write_ulint(field_ref + BTR_EXTERN_LEN, 0,
MLOG_4BYTES, &mtr);
......@@ -4084,9 +4097,7 @@ btr_free_externally_stored_field(
to rec, or NULL if rec == NULL */
ulint i, /* in: field number of field_ref;
ignored if rec == NULL */
ibool do_not_free_inherited,/* in: TRUE if called in a
rollback and we do not want to free
inherited fields */
enum trx_rb_ctx rb_ctx, /* in: rollback context */
mtr_t* local_mtr __attribute__((unused))) /* in: mtr
containing the latch to data and an
X-latch to the index tree */
......@@ -4116,6 +4127,15 @@ btr_free_externally_stored_field(
}
#endif /* UNIV_DEBUG */
if (UNIV_UNLIKELY(!memcmp(field_ref, field_ref_zero,
BTR_EXTERN_FIELD_REF_SIZE))) {
/* In the rollback of uncommitted transactions, we may
encounter a clustered index record whose BLOBs have
not been written. There is nothing to free then. */
ut_a(rb_ctx == RB_RECOVERY);
return;
}
space_id = mach_read_from_4(field_ref + BTR_EXTERN_SPACE_ID);
if (UNIV_UNLIKELY(space_id != dict_index_get_space(index))) {
......@@ -4149,9 +4169,7 @@ btr_free_externally_stored_field(
page_get_page_no(
page_align(field_ref)),
RW_X_LATCH, &mtr);
#ifdef UNIV_SYNC_DEBUG
buf_block_dbg_add_level(rec_block, SYNC_NO_ORDER_CHECK);
#endif /* UNIV_SYNC_DEBUG */
page_no = mach_read_from_4(field_ref + BTR_EXTERN_PAGE_NO);
if (/* There is no external storage data */
......@@ -4160,7 +4178,7 @@ btr_free_externally_stored_field(
|| (mach_read_from_1(field_ref + BTR_EXTERN_LEN)
& BTR_EXTERN_OWNER_FLAG)
/* Rollback and inherited field */
|| (do_not_free_inherited
|| (rb_ctx != RB_NONE
&& (mach_read_from_1(field_ref + BTR_EXTERN_LEN)
& BTR_EXTERN_INHERITED_FLAG))) {
......@@ -4172,9 +4190,7 @@ btr_free_externally_stored_field(
ext_block = buf_page_get(space_id, ext_zip_size, page_no,
RW_X_LATCH, &mtr);
#ifdef UNIV_SYNC_DEBUG
buf_block_dbg_add_level(ext_block, SYNC_EXTERN_STORAGE);
#endif /* UNIV_SYNC_DEBUG */
page = buf_block_get_frame(ext_block);
if (ext_zip_size) {
......@@ -4208,14 +4224,8 @@ btr_free_externally_stored_field(
MLOG_4BYTES, &mtr);
}
} else {
ulint extern_len = mach_read_from_4(
field_ref + BTR_EXTERN_LEN + 4);
ulint part_len = btr_blob_get_part_len(
page + FIL_PAGE_DATA);
ut_a(fil_page_get_type(page) == FIL_PAGE_TYPE_BLOB);
ut_a(!page_zip);
ut_a(extern_len >= part_len);
next_page_no = mach_read_from_4(
page + FIL_PAGE_DATA
......@@ -4233,16 +4243,14 @@ btr_free_externally_stored_field(
mlog_write_ulint(field_ref + BTR_EXTERN_PAGE_NO,
next_page_no,
MLOG_4BYTES, &mtr);
/* Zero out the BLOB length. If the server
crashes during the execution of this function,
trx_rollback_or_clean_all_recovered() could
dereference the half-deleted BLOB, fetching a
wrong prefix for the BLOB. */
mlog_write_ulint(field_ref + BTR_EXTERN_LEN + 4,
extern_len - part_len,
0,
MLOG_4BYTES, &mtr);
if (next_page_no == FIL_NULL) {
ut_a(extern_len - part_len == 0);
}
if (extern_len - part_len == 0) {
ut_a(next_page_no == FIL_NULL);
}
}
/* Commit mtr and release the BLOB block to save memory. */
......@@ -4262,9 +4270,7 @@ btr_rec_free_externally_stored_fields(
const ulint* offsets,/* in: rec_get_offsets(rec, index) */
page_zip_des_t* page_zip,/* in: compressed page whose uncompressed
part will be updated, or NULL */
ibool do_not_free_inherited,/* in: TRUE if called in a
rollback and we do not want to free
inherited fields */
enum trx_rb_ctx rb_ctx, /* in: rollback context */
mtr_t* mtr) /* in: mini-transaction handle which contains
an X-latch to record page and to the index
tree */
......@@ -4288,8 +4294,7 @@ btr_rec_free_externally_stored_fields(
btr_free_externally_stored_field(
index, data + len - BTR_EXTERN_FIELD_REF_SIZE,
rec, offsets, page_zip, i,
do_not_free_inherited, mtr);
rec, offsets, page_zip, i, rb_ctx, mtr);
}
}
}
......@@ -4308,6 +4313,7 @@ btr_rec_free_updated_extern_fields(
part will be updated, or NULL */
const ulint* offsets,/* in: rec_get_offsets(rec, index) */
const upd_t* update, /* in: update vector */
enum trx_rb_ctx rb_ctx, /* in: rollback context */
mtr_t* mtr) /* in: mini-transaction handle which contains
an X-latch to record page and to the tree */
{
......@@ -4333,7 +4339,7 @@ btr_rec_free_updated_extern_fields(
btr_free_externally_stored_field(
index, data + len - BTR_EXTERN_FIELD_REF_SIZE,
rec, offsets, page_zip,
ufield->field_no, TRUE, mtr);
ufield->field_no, rb_ctx, mtr);
}
}
}
......@@ -4366,9 +4372,7 @@ btr_copy_blob_prefix(
mtr_start(&mtr);
block = buf_page_get(space_id, 0, page_no, RW_S_LATCH, &mtr);
#ifdef UNIV_SYNC_DEBUG
buf_block_dbg_add_level(block, SYNC_EXTERN_STORAGE);
#endif /* UNIV_SYNC_DEBUG */
page = buf_block_get_frame(block);
/* Unfortunately, FIL_PAGE_TYPE was uninitialized for
......@@ -4584,7 +4588,9 @@ UNIV_INTERN
ulint
btr_copy_externally_stored_field_prefix(
/*====================================*/
/* out: the length of the copied field */
/* out: the length of the copied field,
or 0 if the column was being or has been
deleted */
byte* buf, /* out: the field, or a prefix of it */
ulint len, /* in: length of buf, in bytes */
ulint zip_size,/* in: nonzero=compressed BLOB page size,
......@@ -4613,6 +4619,14 @@ btr_copy_externally_stored_field_prefix(
ut_a(memcmp(data, field_ref_zero, BTR_EXTERN_FIELD_REF_SIZE));
if (!mach_read_from_4(data + BTR_EXTERN_LEN + 4)) {
/* The externally stored part of the column has been
(partially) deleted. Signal the half-deleted BLOB
to the caller. */
return(0);
}
space_id = mach_read_from_4(data + BTR_EXTERN_SPACE_ID);
page_no = mach_read_from_4(data + BTR_EXTERN_PAGE_NO);
......
......@@ -208,6 +208,7 @@ btr_pcur_restore_position(
|| UNIV_UNLIKELY(cursor->pos_state != BTR_PCUR_WAS_POSITIONED
&& cursor->pos_state != BTR_PCUR_IS_POSITIONED)) {
ut_print_buf(stderr, cursor, sizeof(btr_pcur_t));
putc('\n', stderr);
if (cursor->trx_if_known) {
trx_print(stderr, cursor->trx_if_known, 0);
}
......@@ -243,10 +244,10 @@ btr_pcur_restore_position(
cursor->block_when_stored,
cursor->modify_clock, mtr))) {
cursor->pos_state = BTR_PCUR_IS_POSITIONED;
#ifdef UNIV_SYNC_DEBUG
buf_block_dbg_add_level(btr_pcur_get_block(cursor),
SYNC_TREE_NODE);
#endif /* UNIV_SYNC_DEBUG */
if (cursor->rel_pos == BTR_PCUR_ON) {
#ifdef UNIV_DEBUG
const rec_t* rec;
......
......@@ -188,6 +188,7 @@ btr_search_info_create(
info->magic_n = BTR_SEARCH_MAGIC_N;
#endif /* UNIV_DEBUG */
info->ref_count = 0;
info->root_guess = NULL;
info->hash_analysis = 0;
......@@ -211,6 +212,32 @@ btr_search_info_create(
return(info);
}
/*********************************************************************
Reads the ref_count field of a search info struct. The field is read
while holding btr_search_latch in shared mode; the latch is acquired
and released inside this function. */
UNIV_INTERN
ulint
btr_search_info_get_ref_count(
/*==========================*/
				/* out: the value of info->ref_count
				at the time of the read */
	btr_search_t*	info)	/* in: search info; must not be NULL */
{
	ulint	ref_count;

	ut_ad(info);

#ifdef UNIV_SYNC_DEBUG
	/* The calling thread is asserted not to own btr_search_latch
	in either mode before the S-latch is requested below. */
	ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
	ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */

	rw_lock_s_lock(&btr_search_latch);
	ref_count = info->ref_count;
	rw_lock_s_unlock(&btr_search_latch);

	return(ref_count);
}
/*************************************************************************
Updates the search info of an index about hash successes. NOTE that info
is NOT protected by any semaphore, to save CPU time! Do not assume its fields
......@@ -818,9 +845,7 @@ btr_search_guess_on_hash(
rw_lock_s_unlock(&btr_search_latch);
#ifdef UNIV_SYNC_DEBUG
buf_block_dbg_add_level(block, SYNC_TREE_NODE_FROM_HASH);
#endif /* UNIV_SYNC_DEBUG */
}
if (UNIV_UNLIKELY(buf_block_get_state(block)
......@@ -944,21 +969,21 @@ btr_search_drop_page_hash_index(
for which we know that
block->buf_fix_count == 0 */
{
hash_table_t* table;
ulint n_fields;
ulint n_bytes;
page_t* page;
rec_t* rec;
ulint fold;
ulint prev_fold;
dulint index_id;
ulint n_cached;
ulint n_recs;
ulint* folds;
ulint i;
mem_heap_t* heap;
dict_index_t* index;
ulint* offsets;
hash_table_t* table;
ulint n_fields;
ulint n_bytes;
const page_t* page;
const rec_t* rec;
ulint fold;
ulint prev_fold;
dulint index_id;
ulint n_cached;
ulint n_recs;
ulint* folds;
ulint i;
mem_heap_t* heap;
const dict_index_t* index;
ulint* offsets;
#ifdef UNIV_SYNC_DEBUG
ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
......@@ -1007,7 +1032,7 @@ btr_search_drop_page_hash_index(
n_cached = 0;
rec = page_get_infimum_rec(page);
rec = page_rec_get_next(rec);
rec = page_rec_get_next_low(rec, page_is_comp(page));
index_id = btr_page_get_index_id(page);
......@@ -1035,7 +1060,7 @@ btr_search_drop_page_hash_index(
folds[n_cached] = fold;
n_cached++;
next_rec:
rec = page_rec_get_next(rec);
rec = page_rec_get_next_low(rec, page_rec_is_comp(rec));
prev_fold = fold;
}
......@@ -1070,8 +1095,12 @@ btr_search_drop_page_hash_index(
ha_remove_all_nodes_to_page(table, folds[i], page);
}
ut_a(index->search_info->ref_count > 0);
index->search_info->ref_count--;
block->is_hashed = FALSE;
block->index = NULL;
cleanup:
#ifdef UNIV_DEBUG
if (UNIV_UNLIKELY(block->n_pointers)) {
......@@ -1127,9 +1156,7 @@ btr_search_drop_page_hash_when_freed(
BUF_GET_IF_IN_POOL, __FILE__, __LINE__,
&mtr);
#ifdef UNIV_SYNC_DEBUG
buf_block_dbg_add_level(block, SYNC_TREE_NODE_FROM_HASH);
#endif /* UNIV_SYNC_DEBUG */
btr_search_drop_page_hash_index(block);
......@@ -1295,6 +1322,15 @@ btr_search_build_page_hash_index(
goto exit_func;
}
/* This counter is decremented every time we drop page
hash index entries and is incremented here. Since we can
rebuild hash index for a page that is already hashed, we
have to take care not to increment the counter in that
case. */
if (!block->is_hashed) {
index->search_info->ref_count++;
}
block->is_hashed = TRUE;
block->n_hash_helps = 0;
......
......@@ -406,7 +406,7 @@ buf_page_is_corrupted(
}
/* InnoDB versions < 4.0.14 and < 4.1.1 stored the space id
(always equal to 0), to FIL_PAGE_SPACE_SPACE_OR_CHKSUM */
(always equal to 0), to FIL_PAGE_SPACE_OR_CHKSUM */
if (checksum_field != 0
&& checksum_field != BUF_NO_CHECKSUM_MAGIC
......@@ -443,7 +443,7 @@ buf_page_print(
fprintf(stderr, " InnoDB: Page dump in ascii and hex (%lu bytes):\n",
(ulong) size);
ut_print_buf(stderr, read_buf, size);
fputs("InnoDB: End of page dump\n", stderr);
fputs("\nInnoDB: End of page dump\n", stderr);
if (zip_size) {
/* Print compressed page. */
......@@ -1053,6 +1053,14 @@ buf_relocate(
if (UNIV_UNLIKELY(buf_pool->LRU_old == bpage)) {
buf_pool->LRU_old = dpage;
#ifdef UNIV_LRU_DEBUG
/* buf_pool->LRU_old must be the first item in the LRU list
whose "old" flag is set. */
ut_a(!UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)
|| !UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)->old);
ut_a(!UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)
|| UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)->old);
#endif /* UNIV_LRU_DEBUG */
}
ut_d(UT_LIST_VALIDATE(LRU, buf_page_t, buf_pool->LRU));
......@@ -2193,9 +2201,8 @@ buf_page_optimistic_get_func(
}
if (UNIV_UNLIKELY(modify_clock != block->modify_clock)) {
#ifdef UNIV_SYNC_DEBUG
buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
#endif /* UNIV_SYNC_DEBUG */
if (rw_latch == RW_S_LATCH) {
rw_lock_s_unlock(&(block->lock));
} else {
......@@ -2395,9 +2402,8 @@ buf_page_try_get_func(
#ifdef UNIV_DEBUG_FILE_ACCESSES
ut_a(block->page.file_page_was_freed == FALSE);
#endif /* UNIV_DEBUG_FILE_ACCESSES */
#ifdef UNIV_SYNC_DEBUG
buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
#endif /* UNIV_SYNC_DEBUG */
buf_pool->n_page_gets++;
return(block);
......@@ -3070,9 +3076,6 @@ buf_page_io_complete(
ut_error;
}
mutex_exit(buf_page_get_mutex(bpage));
buf_pool_mutex_exit();
#ifdef UNIV_DEBUG
if (buf_debug_prints) {
fprintf(stderr, "Has %s page space %lu page no %lu\n",
......@@ -3081,6 +3084,9 @@ buf_page_io_complete(
(ulong) buf_page_get_page_no(bpage));
}
#endif /* UNIV_DEBUG */
mutex_exit(buf_page_get_mutex(bpage));
buf_pool_mutex_exit();
}
/*************************************************************************
......@@ -3446,6 +3452,7 @@ buf_print(void)
}
#endif /* UNIV_DEBUG_PRINT || UNIV_DEBUG || UNIV_BUF_DEBUG */
#ifdef UNIV_DEBUG
/*************************************************************************
Returns the number of latched pages in the buffer pool. */
UNIV_INTERN
......@@ -3532,6 +3539,7 @@ buf_get_latched_pages_number(void)
return(fixed_pages_number);
}
#endif /* UNIV_DEBUG */
/*************************************************************************
Returns the number of pending buf pool ios. */
......
......@@ -168,6 +168,7 @@ buf_flush_ready_for_replace(
" in the LRU list!\n",
(ulong) buf_page_get_state(bpage));
ut_print_buf(stderr, bpage, sizeof(buf_page_t));
putc('\n', stderr);
return(FALSE);
}
......@@ -634,6 +635,13 @@ buf_flush_init_for_writing(
return;
}
ut_print_timestamp(stderr);
fputs(" InnoDB: ERROR: The compressed page to be written"
" seems corrupt:", stderr);
ut_print_buf(stderr, page, zip_size);
fputs("\nInnoDB: Possibly older version of the page:", stderr);
ut_print_buf(stderr, page_zip->data, zip_size);
putc('\n', stderr);
ut_error;
}
......
......@@ -44,6 +44,11 @@ initial segment in buf_LRU_get_recent_limit */
#define BUF_LRU_INITIAL_RATIO 8
/* When dropping the search hash index entries before deleting an ibd
file, we build a local array of pages belonging to that tablespace
in the buffer pool. Following is the size of that array. */
#define BUF_LRU_DROP_SEARCH_HASH_SIZE 1024
/* If we switch on the InnoDB monitor because there are too few available
frames in the buffer pool, we set this to TRUE */
UNIV_INTERN ibool buf_lru_switched_on_innodb_mon = FALSE;
......@@ -157,6 +162,133 @@ buf_LRU_evict_from_unzip_LRU(void)
return(unzip_avg <= io_avg * BUF_LRU_IO_TO_UNZIP_FACTOR);
}
/**********************************************************************
Calls btr_search_drop_page_hash_when_freed() once for every page number
in a batch; all pages in the batch belong to the same tablespace. */
static
void
buf_LRU_drop_page_hash_batch(
/*=========================*/
	ulint		space_id,	/* in: space id */
	ulint		zip_size,	/* in: compressed page size in bytes
					or 0 for uncompressed pages */
	const ulint*	arr,		/* in: array of page_no */
	ulint		count)		/* in: number of entries in array */
{
	ulint	n_done = 0;

	ut_ad(arr != NULL);
	ut_ad(count <= BUF_LRU_DROP_SEARCH_HASH_SIZE);

	/* Walk the array front to back; an empty batch (count == 0)
	results in no calls at all. */
	while (n_done < count) {
		btr_search_drop_page_hash_when_freed(space_id, zip_size,
						     arr[n_done]);
		++n_done;
	}
}
/**********************************************************************
When doing a DROP TABLE/DISCARD TABLESPACE we have to drop all page
hash index entries belonging to that table. This function tries to
do that in batches: it scans the LRU list from its last element towards
the first, collects the page numbers of hashed, unfixed pages of the
tablespace into a local array, and drops their hash index entries one
array-full at a time. Note that this is a 'best effort' attempt and
does not guarantee that ALL hash entries will be removed. */
static
void
buf_LRU_drop_page_hash_for_tablespace(
/*==================================*/
	ulint	id)	/* in: space id */
{
	buf_page_t*	bpage;
	ulint*		page_arr;
	ulint		num_entries;
	ulint		zip_size;

	zip_size = fil_space_get_zip_size(id);

	if (UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) {
		/* Somehow, the tablespace does not exist. Nothing to drop. */
		ut_ad(0);
		return;
	}

	page_arr = ut_malloc(sizeof(ulint)
			     * BUF_LRU_DROP_SEARCH_HASH_SIZE);
	buf_pool_mutex_enter();

scan_again:
	num_entries = 0;
	bpage = UT_LIST_GET_LAST(buf_pool->LRU);

	while (bpage != NULL) {
		mutex_t*	block_mutex = buf_page_get_mutex(bpage);
		buf_page_t*	prev_bpage;

		mutex_enter(block_mutex);

		/* Remember the predecessor now, before the buffer pool
		mutex is possibly released further below. */
		prev_bpage = UT_LIST_GET_PREV(LRU, bpage);

		ut_a(buf_page_in_file(bpage));

		if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE
		    || bpage->space != id
		    || bpage->buf_fix_count > 0
		    || bpage->io_fix != BUF_IO_NONE) {
			/* We leave the fixed pages as is in this scan.
			To be dealt with later in the final scan. */
			mutex_exit(block_mutex);
			goto next_page;
		}

		if (((buf_block_t*) bpage)->is_hashed) {

			/* Check the array bound BEFORE writing to the
			array, so that a violated invariant is caught
			without first overwriting memory past the end
			of page_arr. */
			ut_a(num_entries < BUF_LRU_DROP_SEARCH_HASH_SIZE);

			/* Store the offset (i.e., page_no) in the array
			so that we can drop the hash index in a batch
			later. */
			page_arr[num_entries] = bpage->offset;
			mutex_exit(block_mutex);
			++num_entries;

			if (num_entries < BUF_LRU_DROP_SEARCH_HASH_SIZE) {
				goto next_page;
			}

			/* Array full. We release the buf_pool->mutex to
			obey the latching order. */
			buf_pool_mutex_exit();

			buf_LRU_drop_page_hash_batch(id, zip_size, page_arr,
						     num_entries);
			num_entries = 0;
			buf_pool_mutex_enter();
		} else {
			mutex_exit(block_mutex);
		}

next_page:
		/* Note that we may have released the buf_pool mutex
		above after reading the prev_bpage during processing
		of a page_hash_batch (i.e.: when the array was full).
		This means that prev_bpage can change in LRU list.
		This is OK because this function is a 'best effort'
		to drop as many search hash entries as possible and
		it does not guarantee that ALL such entries will be
		dropped. */
		bpage = prev_bpage;

		/* If, however, bpage has been removed from LRU list
		to the free list then we should restart the scan.
		bpage->state is protected by buf_pool mutex. */
		if (bpage && !buf_page_in_file(bpage)) {
			/* This can only happen right after a full batch
			was flushed, at which point the array is empty. */
			ut_a(num_entries == 0);
			goto scan_again;
		}
	}

	buf_pool_mutex_exit();

	/* Drop any remaining batch of search hashed pages. */
	buf_LRU_drop_page_hash_batch(id, zip_size, page_arr, num_entries);
	ut_free(page_arr);
}
/**********************************************************************
Invalidates all pages belonging to a given tablespace when we are deleting
the data file(s) of that tablespace. */
......@@ -170,6 +302,14 @@ buf_LRU_invalidate_tablespace(
ulint page_no;
ibool all_freed;
/* Before we attempt to drop pages one by one we first
attempt to drop page hash index entries in batches to make
it more efficient. The batching attempt is a best effort
attempt and does not guarantee that all page hash entries
will be dropped. We get rid of remaining page hash entries
one by one below. */
buf_LRU_drop_page_hash_for_tablespace(id);
scan_again:
buf_pool_mutex_enter();
......@@ -632,7 +772,7 @@ buf_LRU_get_free_block(
if (!buf_lru_switched_on_innodb_mon) {
/* Over 67 % of the buffer pool is occupied by lock
/* Over 67 % of the buffer pool is occupied by lock
heaps or the adaptive hash index. This may be a memory
leak! */
......@@ -712,7 +852,7 @@ buf_LRU_get_free_block(
if (n_iterations > 30) {
ut_print_timestamp(stderr);
fprintf(stderr,
"InnoDB: Warning: difficult to find free blocks from\n"
" InnoDB: Warning: difficult to find free blocks in\n"
"InnoDB: the buffer pool (%lu search iterations)!"
" Consider\n"
"InnoDB: increasing the buffer pool size.\n"
......@@ -790,12 +930,25 @@ buf_LRU_old_adjust_len(void)
#if 3 * (BUF_LRU_OLD_MIN_LEN / 8) <= BUF_LRU_OLD_TOLERANCE + 5
# error "3 * (BUF_LRU_OLD_MIN_LEN / 8) <= BUF_LRU_OLD_TOLERANCE + 5"
#endif
#ifdef UNIV_LRU_DEBUG
/* buf_pool->LRU_old must be the first item in the LRU list
whose "old" flag is set. */
ut_a(buf_pool->LRU_old->old);
ut_a(!UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)
|| !UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)->old);
ut_a(!UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)
|| UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)->old);
#endif /* UNIV_LRU_DEBUG */
for (;;) {
old_len = buf_pool->LRU_old_len;
new_len = 3 * (UT_LIST_GET_LEN(buf_pool->LRU) / 8);
ut_ad(buf_pool->LRU_old->in_LRU_list);
ut_a(buf_pool->LRU_old);
#ifdef UNIV_LRU_DEBUG
ut_a(buf_pool->LRU_old->old);
#endif /* UNIV_LRU_DEBUG */
/* Update the LRU_old pointer if necessary */
......@@ -803,6 +956,9 @@ buf_LRU_old_adjust_len(void)
buf_pool->LRU_old = UT_LIST_GET_PREV(
LRU, buf_pool->LRU_old);
#ifdef UNIV_LRU_DEBUG
ut_a(!buf_pool->LRU_old->old);
#endif /* UNIV_LRU_DEBUG */
buf_page_set_old(buf_pool->LRU_old, TRUE);
buf_pool->LRU_old_len++;
......@@ -813,8 +969,6 @@ buf_LRU_old_adjust_len(void)
LRU, buf_pool->LRU_old);
buf_pool->LRU_old_len--;
} else {
ut_a(buf_pool->LRU_old); /* Check that we did not
fall out of the LRU list */
return;
}
}
......@@ -901,6 +1055,9 @@ buf_LRU_remove_block(
buf_pool->LRU_old = UT_LIST_GET_PREV(LRU, bpage);
ut_a(buf_pool->LRU_old);
#ifdef UNIV_LRU_DEBUG
ut_a(!buf_pool->LRU_old->old);
#endif /* UNIV_LRU_DEBUG */
buf_page_set_old(buf_pool->LRU_old, TRUE);
buf_pool->LRU_old_len++;
......@@ -974,8 +1131,6 @@ buf_LRU_add_block_to_end_low(
ut_a(buf_page_in_file(bpage));
buf_page_set_old(bpage, TRUE);
last_bpage = UT_LIST_GET_LAST(buf_pool->LRU);
if (last_bpage) {
......@@ -988,6 +1143,8 @@ buf_LRU_add_block_to_end_low(
UT_LIST_ADD_LAST(LRU, buf_pool->LRU, bpage);
ut_d(bpage->in_LRU_list = TRUE);
buf_page_set_old(bpage, TRUE);
if (UT_LIST_GET_LEN(buf_pool->LRU) >= BUF_LRU_OLD_MIN_LEN) {
buf_pool->LRU_old_len++;
......@@ -1035,8 +1192,6 @@ buf_LRU_add_block_low(
ut_a(buf_page_in_file(bpage));
ut_ad(!bpage->in_LRU_list);
buf_page_set_old(bpage, old);
if (!old || (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN)) {
UT_LIST_ADD_FIRST(LRU, buf_pool->LRU, bpage);
......@@ -1044,6 +1199,15 @@ buf_LRU_add_block_low(
bpage->LRU_position = buf_pool_clock_tic();
bpage->freed_page_clock = buf_pool->freed_page_clock;
} else {
#ifdef UNIV_LRU_DEBUG
/* buf_pool->LRU_old must be the first item in the LRU list
whose "old" flag is set. */
ut_a(buf_pool->LRU_old->old);
ut_a(!UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)
|| !UT_LIST_GET_PREV(LRU, buf_pool->LRU_old)->old);
ut_a(!UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)
|| UT_LIST_GET_NEXT(LRU, buf_pool->LRU_old)->old);
#endif /* UNIV_LRU_DEBUG */
UT_LIST_INSERT_AFTER(LRU, buf_pool->LRU, buf_pool->LRU_old,
bpage);
buf_pool->LRU_old_len++;
......@@ -1056,6 +1220,8 @@ buf_LRU_add_block_low(
ut_d(bpage->in_LRU_list = TRUE);
buf_page_set_old(bpage, old);
if (UT_LIST_GET_LEN(buf_pool->LRU) > BUF_LRU_OLD_MIN_LEN) {
ut_ad(buf_pool->LRU_old);
......@@ -1246,6 +1412,21 @@ buf_LRU_free_block(
if (buf_page_is_old(b)) {
buf_pool->LRU_old_len++;
if (UNIV_UNLIKELY
(buf_pool->LRU_old
== UT_LIST_GET_NEXT(LRU, b))) {
buf_pool->LRU_old = b;
}
#ifdef UNIV_LRU_DEBUG
ut_a(prev_b->old
|| !UT_LIST_GET_NEXT(LRU, b)
|| UT_LIST_GET_NEXT(LRU, b)->old);
} else {
ut_a(!prev_b->old
|| !UT_LIST_GET_NEXT(LRU, b)
|| !UT_LIST_GET_NEXT(LRU, b)->old);
#endif /* UNIV_LRU_DEBUG */
}
lru_len = UT_LIST_GET_LEN(buf_pool->LRU);
......@@ -1455,6 +1636,8 @@ buf_LRU_block_remove_hashed_page(
buf_block_modify_clock_inc((buf_block_t*) bpage);
if (bpage->zip.data) {
const page_t* page = ((buf_block_t*) bpage)->frame;
const ulint zip_size
= page_zip_get_size(&bpage->zip);
ut_a(!zip || bpage->oldest_modification == 0);
......@@ -1472,7 +1655,7 @@ buf_LRU_block_remove_hashed_page(
to the compressed page, which will
be preserved. */
memcpy(bpage->zip.data, page,
page_zip_get_size(&bpage->zip));
zip_size);
}
break;
case FIL_PAGE_TYPE_ZBLOB:
......@@ -1484,6 +1667,15 @@ buf_LRU_block_remove_hashed_page(
#endif /* UNIV_ZIP_DEBUG */
break;
default:
ut_print_timestamp(stderr);
fputs(" InnoDB: ERROR: The compressed page"
" to be evicted seems corrupt:", stderr);
ut_print_buf(stderr, page, zip_size);
fputs("\nInnoDB: Possibly older version"
" of the page:", stderr);
ut_print_buf(stderr, bpage->zip.data,
zip_size);
putc('\n', stderr);
ut_error;
}
......
......@@ -607,6 +607,7 @@ dtuple_convert_big_rec(
while (page_zip_rec_needs_ext(rec_get_converted_size(index, entry,
*n_ext),
dict_table_is_comp(index->table),
dict_index_get_n_fields(index),
dict_table_zip_size(index->table))) {
ulint i;
ulint longest = 0;
......
......@@ -39,9 +39,9 @@ dict_hdr_get(
block = buf_page_get(DICT_HDR_SPACE, 0, DICT_HDR_PAGE_NO,
RW_X_LATCH, mtr);
header = DICT_HDR + buf_block_get_frame(block);
#ifdef UNIV_SYNC_DEBUG
buf_block_dbg_add_level(block, SYNC_DICT_HEADER);
#endif /* UNIV_SYNC_DEBUG */
return(header);
}
......@@ -279,7 +279,8 @@ dict_boot(void)
error = dict_index_add_to_cache(table, index,
mtr_read_ulint(dict_hdr
+ DICT_HDR_TABLES,
MLOG_4BYTES, &mtr));
MLOG_4BYTES, &mtr),
FALSE);
ut_a(error == DB_SUCCESS);
/*-------------------------*/
......@@ -291,7 +292,8 @@ dict_boot(void)
error = dict_index_add_to_cache(table, index,
mtr_read_ulint(dict_hdr
+ DICT_HDR_TABLE_IDS,
MLOG_4BYTES, &mtr));
MLOG_4BYTES, &mtr),
FALSE);
ut_a(error == DB_SUCCESS);
/*-------------------------*/
......@@ -322,7 +324,8 @@ dict_boot(void)
error = dict_index_add_to_cache(table, index,
mtr_read_ulint(dict_hdr
+ DICT_HDR_COLUMNS,
MLOG_4BYTES, &mtr));
MLOG_4BYTES, &mtr),
FALSE);
ut_a(error == DB_SUCCESS);
/*-------------------------*/
......@@ -363,7 +366,8 @@ dict_boot(void)
error = dict_index_add_to_cache(table, index,
mtr_read_ulint(dict_hdr
+ DICT_HDR_INDEXES,
MLOG_4BYTES, &mtr));
MLOG_4BYTES, &mtr),
FALSE);
ut_a(error == DB_SUCCESS);
/*-------------------------*/
......@@ -389,7 +393,8 @@ dict_boot(void)
error = dict_index_add_to_cache(table, index,
mtr_read_ulint(dict_hdr
+ DICT_HDR_FIELDS,
MLOG_4BYTES, &mtr));
MLOG_4BYTES, &mtr),
FALSE);
ut_a(error == DB_SUCCESS);
mtr_commit(&mtr);
......
......@@ -216,8 +216,6 @@ dict_build_table_def_step(
const char* path_or_name;
ibool is_path;
mtr_t mtr;
ulint i;
ulint row_len;
ut_ad(mutex_own(&(dict_sys->mutex)));
......@@ -227,14 +225,6 @@ dict_build_table_def_step(
thr_get_trx(thr)->table_id = table->id;
row_len = 0;
for (i = 0; i < table->n_def; i++) {
row_len += dict_col_get_min_size(&table->cols[i]);
}
if (row_len > BTR_PAGE_MAX_REC_SIZE) {
return(DB_TOO_BIG_RECORD);
}
if (srv_file_per_table) {
/* We create a new single-table tablespace for the table.
We initially let it be 4 pages:
......@@ -543,11 +533,7 @@ dict_build_index_def_step(
ut_ad((UT_LIST_GET_LEN(table->indexes) > 0)
|| dict_index_is_clust(index));
/* For fast index creation we have already allocated an index id
for this index so that we could write an UNDO log record for it.*/
if (ut_dulint_is_zero(index->id)) {
index->id = dict_hdr_get_new_id(DICT_HDR_INDEX_ID);
}
index->id = dict_hdr_get_new_id(DICT_HDR_INDEX_ID);
/* Inherit the space id from the table; we store all indexes of a
table in the same tablespace */
......@@ -1093,7 +1079,7 @@ dict_create_index_step(
dulint index_id = node->index->id;
err = dict_index_add_to_cache(node->table, node->index,
FIL_NULL);
FIL_NULL, trx_is_strict(trx));
node->index = dict_index_get_if_in_cache_low(index_id);
ut_a(!node->index == (err != DB_SUCCESS));
......@@ -1239,7 +1225,6 @@ dict_create_or_check_foreign_constraint_tables(void)
" FOR_COL_NAME CHAR, REF_COL_NAME CHAR);\n"
"CREATE UNIQUE CLUSTERED INDEX ID_IND"
" ON SYS_FOREIGN_COLS (ID, POS);\n"
"COMMIT WORK;\n"
"END;\n"
, FALSE, trx);
......@@ -1262,7 +1247,7 @@ dict_create_or_check_foreign_constraint_tables(void)
error = DB_MUST_GET_MORE_FILE_SPACE;
}
trx->op_info = "";
trx_commit_for_mysql(trx);
row_mysql_unlock_data_dictionary(trx);
......
......@@ -22,6 +22,8 @@ Created 1/8/1996 Heikki Tuuri
#include "btr0btr.h"
#include "btr0cur.h"
#include "btr0sea.h"
#include "page0zip.h"
#include "page0page.h"
#include "pars0pars.h"
#include "pars0sym.h"
#include "que0que.h"
......@@ -55,56 +57,6 @@ UNIV_INTERN rw_lock_t dict_operation_lock;
/* Identifies generated InnoDB foreign key names */
static char dict_ibfk[] = "_ibfk_";
#ifndef UNIV_HOTBACKUP
/**********************************************************************
Converts an identifier to a table name.
NOTE: the prototype of this function is copied from ha_innodb.cc! If you change
this function, you MUST change also the prototype here! */
UNIV_INTERN
void
innobase_convert_from_table_id(
/*===========================*/
char* to, /* out: converted identifier */
const char* from, /* in: identifier to convert */
ulint len); /* in: length of 'to', in bytes;
should be at least 5 * strlen(to) + 1 */
/**********************************************************************
Converts an identifier to UTF-8.
NOTE: the prototype of this function is copied from ha_innodb.cc! If you change
this function, you MUST change also the prototype here! */
UNIV_INTERN
void
innobase_convert_from_id(
/*=====================*/
char* to, /* out: converted identifier */
const char* from, /* in: identifier to convert */
ulint len); /* in: length of 'to', in bytes;
should be at least 3 * strlen(to) + 1 */
/**********************************************************************
Makes all characters in a NUL-terminated UTF-8 string lower case.
NOTE: the prototype of this function is copied from ha_innodb.cc! If you change
this function, you MUST change also the prototype here! */
UNIV_INTERN
void
innobase_casedn_str(
/*================*/
char* a); /* in/out: string to put in lower case */
/**************************************************************************
Determines the connection character set.
NOTE: the prototype of this function is copied from ha_innodb.cc! If you change
this function, you MUST change also the prototype here! */
struct charset_info_st*
innobase_get_charset(
/*=================*/
/* out: connection character set */
void* mysql_thd); /* in: MySQL thread handle */
#endif /* !UNIV_HOTBACKUP */
/***********************************************************************
Tries to find column names for the index and sets the col field of the
index. */
......@@ -335,8 +287,7 @@ dict_table_autoinc_lock(
}
/************************************************************************
Initializes the autoinc counter. It is not an error to initialize an already
initialized counter. */
Unconditionally set the autoinc counter. */
UNIV_INTERN
void
dict_table_autoinc_initialize(
......@@ -346,7 +297,6 @@ dict_table_autoinc_initialize(
{
ut_ad(mutex_own(&table->autoinc_mutex));
table->autoinc_inited = TRUE;
table->autoinc = value;
}
......@@ -360,32 +310,25 @@ dict_table_autoinc_read(
/* out: value for a new row, or 0 */
const dict_table_t* table) /* in: table */
{
ib_int64_t value;
ut_ad(mutex_own(&table->autoinc_mutex));
if (!table->autoinc_inited) {
value = 0;
} else {
value = table->autoinc;
}
return(value);
return(table->autoinc);
}
/************************************************************************
Updates the autoinc counter if the value supplied is greater than the
current value. If not inited, does nothing. */
current value. */
UNIV_INTERN
void
dict_table_autoinc_update(
/*======================*/
dict_table_autoinc_update_if_greater(
/*=================================*/
dict_table_t* table, /* in/out: table */
ib_uint64_t value) /* in: value which was assigned to a row */
{
if (table->autoinc_inited && value > table->autoinc) {
ut_ad(mutex_own(&table->autoinc_mutex));
if (value > table->autoinc) {
table->autoinc = value;
}
......@@ -1312,6 +1255,156 @@ dict_index_too_big_for_undo(
return(undo_page_len >= UNIV_PAGE_SIZE);
}
/********************************************************************
If a record of this index might not fit on a single B-tree page,
return TRUE. */
static
ibool
dict_index_too_big_for_tree(
/*========================*/
/* out: TRUE if the index
record could become too big */
const dict_table_t* table, /* in: table */
const dict_index_t* new_index) /* in: index */
{
ulint zip_size;
ulint comp;
ulint i;
/* maximum possible storage size of a record */
ulint rec_max_size;
/* maximum allowed size of a record on a leaf page */
ulint page_rec_max;
/* maximum allowed size of a node pointer record */
ulint page_ptr_max;
comp = dict_table_is_comp(table);
zip_size = dict_table_zip_size(table);
if (zip_size && zip_size < UNIV_PAGE_SIZE) {
/* On a compressed page, two records must fit in the
uncompressed page modification log. On compressed
pages with zip_size == UNIV_PAGE_SIZE, this limit will
never be reached. */
ut_ad(comp);
/* The maximum allowed record size is the size of
an empty page, minus a byte for recoding the heap
number in the page modification log. The maximum
allowed node pointer size is half that. */
page_rec_max = page_zip_empty_size(new_index->n_fields,
zip_size) - 1;
page_ptr_max = page_rec_max / 2;
/* On a compressed page, there is a two-byte entry in
the dense page directory for every record. But there
is no record header. */
rec_max_size = 2;
} else {
/* The maximum allowed record size is half a B-tree
page. No additional sparse page directory entry will
be generated for the first few user records. */
page_rec_max = page_get_free_space_of_empty(comp) / 2;
page_ptr_max = page_rec_max;
/* Each record has a header. */
rec_max_size = comp
? REC_N_NEW_EXTRA_BYTES
: REC_N_OLD_EXTRA_BYTES;
}
if (comp) {
/* Include the "null" flags in the
maximum possible record size. */
rec_max_size += UT_BITS_IN_BYTES(new_index->n_nullable);
} else {
/* For each column, include a 2-byte offset and a
"null" flag. The 1-byte format is only used in short
records that do not contain externally stored columns.
Such records could never exceed the page limit, even
when using the 2-byte format. */
rec_max_size += 2 * new_index->n_fields;
}
/* Compute the maximum possible record size. */
for (i = 0; i < new_index->n_fields; i++) {
const dict_field_t* field
= dict_index_get_nth_field(new_index, i);
const dict_col_t* col
= dict_field_get_col(field);
ulint field_max_size;
ulint field_ext_max_size;
/* In dtuple_convert_big_rec(), variable-length columns
that are longer than BTR_EXTERN_FIELD_REF_SIZE * 2
may be chosen for external storage.
Fixed-length columns, and all columns of secondary
index records are always stored inline. */
/* Determine the maximum length of the index field.
The field_ext_max_size should be computed as the worst
case in rec_get_converted_size_comp() for
REC_STATUS_ORDINARY records. */
field_max_size = dict_col_get_fixed_size(col);
if (field_max_size) {
/* dict_index_add_col() should guarantee this */
ut_ad(!field->prefix_len
|| field->fixed_len == field->prefix_len);
/* Fixed lengths are not encoded
in ROW_FORMAT=COMPACT. */
field_ext_max_size = 0;
goto add_field_size;
}
field_max_size = dict_col_get_max_size(col);
field_ext_max_size = field_max_size < 256 ? 1 : 2;
if (field->prefix_len) {
if (field->prefix_len < field_max_size) {
field_max_size = field->prefix_len;
}
} else if (field_max_size > BTR_EXTERN_FIELD_REF_SIZE * 2
&& dict_index_is_clust(new_index)) {
/* In the worst case, we have a locally stored
column of BTR_EXTERN_FIELD_REF_SIZE * 2 bytes.
The length can be stored in one byte. If the
column were stored externally, the lengths in
the clustered index page would be
BTR_EXTERN_FIELD_REF_SIZE and 2. */
field_max_size = BTR_EXTERN_FIELD_REF_SIZE * 2;
field_ext_max_size = 1;
}
if (comp) {
/* Add the extra size for ROW_FORMAT=COMPACT.
For ROW_FORMAT=REDUNDANT, these bytes were
added to rec_max_size before this loop. */
rec_max_size += field_ext_max_size;
}
add_field_size:
rec_max_size += field_max_size;
/* Check the size limit on leaf pages. */
if (UNIV_UNLIKELY(rec_max_size >= page_rec_max)) {
return(TRUE);
}
/* Check the size limit on non-leaf pages. Records
stored in non-leaf B-tree pages consist of the unique
columns of the record (the key columns of the B-tree)
and a node pointer field. When we have processed the
unique columns, rec_max_size equals the size of the
node pointer record minus the node pointer column. */
if (i + 1 == dict_index_get_n_unique_in_tree(new_index)
&& rec_max_size + REC_NODE_PTR_SIZE >= page_ptr_max) {
return(TRUE);
}
}
return(FALSE);
}
/**************************************************************************
Adds an index to the dictionary cache. */
UNIV_INTERN
......@@ -1322,7 +1415,10 @@ dict_index_add_to_cache(
dict_table_t* table, /* in: table on which the index is */
dict_index_t* index, /* in, own: index; NOTE! The index memory
object is freed in this function! */
ulint page_no)/* in: root page number of the index */
ulint page_no,/* in: root page number of the index */
ibool strict) /* in: TRUE=refuse to create the index
if records could be too big to fit in
a B-tree page */
{
dict_index_t* new_index;
ulint n_ord;
......@@ -1353,12 +1449,42 @@ dict_index_add_to_cache(
new_index->n_fields = new_index->n_def;
if (strict && dict_index_too_big_for_tree(table, new_index)) {
too_big:
dict_mem_index_free(new_index);
dict_mem_index_free(index);
return(DB_TOO_BIG_RECORD);
}
if (UNIV_UNLIKELY(index->type & DICT_UNIVERSAL)) {
n_ord = new_index->n_fields;
} else {
n_ord = new_index->n_uniq;
}
switch (dict_table_get_format(table)) {
case DICT_TF_FORMAT_51:
/* ROW_FORMAT=REDUNDANT and ROW_FORMAT=COMPACT store
prefixes of externally stored columns locally within
the record. There are no special considerations for
the undo log record size. */
goto undo_size_ok;
case DICT_TF_FORMAT_ZIP:
/* In ROW_FORMAT=DYNAMIC and ROW_FORMAT=COMPRESSED,
column prefix indexes require that prefixes of
externally stored columns are written to the undo log.
This may make the undo log record bigger than the
record on the B-tree page. The maximum size of an
undo log record is the page size. That must be
checked for below. */
break;
#if DICT_TF_FORMAT_ZIP != DICT_TF_FORMAT_MAX
# error "DICT_TF_FORMAT_ZIP != DICT_TF_FORMAT_MAX"
#endif
}
for (i = 0; i < n_ord; i++) {
const dict_field_t* field
= dict_index_get_nth_field(new_index, i);
......@@ -1384,15 +1510,15 @@ dict_index_add_to_cache(
if (dict_index_too_big_for_undo(table, new_index)) {
/* An undo log record might not fit in
a single page. Refuse to create this index. */
dict_mem_index_free(new_index);
dict_mem_index_free(index);
return(DB_TOO_BIG_RECORD);
goto too_big;
}
break;
}
}
undo_size_ok:
/* Flag the ordering columns */
for (i = 0; i < n_ord; i++) {
......@@ -1446,12 +1572,59 @@ dict_index_remove_from_cache(
dict_index_t* index) /* in, own: index */
{
ulint size;
ulint retries = 0;
btr_search_t* info;
ut_ad(table && index);
ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
ut_ad(mutex_own(&(dict_sys->mutex)));
/* We always create search info whether or not adaptive
hash index is enabled or not. */
info = index->search_info;
ut_ad(info);
/* We are not allowed to free the in-memory index struct
dict_index_t until all entries in the adaptive hash index
that point to any of the page belonging to his b-tree index
are dropped. This is so because dropping of these entries
require access to dict_index_t struct. To avoid such scenario
We keep a count of number of such pages in the search_info and
only free the dict_index_t struct when this count drops to
zero. */
for (;;) {
ulint ref_count = btr_search_info_get_ref_count(info);
if (ref_count == 0) {
break;
}
/* Sleep for 10ms before trying again. */
os_thread_sleep(10000);
++retries;
if (retries % 500 == 0) {
/* No luck after 5 seconds of wait. */
fprintf(stderr, "InnoDB: Error: Waited for"
" %lu secs for hash index"
" ref_count (%lu) to drop"
" to 0.\n"
"index: \"%s\""
" table: \"%s\"\n",
retries/100,
ref_count,
index->name,
table->name);
}
/* To avoid a hang here we commit suicide if the
ref_count doesn't drop to zero in 600 seconds. */
if (retries >= 60000) {
ut_error;
}
}
rw_lock_free(&index->lock);
/* Remove the index from the list of indexes of the table */
......@@ -1901,27 +2074,19 @@ dict_table_get_referenced_constraint(
dict_table_t* table, /* in: InnoDB table */
dict_index_t* index) /* in: InnoDB index */
{
dict_foreign_t* foreign = NULL;
ut_ad(index && table);
/* If the referenced list is empty, nothing to do */
if (UT_LIST_GET_LEN(table->referenced_list) == 0) {
dict_foreign_t* foreign;
return(NULL);
}
ut_ad(index != NULL);
ut_ad(table != NULL);
foreign = UT_LIST_GET_FIRST(table->referenced_list);
for (foreign = UT_LIST_GET_FIRST(table->referenced_list);
foreign;
foreign = UT_LIST_GET_NEXT(referenced_list, foreign)) {
while (foreign) {
if (foreign->referenced_index == index
|| foreign->referenced_index == index) {
if (foreign->referenced_index == index) {
return(foreign);
}
foreign = UT_LIST_GET_NEXT(referenced_list, foreign);
}
return(NULL);
......@@ -1940,29 +2105,20 @@ dict_table_get_foreign_constraint(
dict_table_t* table, /* in: InnoDB table */
dict_index_t* index) /* in: InnoDB index */
{
dict_foreign_t* foreign = NULL;
ut_ad(index && table);
/* If list empty then nothgin to do */
if (UT_LIST_GET_LEN(table->foreign_list) == 0) {
return(NULL);
}
dict_foreign_t* foreign;
/* Check whether this index is defined for a foreign key */
ut_ad(index != NULL);
ut_ad(table != NULL);
foreign = UT_LIST_GET_FIRST(table->foreign_list);
for (foreign = UT_LIST_GET_FIRST(table->foreign_list);
foreign;
foreign = UT_LIST_GET_NEXT(foreign_list, foreign)) {
while (foreign) {
if (foreign->foreign_index == index
|| foreign->referenced_index == index) {
return(foreign);
}
foreign = UT_LIST_GET_NEXT(foreign_list, foreign);
}
return(NULL);
......@@ -2132,6 +2288,30 @@ dict_foreign_find_index(
return(NULL);
}
/**************************************************************************
Searches the foreign table of a foreign key constraint for an index that
is equivalent to foreign->foreign_index and is not marked for deletion. */
UNIV_INTERN
dict_index_t*
dict_foreign_find_equiv_index(
/*==========================*/
				/* out: index equivalent to
				foreign->foreign_index, or NULL */
	dict_foreign_t*	foreign)/* in: foreign key */
{
	dict_index_t*	equiv_index;

	ut_a(foreign != NULL);

	/* Look for an index whose first fields are the foreign key
	columns, in the right order, and whose column types are the
	same as in foreign->foreign_index. */
	equiv_index = dict_foreign_find_index(
		foreign->foreign_table,
		foreign->foreign_col_names,
		foreign->n_fields,
		foreign->foreign_index,
		TRUE,	/* check types */
		FALSE	/* allow columns to be NULL */);

	return(equiv_index);
}
/**************************************************************************
Returns an index object by matching on the name and column names and
if more than one index matches return the index with the max id */
......@@ -2362,7 +2542,7 @@ dict_foreign_add_to_cache(
Scans from pointer onwards. Stops if is at the start of a copy of
'string' where characters are compared without case sensitivity, and
only outside `` or "" quotes. Stops also at '\0'. */
UNIV_INTERN
static
const char*
dict_scan_to(
/*=========*/
......@@ -2537,7 +2717,7 @@ dict_scan_id(
len = 3 * len + 1;
*id = dst = mem_heap_alloc(heap, len);
innobase_convert_from_id(dst, str, len);
innobase_convert_from_id(cs, dst, str, len);
} else if (!strncmp(str, srv_mysql50_table_name_prefix,
sizeof srv_mysql50_table_name_prefix)) {
/* This is a pre-5.1 table name
......@@ -2551,7 +2731,7 @@ dict_scan_id(
len = 5 * len + 1;
*id = dst = mem_heap_alloc(heap, len);
innobase_convert_from_table_id(dst, str, len);
innobase_convert_from_table_id(cs, dst, str, len);
}
return(ptr);
......@@ -3468,25 +3648,6 @@ dict_create_foreign_constraints_low(
goto loop;
}
/**************************************************************************
Determines whether a string starts with the specified keyword. */
UNIV_INTERN
ibool
dict_str_starts_with_keyword(
/*=========================*/
/* out: TRUE if str starts
with keyword */
void* mysql_thd, /* in: MySQL thread handle */
const char* str, /* in: string to scan for keyword */
const char* keyword) /* in: keyword to look for */
{
struct charset_info_st* cs = innobase_get_charset(mysql_thd);
ibool success;
dict_accept(cs, str, keyword, &success);
return(success);
}
/*************************************************************************
Scans a table create SQL string and adds to the data dictionary the foreign
key constraints declared in the string. This function should be called after
......@@ -4455,41 +4616,6 @@ dict_table_get_index_on_name(
}
/**************************************************************************
Find and index that is equivalent to the one passed in. */
UNIV_INTERN
dict_index_t*
dict_table_find_equivalent_index(
/*=============================*/
dict_table_t* table, /* in/out: table */
dict_index_t* index) /* in: index to match */
{
ulint i;
const char** column_names;
dict_index_t* equiv_index;
if (UT_LIST_GET_LEN(table->foreign_list) == 0) {
return(NULL);
}
column_names = mem_alloc(index->n_fields * sizeof *column_names);
/* Convert the column names to the format & type accepted by the find
index function */
for (i = 0; i < index->n_fields; i++) {
column_names[i] = index->fields[i].name;
}
equiv_index = dict_foreign_find_index(
table, column_names, index->n_fields,
index, TRUE, FALSE);
mem_free((void*) column_names);
return(equiv_index);
}
/**************************************************************************
Replace the index passed in with another equivalent index in the table's
foreign key list. */
......@@ -4500,30 +4626,18 @@ dict_table_replace_index_in_foreign_list(
dict_table_t* table, /* in/out: table */
dict_index_t* index) /* in: index to be replaced */
{
dict_index_t* new_index;
new_index = dict_table_find_equivalent_index(table, index);
/* If match found */
if (new_index) {
dict_foreign_t* foreign;
ut_a(new_index != index);
foreign = UT_LIST_GET_FIRST(table->foreign_list);
/* If the list is not empty then this should hold */
ut_a(foreign);
dict_foreign_t* foreign;
/* Iterate over the foreign index list and replace the index
passed in with the new index */
while (foreign) {
for (foreign = UT_LIST_GET_FIRST(table->foreign_list);
foreign;
foreign = UT_LIST_GET_NEXT(foreign_list, foreign)) {
if (foreign->foreign_index == index) {
foreign->foreign_index = new_index;
}
if (foreign->foreign_index == index) {
dict_index_t* new_index
= dict_foreign_find_equiv_index(foreign);
ut_a(new_index);
foreign = UT_LIST_GET_NEXT(foreign_list, foreign);
foreign->foreign_index = new_index;
}
}
}
......
......@@ -765,7 +765,8 @@ dict_load_indexes(
index->id = id;
dict_load_fields(index, heap);
error = dict_index_add_to_cache(table, index, page_no);
error = dict_index_add_to_cache(table, index, page_no,
FALSE);
/* The data dictionary tables should never contain
invalid index definitions. If we ignored this error
and simply did not load this index definition, the
......
......@@ -58,13 +58,15 @@ dict_mem_table_create(
table->cols = mem_heap_alloc(heap, (n_cols + DATA_N_SYS_COLS)
* sizeof(dict_col_t));
table->auto_inc_lock = mem_heap_alloc(heap, lock_get_size());
table->autoinc_lock = mem_heap_alloc(heap, lock_get_size());
mutex_create(&table->autoinc_mutex, SYNC_DICT_AUTOINC_MUTEX);
/* The actual increment value will be set by MySQL, we simply
default to 1 here.*/
table->autoinc_increment = 1;
table->autoinc = 0;
/* The number of transactions that are either waiting on the
AUTOINC lock or have been granted the lock. */
table->n_waiting_or_granted_auto_inc_locks = 0;
#ifdef UNIV_DEBUG
table->magic_n = DICT_TABLE_MAGIC_N;
......
......@@ -4312,18 +4312,16 @@ fil_io(
ut_ad(recv_no_ibuf_operations || (type == OS_FILE_WRITE)
|| !ibuf_bitmap_page(zip_size, block_offset)
|| sync || is_log);
#ifdef UNIV_SYNC_DEBUG
ut_ad(!ibuf_inside() || is_log || (type == OS_FILE_WRITE)
|| ibuf_page(space_id, zip_size, block_offset));
#endif
#endif
if (sync) {
mode = OS_AIO_SYNC;
} else if (type == OS_FILE_READ && !is_log
&& ibuf_page(space_id, zip_size, block_offset)) {
mode = OS_AIO_IBUF;
} else if (is_log) {
mode = OS_AIO_LOG;
} else if (type == OS_FILE_READ
&& ibuf_page(space_id, zip_size, block_offset)) {
mode = OS_AIO_IBUF;
} else {
mode = OS_AIO_NORMAL;
}
......@@ -4492,8 +4490,6 @@ fil_aio_wait(
#ifdef WIN_ASYNC_IO
ret = os_aio_windows_handle(segment, 0, &fil_node,
&message, &type);
#elif defined(POSIX_ASYNC_IO)
ret = os_aio_posix_handle(segment, &fil_node, &message);
#else
ret = 0; /* Eliminate compiler warning */
ut_error;
......
......@@ -345,9 +345,8 @@ fsp_get_space_header(
block = buf_page_get(id, zip_size, 0, RW_X_LATCH, mtr);
header = FSP_HEADER_OFFSET + buf_block_get_frame(block);
#ifdef UNIV_SYNC_DEBUG
buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
#endif /* UNIV_SYNC_DEBUG */
ut_ad(id == mach_read_from_4(FSP_SPACE_ID + header));
ut_ad(zip_size == dict_table_flags_to_zip_size(
mach_read_from_4(FSP_SPACE_FLAGS + header)));
......@@ -700,6 +699,7 @@ xdes_get_descriptor_with_space_hdr(
MTR_MEMO_X_LOCK));
ut_ad(mtr_memo_contains_page(mtr, sp_header, MTR_MEMO_PAGE_S_FIX)
|| mtr_memo_contains_page(mtr, sp_header, MTR_MEMO_PAGE_X_FIX));
ut_ad(page_offset(sp_header) == FSP_HEADER_OFFSET);
/* Read free limit and space size */
limit = mach_read_from_4(sp_header + FSP_FREE_LIMIT);
size = mach_read_from_4(sp_header + FSP_SIZE);
......@@ -730,9 +730,8 @@ xdes_get_descriptor_with_space_hdr(
block = buf_page_get(space, zip_size, descr_page_no,
RW_X_LATCH, mtr);
#ifdef UNIV_SYNC_DEBUG
buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
#endif /* UNIV_SYNC_DEBUG */
descr_page = buf_block_get_frame(block);
}
......@@ -765,9 +764,8 @@ xdes_get_descriptor(
fsp_header_t* sp_header;
block = buf_page_get(space, zip_size, 0, RW_X_LATCH, mtr);
#ifdef UNIV_SYNC_DEBUG
buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
#endif /* UNIV_SYNC_DEBUG */
sp_header = FSP_HEADER_OFFSET + buf_block_get_frame(block);
return(xdes_get_descriptor_with_space_hdr(sp_header, space, offset,
mtr));
......@@ -948,9 +946,7 @@ fsp_header_init(
zip_size = dict_table_flags_to_zip_size(flags);
block = buf_page_create(space, 0, zip_size, mtr);
buf_page_get(space, zip_size, 0, RW_X_LATCH, mtr);
#ifdef UNIV_SYNC_DEBUG
buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
#endif /* UNIV_SYNC_DEBUG */
/* The prior contents of the file page should be ignored */
......@@ -1316,6 +1312,7 @@ fsp_fill_free_list(
mtr_t ibuf_mtr;
ut_ad(header && mtr);
ut_ad(page_offset(header) == FSP_HEADER_OFFSET);
/* Check if we can fill free list from above the free list limit */
size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr);
......@@ -1380,10 +1377,9 @@ fsp_fill_free_list(
space, i, zip_size, mtr);
buf_page_get(space, zip_size, i,
RW_X_LATCH, mtr);
#ifdef UNIV_SYNC_DEBUG
buf_block_dbg_add_level(block,
SYNC_FSP_PAGE);
#endif /* UNIV_SYNC_DEBUG */
fsp_init_file_page(block, mtr);
mlog_write_ulint(buf_block_get_frame(block)
+ FIL_PAGE_TYPE,
......@@ -1404,9 +1400,8 @@ fsp_fill_free_list(
buf_page_get(space, zip_size,
i + FSP_IBUF_BITMAP_OFFSET,
RW_X_LATCH, &ibuf_mtr);
#ifdef UNIV_SYNC_DEBUG
buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
#endif /* UNIV_SYNC_DEBUG */
fsp_init_file_page(block, &ibuf_mtr);
ibuf_bitmap_page_init(block, &ibuf_mtr);
......@@ -1577,6 +1572,7 @@ fsp_alloc_free_page(
if (free == ULINT_UNDEFINED) {
ut_print_buf(stderr, ((byte*)descr) - 500, 1000);
putc('\n', stderr);
ut_error;
}
......@@ -1636,9 +1632,7 @@ fsp_alloc_free_page(
buf_page_create(space, page_no, zip_size, mtr);
block = buf_page_get(space, zip_size, page_no, RW_X_LATCH, mtr);
#ifdef UNIV_SYNC_DEBUG
buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
#endif /* UNIV_SYNC_DEBUG */
/* Prior contents of the page should be ignored */
fsp_init_file_page(block, mtr);
......@@ -1760,6 +1754,7 @@ fsp_free_extent(
if (xdes_get_state(descr, mtr) == XDES_FREE) {
ut_print_buf(stderr, (byte*)descr - 500, 1000);
putc('\n', stderr);
ut_error;
}
......@@ -1867,6 +1862,8 @@ fsp_alloc_seg_inode_page(
ulint zip_size;
ulint i;
ut_ad(page_offset(space_header) == FSP_HEADER_OFFSET);
space = page_get_space_id(page_align(space_header));
zip_size = dict_table_flags_to_zip_size(
mach_read_from_4(FSP_SPACE_FLAGS + space_header));
......@@ -1879,9 +1876,7 @@ fsp_alloc_seg_inode_page(
}
block = buf_page_get(space, zip_size, page_no, RW_X_LATCH, mtr);
#ifdef UNIV_SYNC_DEBUG
buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
#endif /* UNIV_SYNC_DEBUG */
block->check_index_page_at_flush = FALSE;
......@@ -1922,6 +1917,8 @@ fsp_alloc_seg_inode(
ulint zip_size;
ulint n;
ut_ad(page_offset(space_header) == FSP_HEADER_OFFSET);
if (flst_get_len(space_header + FSP_SEG_INODES_FREE, mtr) == 0) {
/* Allocate a new segment inode page */
......@@ -1939,9 +1936,8 @@ fsp_alloc_seg_inode(
mach_read_from_4(FSP_SPACE_FLAGS + space_header));
block = buf_page_get(page_get_space_id(page_align(space_header)),
zip_size, page_no, RW_X_LATCH, mtr);
#ifdef UNIV_SYNC_DEBUG
buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
#endif /* UNIV_SYNC_DEBUG */
page = buf_block_get_frame(block);
n = fsp_seg_inode_page_find_free(page, 0, zip_size, mtr);
......@@ -2402,6 +2398,7 @@ fseg_fill_free_list(
ulint used;
ut_ad(inode && mtr);
ut_ad(!((page_offset(inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE));
reserved = fseg_n_reserved_pages_low(inode, &used, mtr);
......@@ -2462,6 +2459,8 @@ fseg_alloc_free_extent(
dulint seg_id;
fil_addr_t first;
ut_ad(!((page_offset(inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE));
if (flst_get_len(inode + FSEG_FREE, mtr) > 0) {
/* Segment free list is not empty, allocate from it */
......@@ -2531,6 +2530,7 @@ fseg_alloc_free_page_low(
ut_ad((direction >= FSP_UP) && (direction <= FSP_NO_DIR));
ut_ad(mach_read_from_4(seg_inode + FSEG_MAGIC_N)
== FSEG_MAGIC_N_VALUE);
ut_ad(!((page_offset(seg_inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE));
seg_id = mtr_read_dulint(seg_inode + FSEG_ID, mtr);
ut_ad(!ut_dulint_is_zero(seg_id));
......@@ -2712,9 +2712,8 @@ fseg_alloc_free_page_low(
mach_read_from_4(FSP_SPACE_FLAGS + space_header));
block = buf_page_create(space, ret_page, zip_size, mtr);
#ifdef UNIV_SYNC_DEBUG
buf_block_dbg_add_level(block, SYNC_FSP_PAGE);
#endif /* UNIV_SYNC_DEBUG */
if (UNIV_UNLIKELY(block != buf_page_get(space, zip_size,
ret_page, RW_X_LATCH,
mtr))) {
......@@ -3121,6 +3120,7 @@ fseg_mark_page_used(
ulint not_full_n_used;
ut_ad(seg_inode && mtr);
ut_ad(!((page_offset(seg_inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE));
descr = xdes_get_descriptor(space, zip_size, page, mtr);
......@@ -3183,6 +3183,7 @@ fseg_free_page_low(
ut_ad(seg_inode && mtr);
ut_ad(mach_read_from_4(seg_inode + FSEG_MAGIC_N)
== FSEG_MAGIC_N_VALUE);
ut_ad(!((page_offset(seg_inode) - FSEG_ARR_OFFSET) % FSEG_INODE_SIZE));
/* Drop search system page hash index if the page is found in
the pool and is hashed */
......
......@@ -373,11 +373,20 @@ ha_print_info(
FILE* file, /* in: file where to print */
hash_table_t* table) /* in: hash table */
{
#ifdef UNIV_DEBUG
/* Some of the code here is disabled for performance reasons in production
builds, see http://bugs.mysql.com/36941 */
#define PRINT_USED_CELLS
#endif /* UNIV_DEBUG */
#ifdef PRINT_USED_CELLS
hash_cell_t* cell;
ulint cells = 0;
ulint n_bufs;
ulint i;
#endif /* PRINT_USED_CELLS */
ulint n_bufs;
#ifdef PRINT_USED_CELLS
for (i = 0; i < hash_get_n_cells(table); i++) {
cell = hash_get_nth_cell(table, i);
......@@ -387,10 +396,14 @@ ha_print_info(
cells++;
}
}
#endif /* PRINT_USED_CELLS */
fprintf(file, "Hash table size %lu",
(ulong) hash_get_n_cells(table));
fprintf(file,
"Hash table size %lu, used cells %lu",
(ulong) hash_get_n_cells(table), (ulong) cells);
#ifdef PRINT_USED_CELLS
fprintf(file, ", used cells %lu", (ulong) cells);
#endif /* PRINT_USED_CELLS */
if (table->heaps == NULL && table->heap != NULL) {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment