Commit 7199ac59 authored by Michael Widenius's avatar Michael Widenius

Aria fixes:

- Fixed multi-user problem with one thread doing inserts and another doing scans that gave error 175
- Fixed bug that caused assert in move_to_next_bitmap() & _ma_read_bitmap_page()
- Much more DBUG_ASSERT(!maria_assert_if_crashed_table) to detect errors early
- EXTERNAL_LOCKING -> MARIA_EXTERNAL_LOCKING (to use same define everywhere

storage/maria/ma_bitmap.c:
  More secure handling of first_bitmap_with_space (now we also take care of wrong values)
  Don't set page to -1; This fixed unlikely bug that caused assert in move_to_next_bitmap() & _ma_read_bitmap_page()
storage/maria/ma_blockrec.c:
  More DBUG_ASSERT()'s
  Fixed multi-user problem with one thread doing inserts and another doing scans that gave error 175
  (We should use data_file_length from start of scan, not new value as new bitmap page may not yet be in page cache)
storage/maria/ma_check.c:
  EXTERNAL_LOCKING -> MARIA_EXTERNAL_LOCKING (to use same define everywhere)
storage/maria/ma_checkpoint.c:
  Made maria_checkpoint_min_activity static so that one can change it in debugger while testing.
  Fixed long standing performance problem that caused write of state info at checkpoint for any file that was ever changed since open.
storage/maria/ma_create.c:
  EXTERNAL_LOCKING -> MARIA_EXTERNAL_LOCKING (to use same define everywhere
storage/maria/ma_dynrec.c:
  Added missing MARIA_EXTERNAL_LOCKING (minor performance improvement)
storage/maria/ma_locking.c:
  EXTERNAL_LOCKING -> MARIA_EXTERNAL_LOCKING (to use same define everywhere
storage/maria/ma_open.c:
  EXTERNAL_LOCKING -> MARIA_EXTERNAL_LOCKING (to use same define everywhere
storage/maria/ma_pagecache.c:
  Added assert to detect reads outside of data file
storage/maria/maria_def.h:
  Added checkpoint_state to cache state writes in checkpoint
parent 5e876bd3
......@@ -176,6 +176,7 @@ static inline my_bool write_changed_bitmap(MARIA_SHARE *share,
PAGECACHE_LOCK_LEFT_UNLOCKED,
PAGECACHE_PIN_LEFT_UNPINNED,
PAGECACHE_WRITE_DELAY, 0, LSN_IMPOSSIBLE);
DBUG_ASSERT(!res);
DBUG_RETURN(res);
}
else
......@@ -199,6 +200,7 @@ static inline my_bool write_changed_bitmap(MARIA_SHARE *share,
page_link.unlock= PAGECACHE_LOCK_LEFT_UNLOCKED;
page_link.changed= 1;
push_dynamic(&bitmap->pinned_pages, (const uchar*) (void*) &page_link);
DBUG_ASSERT(!res);
DBUG_RETURN(res);
}
}
......@@ -225,6 +227,7 @@ my_bool _ma_bitmap_init(MARIA_SHARE *share, File file)
uint max_page_size;
MARIA_FILE_BITMAP *bitmap= &share->bitmap;
uint size= share->block_size;
pgcache_page_no_t first_bitmap_with_space;
#ifndef DBUG_OFF
/* We want to have a copy of the bitmap to be able to print differences */
size*= 2;
......@@ -266,13 +269,14 @@ my_bool _ma_bitmap_init(MARIA_SHARE *share, File file)
pthread_mutex_init(&share->bitmap.bitmap_lock, MY_MUTEX_INIT_SLOW);
pthread_cond_init(&share->bitmap.bitmap_cond, 0);
first_bitmap_with_space= share->state.first_bitmap_with_space;
_ma_bitmap_reset_cache(share);
if (share->state.first_bitmap_with_space == ~(pgcache_page_no_t) 0)
{
/* Start scanning for free space from start of file */
share->state.first_bitmap_with_space = 0;
}
/* Restore first_bitmap_with_space if it's resonable */
if (first_bitmap_with_space <= (share->state.state.data_file_length /
share->block_size))
share->state.first_bitmap_with_space= first_bitmap_with_space;
return 0;
}
......@@ -644,7 +648,8 @@ void _ma_bitmap_delete_all(MARIA_SHARE *share)
@notes
This is called after we have swapped file descriptors and we want
bitmap to forget all cached information
bitmap to forget all cached information.
It's also called directly after we have opened a file.
*/
void _ma_bitmap_reset_cache(MARIA_SHARE *share)
......@@ -660,13 +665,20 @@ void _ma_bitmap_reset_cache(MARIA_SHARE *share)
We can't read a page yet, as in some case we don't have an active
page cache yet.
Pretend we have a dummy, full and not changed bitmap page in memory.
We set bitmap->page to a value so that if we use it in
move_to_next_bitmap() it will point to page 0.
(This can only happen if writing to a bitmap page fails)
*/
bitmap->page= ~(ulonglong) 0;
bitmap->page= ((pgcache_page_no_t) 0) - bitmap->pages_covered;
bitmap->used_size= bitmap->total_size;
bfill(bitmap->map, share->block_size, 255);
#ifndef DBUG_OFF
memcpy(bitmap->map + bitmap->block_size, bitmap->map, bitmap->block_size);
#endif
/* Start scanning for free space from start of file */
share->state.first_bitmap_with_space = 0;
}
}
......
......@@ -1019,7 +1019,7 @@ make_space_for_directory(MARIA_HA *info,
UNDO of DELETE (in which case we know the row was on the
page before) or if the bitmap told us there was space on page
*/
DBUG_ASSERT(0);
DBUG_ASSERT(!maria_assert_if_crashed_table);
return(1);
}
}
......@@ -1776,6 +1776,7 @@ static my_bool get_head_or_tail_page(MARIA_HA *info,
DBUG_RETURN(0);
crashed:
DBUG_ASSERT(!maria_assert_if_crashed_table);
_ma_set_fatal_error(share, HA_ERR_WRONG_IN_RECORD); /* File crashed */
DBUG_RETURN(1);
}
......@@ -1869,6 +1870,7 @@ static my_bool get_rowpos_in_head_or_tail_page(MARIA_HA *info,
DBUG_RETURN(0);
err:
DBUG_ASSERT(!maria_assert_if_crashed_table);
_ma_set_fatal_error(share, HA_ERR_WRONG_IN_RECORD); /* File crashed */
DBUG_RETURN(1);
}
......@@ -2755,7 +2757,7 @@ static my_bool write_block_record(MARIA_HA *info,
DBUG_ASSERT(length <= column->length);
break;
default: /* Wrong data */
DBUG_ASSERT(0);
DBUG_ASSERT(!maria_assert_if_crashed_table);
length=0;
break;
}
......@@ -3418,6 +3420,7 @@ static my_bool write_block_record(MARIA_HA *info,
DBUG_RETURN(0);
crashed:
DBUG_ASSERT(!maria_assert_if_crashed_table);
/* Something was wrong with data on page */
_ma_set_fatal_error(share, HA_ERR_WRONG_IN_RECORD);
......@@ -3812,6 +3815,7 @@ static my_bool _ma_update_block_record2(MARIA_HA *info,
DBUG_RETURN(0);
err:
DBUG_ASSERT(!maria_assert_if_crashed_table);
DBUG_PRINT("error", ("errpos: %d", errpos));
if (info->non_flushable_state)
_ma_bitmap_flushable(info, -1);
......@@ -3952,6 +3956,7 @@ static my_bool _ma_update_at_original_place(MARIA_HA *info,
DBUG_RETURN(0);
err:
DBUG_ASSERT(!maria_assert_if_crashed_table);
_ma_mark_file_crashed(share);
if (info->non_flushable_state)
_ma_bitmap_flushable(info, -1);
......@@ -4333,6 +4338,7 @@ my_bool _ma_delete_block_record(MARIA_HA *info, const uchar *record)
DBUG_RETURN(0);
err:
DBUG_ASSERT(!maria_assert_if_crashed_table);
_ma_bitmap_flushable(info, -1);
_ma_unpin_all_pages_and_finalize_row(info, LSN_IMPOSSIBLE);
DBUG_RETURN(1);
......@@ -4533,6 +4539,7 @@ static uchar *read_next_extent(MARIA_HA *info, MARIA_EXTENT_CURSOR *extent,
crashed:
DBUG_ASSERT(!maria_assert_if_crashed_table);
_ma_set_fatal_error(share, HA_ERR_WRONG_IN_RECORD);
DBUG_PRINT("error", ("wrong extent information"));
DBUG_RETURN(0);
......@@ -4680,6 +4687,7 @@ int _ma_read_block_record2(MARIA_HA *info, uchar *record,
if (!info->trn)
{
/* File crashed */
DBUG_ASSERT(!maria_assert_if_crashed_table);
_ma_set_fatal_error(share, HA_ERR_WRONG_IN_RECORD);
DBUG_RETURN(HA_ERR_WRONG_IN_RECORD);
}
......@@ -4961,6 +4969,7 @@ int _ma_read_block_record2(MARIA_HA *info, uchar *record,
DBUG_RETURN(0);
err:
DBUG_ASSERT(!maria_assert_if_crashed_table);
/* Something was wrong with data on record */
DBUG_PRINT("error", ("Found record with wrong data"));
_ma_set_fatal_error(share, HA_ERR_WRONG_IN_RECORD);
......@@ -5100,6 +5109,7 @@ int _ma_read_block_record(MARIA_HA *info, uchar *record,
DBUG_ASSERT((buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK) == HEAD_PAGE);
if (!(data= get_record_position(buff, block_size, offset, &end_of_data)))
{
DBUG_ASSERT(!maria_assert_if_crashed_table);
DBUG_PRINT("error", ("Wrong directory entry in data block"));
my_errno= HA_ERR_RECORD_DELETED; /* File crashed */
DBUG_RETURN(HA_ERR_RECORD_DELETED);
......@@ -5330,7 +5340,7 @@ restart_record_read:
#ifdef SANITY_CHECKS
if (info->scan.dir < info->scan.dir_end)
{
DBUG_ASSERT(0);
DBUG_ASSERT(!maria_assert_if_crashed_table);
goto err;
}
#endif
......@@ -5442,7 +5452,7 @@ restart_bitmap_scan:
/* Read next bitmap */
info->scan.bitmap_page+= share->bitmap.pages_covered;
filepos= (my_off_t) info->scan.bitmap_page * block_size;
if (unlikely(filepos >= share->state.state.data_file_length))
if (unlikely(info->scan.bitmap_page >= info->scan.max_page))
{
DBUG_PRINT("info", ("Found end of file"));
DBUG_RETURN((my_errno= HA_ERR_END_OF_FILE));
......@@ -5460,6 +5470,7 @@ restart_bitmap_scan:
goto restart_bitmap_scan;
err:
DBUG_ASSERT(!maria_assert_if_crashed_table);
DBUG_PRINT("error", ("Wrong data on page"));
_ma_set_fatal_error(share, HA_ERR_WRONG_IN_RECORD);
DBUG_RETURN(HA_ERR_WRONG_IN_RECORD);
......@@ -6391,7 +6402,7 @@ err:
PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
LSN_IMPOSSIBLE, 0, FALSE);
_ma_mark_file_crashed(share);
DBUG_ASSERT(0); /* catch recovery errors early */
DBUG_ASSERT(!maria_assert_if_crashed_table); /* catch recovery error early */
DBUG_RETURN((my_errno= error));
}
......@@ -6494,7 +6505,7 @@ err:
PAGECACHE_UNPIN, LSN_IMPOSSIBLE,
LSN_IMPOSSIBLE, 0, FALSE);
_ma_mark_file_crashed(share);
DBUG_ASSERT(0);
DBUG_ASSERT(!maria_assert_if_crashed_table);
DBUG_RETURN((my_errno= error));
}
......@@ -6566,7 +6577,7 @@ uint _ma_apply_redo_free_blocks(MARIA_HA *info,
{
pthread_mutex_unlock(&share->bitmap.bitmap_lock);
_ma_mark_file_crashed(share);
DBUG_ASSERT(0);
DBUG_ASSERT(!maria_assert_if_crashed_table);
DBUG_RETURN(res);
}
}
......@@ -6652,7 +6663,7 @@ uint _ma_apply_redo_free_head_or_tail(MARIA_HA *info, LSN lsn,
err:
_ma_mark_file_crashed(share);
DBUG_ASSERT(0);
DBUG_ASSERT(!maria_assert_if_crashed_table);
DBUG_RETURN(1);
}
......@@ -6853,7 +6864,7 @@ uint _ma_apply_redo_insert_row_blobs(MARIA_HA *info,
err:
_ma_mark_file_crashed(share);
DBUG_ASSERT(0);
DBUG_ASSERT(!maria_assert_if_crashed_table);
DBUG_RETURN(1);
}
......@@ -6923,6 +6934,7 @@ end:
DBUG_RETURN(res);
err:
DBUG_ASSERT(!maria_assert_if_crashed_table);
res= 1;
_ma_mark_file_crashed(share);
goto end;
......@@ -7161,6 +7173,7 @@ my_bool _ma_apply_undo_row_delete(MARIA_HA *info, LSN undo_lsn,
DBUG_RETURN(0);
err:
DBUG_ASSERT(!maria_assert_if_crashed_table);
_ma_mark_file_crashed(share);
if (info->non_flushable_state)
_ma_bitmap_flushable(info, -1);
......@@ -7336,6 +7349,7 @@ end:
DBUG_RETURN(error);
err:
DBUG_ASSERT(!maria_assert_if_crashed_table);
error= 1;
_ma_mark_file_crashed(share);
goto end;
......
......@@ -6031,7 +6031,7 @@ int maria_recreate_table(HA_CHECK *param, MARIA_HA **org_info, char *filename)
(*org_info)->s->state.state.records= info.state->records;
if (share.state.create_time)
(*org_info)->s->state.create_time=share.state.create_time;
#ifdef EXTERNAL_LOCKING
#ifdef MARIA_EXTERNAL_LOCKING
(*org_info)->s->state.unique= (*org_info)->this_unique= share.state.unique;
#endif
(*org_info)->s->state.state.checksum= info.state->checksum;
......
......@@ -533,10 +533,12 @@ filter_flush_file_evenly(enum pagecache_page_type type,
risk could be that while a checkpoint happens no LRD flushing happens.
*/
static uint maria_checkpoint_min_activity= 2*1024*1024;
pthread_handler_t ma_checkpoint_background(void *arg)
{
/** @brief At least this of log/page bytes written between checkpoints */
const uint checkpoint_min_activity= 2*1024*1024;
/*
If the interval could be changed by the user while we are in this thread,
it could be annoying: for example it could cause "case 2" to be executed
......@@ -588,12 +590,12 @@ pthread_handler_t ma_checkpoint_background(void *arg)
would decrease the amount of read pages in recovery).
In case of one short statement per minute (very low load), we don't
want to checkpoint every minute, hence the positive
checkpoint_min_activity.
maria_checkpoint_min_activity.
*/
if (((translog_get_horizon() - log_horizon_at_last_checkpoint) +
(maria_pagecache->global_cache_write -
pagecache_flushes_at_last_checkpoint) *
maria_pagecache->block_size) < checkpoint_min_activity)
maria_pagecache->block_size) < maria_checkpoint_min_activity)
{
/* don't take checkpoint, so don't know what to flush */
pages_to_flush_before_next_checkpoint= 0;
......@@ -1011,17 +1013,25 @@ static int collect_tables(LEX_STRING *str, LSN checkpoint_start_log_horizon)
possible that Recovery does not start from before the REDO and thus
the state is not recovered. A solution may be to set
share->changed=1 under log mutex when writing log records.
But as anyway we have another problem below, this optimization would
be of little use.
The current solution is to keep a copy the last saved state and
not write the state if it was same as last time. It's ok if
is_of_horizon would be different on disk if all other data is
the same.
*/
/** @todo flush state only if changed since last checkpoint */
DBUG_ASSERT(share->last_version != 0);
state_copy->state.is_of_horizon= share->state.is_of_horizon=
state_copies_horizon;
if (kfile.file >= 0)
share->checkpoint_state.is_of_horizon= state_copies_horizon;
if (kfile.file >= 0 && memcmp(&share->checkpoint_state,
&state_copy->state,
sizeof(state_copy->state)))
{
sync_error|=
_ma_state_info_write_sub(kfile.file, &state_copy->state,
MA_STATE_INFO_WRITE_DONT_MOVE_OFFSET);
memcpy(&share->checkpoint_state,
&state_copy->state, sizeof(state_copy->state));
}
/*
We don't set share->changed=0 because it may interfere with a
concurrent _ma_writeinfo() doing share->changed=1 (cancel its
......
......@@ -677,7 +677,7 @@ int maria_create(const char *name, enum data_file_type datafile_type,
share.state.dellink = HA_OFFSET_ERROR;
share.state.first_bitmap_with_space= 0;
#ifdef EXTERNAL_LOCKING
#ifdef MARIA_EXTERNAL_LOCKING
share.state.process= (ulong) getpid();
#endif
share.state.version= (ulong) time((time_t*) 0);
......
......@@ -1768,6 +1768,7 @@ int _ma_read_rnd_dynamic_record(MARIA_HA *info,
{
if (filepos >= info->state->data_file_length)
{
#ifdef MARIA_EXTERNAL_LOCKING
if (!info_read)
{ /* Check if changed */
info_read=1;
......@@ -1780,6 +1781,10 @@ int _ma_read_rnd_dynamic_record(MARIA_HA *info,
my_errno= HA_ERR_END_OF_FILE;
goto err;
}
#else
my_errno= HA_ERR_END_OF_FILE;
goto err;
#endif
}
if (info->opt_flag & READ_CACHE_USED)
{
......
......@@ -103,7 +103,7 @@ int maria_lock_database(MARIA_HA *info, int lock_type)
rw_unlock(&share->mmap_lock);
}
#endif
#ifdef EXTERNAL_LOCKING
#ifdef MARIA_EXTERNAL_LOCKING
share->state.process= share->last_process=share->this_process;
share->state.unique= info->last_unique= info->this_unique;
share->state.update_count= info->last_loop= ++info->this_loop;
......@@ -303,7 +303,7 @@ int _ma_writeinfo(register MARIA_HA *info, uint operation)
{ /* Two threads can't be here */
olderror= my_errno; /* Remember last error */
#ifdef EXTERNAL_LOCKING
#ifdef MARIA_EXTERNAL_LOCKING
/*
The following only makes sense if we want to be allow two different
processes access the same table at the same time
......@@ -341,7 +341,7 @@ int _ma_writeinfo(register MARIA_HA *info, uint operation)
int _ma_test_if_changed(register MARIA_HA *info)
{
#ifdef EXTERNAL_LOCKING
#ifdef MARIA_EXTERNAL_LOCKING
MARIA_SHARE *share= info->s;
if (share->state.process != share->last_process ||
share->state.unique != info->last_unique ||
......
......@@ -135,7 +135,7 @@ static MARIA_HA *maria_clone_internal(MARIA_SHARE *share, const char *name,
info.update= (short) (HA_STATE_NEXT_FOUND+HA_STATE_PREV_FOUND);
info.opt_flag=READ_CHECK_USED;
info.this_unique= (ulong) info.dfile.file; /* Uniq number in process */
#ifdef EXTERNAL_LOCKING
#ifdef MARIA_EXTERNAL_LOCKING
if (share->data_file_type == COMPRESSED_RECORD)
info.this_unique= share->state.unique;
info.this_loop=0; /* Update counter */
......@@ -804,7 +804,7 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags)
_ma_set_index_pagecache_callbacks(&share->kfile, share);
share->this_process=(ulong) getpid();
#ifdef EXTERNAL_LOCKING
#ifdef MARIA_EXTERNAL_LOCKING
share->last_process= share->state.process;
#endif
share->base.key_parts=key_parts;
......@@ -1443,7 +1443,7 @@ static uchar *_ma_state_info_read(uchar *ptr, MARIA_STATE_INFO *state)
uint _ma_state_info_read_dsk(File file __attribute__((unused)),
MARIA_STATE_INFO *state __attribute__((unused)))
{
#ifdef EXTERNAL_LOCKING
#ifdef MARIA_EXTERNAL_LOCKING
uchar buff[MARIA_STATE_INFO_SIZE + MARIA_STATE_EXTRA_SIZE];
/* trick to detect transactional tables */
......
......@@ -2777,6 +2777,7 @@ static void read_block(PAGECACHE *pagecache,
pagecache_pthread_mutex_lock(&pagecache->cache_lock);
if (error)
{
DBUG_ASSERT(maria_in_recovery || !maria_assert_if_crashed_table);
block->status|= PCBLOCK_ERROR;
block->error= (int16) my_errno;
my_debug_put_break_here();
......
......@@ -273,6 +273,7 @@ typedef struct st_maria_file_bitmap
typedef struct st_maria_share
{ /* Shared between opens */
MARIA_STATE_INFO state;
MARIA_STATE_INFO checkpoint_state; /* Copy of saved state by checkpoint */
MARIA_BASE_INFO base;
MARIA_STATE_HISTORY *state_history;
MARIA_KEYDEF ft2_keyinfo; /* Second-level ft-key definition */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment