/* Copyright (C) 2000-2005 MySQL AB & Innobase Oy

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */

/* This file defines the InnoDB handler: the interface between MySQL and InnoDB
NOTE: You can only use noninlined InnoDB functions in this file, because we
have disabled the InnoDB inlining in this file. */

/* TODO list for the InnoDB handler in 5.0:
  - Remove the flag trx->active_trans and look at the InnoDB
    trx struct state field
  - fix savepoint functions to use savepoint storage area
  - Find out what kind of problems the OS X case-insensitivity causes to
    table and database names; should we 'normalize' the names like we do
    in Windows?
*/
unknown's avatar
unknown committed
29

30
#ifdef USE_PRAGMA_IMPLEMENTATION
31 32 33 34
#pragma implementation				// gcc: Class implementation
#endif

#include "mysql_priv.h"
unknown's avatar
unknown committed
35
#include "slave.h"
unknown's avatar
unknown committed
36

37 38 39 40
#ifdef HAVE_INNOBASE_DB
#include <m_ctype.h>
#include <hash.h>
#include <myisampack.h>
unknown's avatar
unknown committed
41
#include <mysys_err.h>
42
#include <my_sys.h>
43

44 45
#define MAX_ULONG_BIT ((ulong) 1 << (sizeof(ulong)*8-1))

46
#include "ha_innodb.h"
unknown's avatar
unknown committed
47

unknown's avatar
unknown committed
48 49 50
pthread_mutex_t innobase_share_mutex, /* to protect innobase_open_files */
                prepare_commit_mutex; /* to force correct commit order in
				      binlog */
51
bool innodb_inited= 0;
unknown's avatar
unknown committed
52

unknown's avatar
unknown committed
53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99
/*-----------------------------------------------------------------*/
/* These variables are used to implement (semi-)synchronous MySQL binlog
replication for InnoDB tables. */

pthread_cond_t  innobase_repl_cond;             /* Posix cond variable;
                                                this variable is signaled
                                                when enough binlog has been
                                                sent to slave, so that a
                                                waiting trx can return the
                                                'ok' message to the client
                                                for a commit */
pthread_mutex_t innobase_repl_cond_mutex;       /* Posix cond variable mutex
                                                that also protects the next
                                                innobase_repl_... variables */
uint            innobase_repl_state;            /* 1 if synchronous replication
                                                is switched on and is working
                                                ok; else 0 */
uint            innobase_repl_file_name_inited  = 0; /* This is set to 1 when
                                                innobase_repl_file_name
                                                contains meaningful data */
char*           innobase_repl_file_name;        /* The binlog name up to which
                                                we have sent some binlog to
                                                the slave */
my_off_t        innobase_repl_pos;              /* The position in that file
                                                up to which we have sent the
                                                binlog to the slave */
uint            innobase_repl_n_wait_threads    = 0; /* This tells how many
                                                transactions currently are
                                                waiting for the binlog to be
                                                sent to the client */
uint            innobase_repl_wait_file_name_inited = 0; /* This is set to 1
                                                when we know the 'smallest'
                                                wait position */
char*           innobase_repl_wait_file_name;   /* NULL, or the 'smallest'
                                                innobase_repl_file_name that
                                                a transaction is waiting for */
my_off_t        innobase_repl_wait_pos;         /* The smallest position in
                                                that file that a trx is
                                                waiting for: the trx can
                                                proceed and send an 'ok' to
                                                the client when MySQL has sent
                                                the binlog up to this position
                                                to the slave */
/*-----------------------------------------------------------------*/



100
/* Store MySQL definition of 'byte': in Linux it is char while InnoDB
101 102 103
uses unsigned char; the header univ.i which we include next defines
'byte' as a macro which expands to 'unsigned char' */

104
typedef byte	mysql_byte;
unknown's avatar
unknown committed
105

unknown's avatar
unknown committed
106 107
#define INSIDE_HA_INNOBASE_CC

108
/* Include necessary InnoDB headers */
109
extern "C" {
unknown's avatar
unknown committed
110
#include "../innobase/include/univ.i"
unknown's avatar
unknown committed
111
#include "../innobase/include/os0file.h"
unknown's avatar
unknown committed
112
#include "../innobase/include/os0thread.h"
unknown's avatar
unknown committed
113 114 115 116
#include "../innobase/include/srv0start.h"
#include "../innobase/include/srv0srv.h"
#include "../innobase/include/trx0roll.h"
#include "../innobase/include/trx0trx.h"
unknown's avatar
unknown committed
117
#include "../innobase/include/trx0sys.h"
118
#include "../innobase/include/mtr0mtr.h"
unknown's avatar
unknown committed
119 120 121 122 123
#include "../innobase/include/row0ins.h"
#include "../innobase/include/row0mysql.h"
#include "../innobase/include/row0sel.h"
#include "../innobase/include/row0upd.h"
#include "../innobase/include/log0log.h"
124
#include "../innobase/include/lock0lock.h"
unknown's avatar
unknown committed
125 126 127
#include "../innobase/include/dict0crea.h"
#include "../innobase/include/btr0cur.h"
#include "../innobase/include/btr0btr.h"
128
#include "../innobase/include/fsp0fsp.h"
129
#include "../innobase/include/sync0sync.h"
unknown's avatar
unknown committed
130
#include "../innobase/include/fil0fil.h"
131
#include "../innobase/include/trx0xa.h"
132 133 134 135 136
}

#define HA_INNOBASE_ROWS_IN_TABLE 10000 /* to get optimization right */
#define HA_INNOBASE_RANGE_COUNT	  100

137 138
uint 	innobase_init_flags 	= 0;
ulong 	innobase_cache_size 	= 0;
139
ulong 	innobase_large_page_size = 0;
140

unknown's avatar
unknown committed
141 142 143
/* The default values for the following, type long, start-up parameters
are declared in mysqld.cc: */

144
long innobase_mirrored_log_groups, innobase_log_files_in_group,
145
     innobase_log_file_size, innobase_log_buffer_size,
unknown's avatar
unknown committed
146 147
     innobase_buffer_pool_awe_mem_mb,
     innobase_buffer_pool_size, innobase_additional_mem_pool_size,
unknown's avatar
unknown committed
148
     innobase_file_io_threads,  innobase_lock_wait_timeout,
149
     innobase_force_recovery, innobase_open_files;
unknown's avatar
unknown committed
150

unknown's avatar
unknown committed
151 152
/* The default values for the following char* start-up parameters
are determined in innobase_init below: */
unknown's avatar
unknown committed
153
  
unknown's avatar
unknown committed
154
char*	innobase_data_home_dir			= NULL;
unknown's avatar
unknown committed
155
char*	innobase_data_file_path 		= NULL;
unknown's avatar
unknown committed
156
char*	innobase_log_group_home_dir		= NULL;
unknown's avatar
unknown committed
157
char*	innobase_log_arch_dir			= NULL;/* unused */
unknown's avatar
unknown committed
158 159
/* The following has a misleading name: starting from 4.0.5, this also
affects Windows: */
unknown's avatar
unknown committed
160 161 162 163 164
char*	innobase_unix_file_flush_method		= NULL;

/* Below we have boolean-valued start-up parameters, and their default
values */

unknown's avatar
unknown committed
165
uint	innobase_flush_log_at_trx_commit	= 1;
166
ulong	innobase_fast_shutdown			= 1;
unknown's avatar
unknown committed
167
my_bool innobase_log_archive			= FALSE;/* unused */
168 169 170
my_bool innobase_use_doublewrite    = TRUE;
my_bool innobase_use_checksums      = TRUE;
my_bool innobase_use_large_pages    = FALSE;
unknown's avatar
unknown committed
171
my_bool	innobase_use_native_aio			= FALSE;
unknown's avatar
unknown committed
172
my_bool	innobase_file_per_table			= FALSE;
173
my_bool innobase_locks_unsafe_for_binlog        = FALSE;
174
my_bool innobase_create_status_file		= FALSE;
175

unknown's avatar
unknown committed
176
static char *internal_innobase_data_file_path	= NULL;
177

178
/* The following counter is used to convey information to InnoDB
179 180 181 182 183
about server activity: in selects it is not sensible to call
srv_active_wake_master_thread after each fetch or search, we only do
it every INNOBASE_WAKE_INTERVAL'th step. */

#define INNOBASE_WAKE_INTERVAL	32
184
ulong	innobase_active_counter	= 0;
185 186 187

char*	innobase_home 	= NULL;

unknown's avatar
unknown committed
188
static HASH 	innobase_open_tables;
189

190 191 192 193
#ifdef __NETWARE__  	/* some special cleanup for NetWare */
bool nw_panic = FALSE;
#endif

194
static mysql_byte* innobase_get_key(INNOBASE_SHARE *share,uint *length,
195 196 197
			      my_bool not_used __attribute__((unused)));
static INNOBASE_SHARE *get_share(const char *table_name);
static void free_share(INNOBASE_SHARE *share);
198 199 200 201 202
static int innobase_close_connection(THD* thd);
static int innobase_commit(THD* thd, bool all);
static int innobase_rollback(THD* thd, bool all);
static int innobase_rollback_to_savepoint(THD* thd, void *savepoint);
static int innobase_savepoint(THD* thd, void *savepoint);
unknown's avatar
unknown committed
203
static int innobase_release_savepoint(THD* thd, void *savepoint);
204 205

/* Handlerton instance for InnoDB: the engine name, per-connection slot,
savepoint storage size and the transaction callbacks of this file. */
static handlerton innobase_hton = {
  "InnoDB",
  0,				/* slot */
  sizeof(trx_named_savept_t),	/* savepoint size. TODO: use it */
  innobase_close_connection,
  innobase_savepoint,
  innobase_rollback_to_savepoint,
  innobase_release_savepoint,
  innobase_commit,		/* commit */
  innobase_rollback,		/* rollback */
  innobase_xa_prepare,		/* prepare */
  innobase_xa_recover,		/* recover */
  innobase_commit_by_xid,	/* commit_by_xid */
  innobase_rollback_by_xid	/* rollback_by_xid */
};
220

221 222 223 224 225 226 227 228
/*********************************************************************
Commits a transaction in an InnoDB database. */

void
innobase_commit_low(
/*================*/
	trx_t*	trx);	/* in: transaction handle */

229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297
struct show_var_st innodb_status_variables[]= {
  {"buffer_pool_pages_data",
  (char*) &export_vars.innodb_buffer_pool_pages_data,     SHOW_LONG},
  {"buffer_pool_pages_dirty",
  (char*) &export_vars.innodb_buffer_pool_pages_dirty,    SHOW_LONG},
  {"buffer_pool_pages_flushed",
  (char*) &export_vars.innodb_buffer_pool_pages_flushed,  SHOW_LONG},
  {"buffer_pool_pages_free",
  (char*) &export_vars.innodb_buffer_pool_pages_free,     SHOW_LONG},
  {"buffer_pool_pages_latched",
  (char*) &export_vars.innodb_buffer_pool_pages_latched,  SHOW_LONG},
  {"buffer_pool_pages_misc",
  (char*) &export_vars.innodb_buffer_pool_pages_misc,     SHOW_LONG},
  {"buffer_pool_pages_total",
  (char*) &export_vars.innodb_buffer_pool_pages_total,    SHOW_LONG},
  {"buffer_pool_read_ahead_rnd",
  (char*) &export_vars.innodb_buffer_pool_read_ahead_rnd, SHOW_LONG},
  {"buffer_pool_read_ahead_seq",
  (char*) &export_vars.innodb_buffer_pool_read_ahead_seq, SHOW_LONG},
  {"buffer_pool_read_requests",
  (char*) &export_vars.innodb_buffer_pool_read_requests,  SHOW_LONG},
  {"buffer_pool_reads",
  (char*) &export_vars.innodb_buffer_pool_reads,          SHOW_LONG},
  {"buffer_pool_wait_free",
  (char*) &export_vars.innodb_buffer_pool_wait_free,      SHOW_LONG},
  {"buffer_pool_write_requests",
  (char*) &export_vars.innodb_buffer_pool_write_requests, SHOW_LONG},
  {"data_fsyncs",
  (char*) &export_vars.innodb_data_fsyncs,                SHOW_LONG},
  {"data_pending_fsyncs",
  (char*) &export_vars.innodb_data_pending_fsyncs,        SHOW_LONG},
  {"data_pending_reads",
  (char*) &export_vars.innodb_data_pending_reads,         SHOW_LONG},
  {"data_pending_writes",
  (char*) &export_vars.innodb_data_pending_writes,        SHOW_LONG},
  {"data_read",
  (char*) &export_vars.innodb_data_read,                  SHOW_LONG},
  {"data_reads",
  (char*) &export_vars.innodb_data_reads,                 SHOW_LONG},
  {"data_writes",
  (char*) &export_vars.innodb_data_writes,                SHOW_LONG},
  {"data_written",
  (char*) &export_vars.innodb_data_written,               SHOW_LONG},
  {"dblwr_pages_written",
  (char*) &export_vars.innodb_dblwr_pages_written,        SHOW_LONG},
  {"dblwr_writes",
  (char*) &export_vars.innodb_dblwr_writes,               SHOW_LONG},
  {"log_waits",
  (char*) &export_vars.innodb_log_waits,                  SHOW_LONG},
  {"log_write_requests",
  (char*) &export_vars.innodb_log_write_requests,         SHOW_LONG},
  {"log_writes",
  (char*) &export_vars.innodb_log_writes,                 SHOW_LONG},
  {"os_log_fsyncs",
  (char*) &export_vars.innodb_os_log_fsyncs,              SHOW_LONG},
  {"os_log_pending_fsyncs",
  (char*) &export_vars.innodb_os_log_pending_fsyncs,      SHOW_LONG},
  {"os_log_pending_writes",
  (char*) &export_vars.innodb_os_log_pending_writes,      SHOW_LONG},
  {"os_log_written",
  (char*) &export_vars.innodb_os_log_written,             SHOW_LONG},
  {"page_size",
  (char*) &export_vars.innodb_page_size,                  SHOW_LONG},
  {"pages_created",
  (char*) &export_vars.innodb_pages_created,              SHOW_LONG},
  {"pages_read",
  (char*) &export_vars.innodb_pages_read,                 SHOW_LONG},
  {"pages_written",
  (char*) &export_vars.innodb_pages_written,              SHOW_LONG},
unknown's avatar
unknown committed
298 299 300 301 302 303 304 305 306 307
  {"row_lock_waits",
  (char*) &export_vars.innodb_row_lock_waits,             SHOW_LONG},
  {"row_lock_current_waits",
  (char*) &export_vars.innodb_row_lock_current_waits,     SHOW_LONG},
  {"row_lock_time",
  (char*) &export_vars.innodb_row_lock_time,              SHOW_LONGLONG},
  {"row_lock_time_max",
  (char*) &export_vars.innodb_row_lock_time_max,          SHOW_LONG},
  {"row_lock_time_avg",
  (char*) &export_vars.innodb_row_lock_time_avg,          SHOW_LONG},
308 309 310 311 312 313 314 315 316 317
  {"rows_deleted",
  (char*) &export_vars.innodb_rows_deleted,               SHOW_LONG},
  {"rows_inserted",
  (char*) &export_vars.innodb_rows_inserted,              SHOW_LONG},
  {"rows_read",
  (char*) &export_vars.innodb_rows_read,                  SHOW_LONG},
  {"rows_updated",
  (char*) &export_vars.innodb_rows_updated,               SHOW_LONG},
  {NullS, NullS, SHOW_LONG}};

318 319
/* General functions */

unknown's avatar
unknown committed
320 321 322 323 324 325 326 327 328
/**********************************************************************
Inline front end for srv_conc_enter_innodb(): the call is made only when
srv_thread_concurrency is below SRV_CONCURRENCY_THRESHOLD, which saves the
cost of the function call in the (expected) other case. */
inline
void
innodb_srv_conc_enter_innodb(
/*=========================*/
	trx_t*	trx)	/* in: transaction handle */
{
	if (!UNIV_LIKELY(srv_thread_concurrency
			 >= SRV_CONCURRENCY_THRESHOLD)) {

		srv_conc_enter_innodb(trx);
	}
}

/**********************************************************************
Inline front end for srv_conc_exit_innodb(): the call is made only when
srv_thread_concurrency is below SRV_CONCURRENCY_THRESHOLD, which saves the
cost of the function call in the (expected) other case. */
inline
void
innodb_srv_conc_exit_innodb(
/*========================*/
	trx_t*	trx)	/* in: transaction handle */
{
	if (!UNIV_LIKELY(srv_thread_concurrency
			 >= SRV_CONCURRENCY_THRESHOLD)) {

		srv_conc_exit_innodb(trx);
	}
}

unknown's avatar
unknown committed
354
/**********************************************************************
unknown's avatar
unknown committed
355
Releases possible search latch and InnoDB thread FIFO ticket. These should
unknown's avatar
unknown committed
356 357 358 359
be released at each SQL statement end, and also when mysqld passes the
control to the client. It does no harm to release these also in the middle
of an SQL statement. */
inline
unknown's avatar
unknown committed
360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375
void
innobase_release_stat_resources(
/*============================*/
	trx_t*	trx)	/* in: transaction object */
{
	if (trx->has_search_latch) {
		trx_search_latch_release_if_reserved(trx);
	}

	if (trx->declared_to_be_inside_innodb) {
		/* Release our possible ticket in the FIFO */

		srv_conc_force_exit_innodb(trx);
	}
}

376 377 378 379 380 381 382 383
/************************************************************************
Call this function when mysqld passes control to the client. That is to
avoid deadlocks on the adaptive hash S-latch possibly held by thd. For more
documentation, see handler.cc. */

void
innobase_release_temporary_latches(
/*===============================*/
384
        THD *thd)
385
{
unknown's avatar
unknown committed
386 387
	trx_t*	trx;

unknown's avatar
unknown committed
388 389 390 391 392
	if (!innodb_inited) {
		
		return;
	}

unknown's avatar
unknown committed
393 394 395 396 397
	trx = (trx_t*) thd->ha_data[innobase_hton.slot];

	if (trx) {
        	innobase_release_stat_resources(trx);
	}
398 399
}

400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416
/************************************************************************
Bumps innobase_active_counter and, on every INNOBASE_WAKE_INTERVALth call,
invokes srv_active_wake_master_thread. Use this function when a single
database operation may introduce a small need for server utility
activity, like checkpointing. */
inline
void
innobase_active_small(void)
/*=======================*/
{
	if ((++innobase_active_counter % INNOBASE_WAKE_INTERVAL) != 0) {

		/* Not yet time to wake the master thread */
		return;
	}

	srv_active_wake_master_thread();
}

417
/************************************************************************
unknown's avatar
unknown committed
418 419 420
Converts an InnoDB error code to a MySQL error code and also tells to MySQL
about a possible transaction rollback inside InnoDB caused by a lock wait
timeout or a deadlock. */
421 422 423 424 425
static
int
convert_error_code_to_mysql(
/*========================*/
			/* out: MySQL error code */
unknown's avatar
unknown committed
426 427
	int	error,	/* in: InnoDB error code */
	THD*	thd)	/* in: user thread handle or NULL */
428 429 430 431 432 433 434 435 436 437 438 439 440 441 442
{
	if (error == DB_SUCCESS) {

		return(0);

  	} else if (error == (int) DB_DUPLICATE_KEY) {

    		return(HA_ERR_FOUND_DUPP_KEY);

 	} else if (error == (int) DB_RECORD_NOT_FOUND) {

    		return(HA_ERR_NO_ACTIVE_RECORD);

 	} else if (error == (int) DB_ERROR) {

unknown's avatar
unknown committed
443
    		return(-1); /* unspecified error */
444 445

 	} else if (error == (int) DB_DEADLOCK) {
unknown's avatar
unknown committed
446
 		/* Since we rolled back the whole transaction, we must
unknown's avatar
unknown committed
447 448 449 450 451 452
 		tell it also to MySQL so that MySQL knows to empty the
 		cached binlog for this transaction */

 		if (thd) {
 			ha_rollback(thd);
 		}
453

454 455 456 457
    		return(HA_ERR_LOCK_DEADLOCK);

 	} else if (error == (int) DB_LOCK_WAIT_TIMEOUT) {

unknown's avatar
unknown committed
458 459 460
		/* Since we rolled back the whole transaction, we must
		tell it also to MySQL so that MySQL knows to empty the
		cached binlog for this transaction */
unknown's avatar
unknown committed
461

unknown's avatar
unknown committed
462 463 464
		if (thd) {
			ha_rollback(thd);
		}
unknown's avatar
unknown committed
465

unknown's avatar
unknown committed
466
   		return(HA_ERR_LOCK_WAIT_TIMEOUT);
467 468 469

 	} else if (error == (int) DB_NO_REFERENCED_ROW) {

unknown's avatar
Merge  
unknown committed
470
    		return(HA_ERR_NO_REFERENCED_ROW);
471 472 473

 	} else if (error == (int) DB_ROW_IS_REFERENCED) {

unknown's avatar
Merge  
unknown committed
474
    		return(HA_ERR_ROW_IS_REFERENCED);
475

476
        } else if (error == (int) DB_CANNOT_ADD_CONSTRAINT) {
477

unknown's avatar
Merge  
unknown committed
478
    		return(HA_ERR_CANNOT_ADD_FOREIGN);
479

unknown's avatar
unknown committed
480 481
        } else if (error == (int) DB_CANNOT_DROP_CONSTRAINT) {

unknown's avatar
unknown committed
482
    		return(HA_ERR_ROW_IS_REFERENCED); /* TODO: This is a bit
unknown's avatar
unknown committed
483 484
						misleading, a new MySQL error
						code should be introduced */
485 486
        } else if (error == (int) DB_COL_APPEARS_TWICE_IN_INDEX) {

unknown's avatar
unknown committed
487
    		return(HA_ERR_CRASHED);
488

489 490 491 492 493 494 495 496 497 498 499 500 501 502 503
 	} else if (error == (int) DB_OUT_OF_FILE_SPACE) {

    		return(HA_ERR_RECORD_FILE_FULL);

 	} else if (error == (int) DB_TABLE_IS_BEING_USED) {

    		return(HA_ERR_WRONG_COMMAND);

 	} else if (error == (int) DB_TABLE_NOT_FOUND) {

    		return(HA_ERR_KEY_NOT_FOUND);

  	} else if (error == (int) DB_TOO_BIG_RECORD) {

    		return(HA_ERR_TO_BIG_ROW);
unknown's avatar
unknown committed
504 505 506 507

  	} else if (error == (int) DB_CORRUPTION) {

    		return(HA_ERR_CRASHED);
unknown's avatar
unknown committed
508 509 510
  	} else if (error == (int) DB_NO_SAVEPOINT) {

    		return(HA_ERR_NO_SAVEPOINT);
unknown's avatar
unknown committed
511 512 513
  	} else if (error == (int) DB_LOCK_TABLE_FULL) {

    		return(HA_ERR_LOCK_TABLE_FULL);
514
    	} else {
unknown's avatar
unknown committed
515
    		return(-1);			// Unknown error
516 517 518
    	}
}

519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547
/*****************************************************************
Locks LOCK_thread_count. Call this before reserving the InnoDB kernel_mutex
if you are going to print a thd that is not associated with the current
thread: it protects MySQL from setting thd->query NULL meanwhile. When
printing a thd of the current thread, MySQL cannot modify thd->query, and
the call is not necessary. After releasing the kernel_mutex, call
innobase_mysql_end_print_arbitrary_thd().
NOTE that /mysql/innobase/lock/lock0lock.c must contain the prototype for this
function! */
extern "C"
void
innobase_mysql_prepare_print_arbitrary_thd(void)
/*============================================*/
{
	VOID(pthread_mutex_lock(&LOCK_thread_count));
}

/*****************************************************************
Releases the mutex (LOCK_thread_count) that was reserved by
innobase_mysql_prepare_print_arbitrary_thd().
NOTE that /mysql/innobase/lock/lock0lock.c must contain the prototype for this
function! */
extern "C"
void
innobase_mysql_end_print_arbitrary_thd(void)
/*========================================*/
{
	VOID(pthread_mutex_unlock(&LOCK_thread_count));
}

548 549
/*****************************************************************
Prints info of a THD object (== user session thread) to the
unknown's avatar
unknown committed
550
standard output. NOTE that /mysql/innobase/trx/trx0trx.c must contain
551
the prototype for this function! */
552
extern "C"
553 554 555
void
innobase_mysql_print_thd(
/*=====================*/
556
	FILE*   f,	/* in: output stream */
557
        void*   input_thd)/* in: pointer to a MySQL THD object */
558
{
559 560
	const THD*	thd;
	const char*	s;
unknown's avatar
unknown committed
561
	char		buf[301];
562

563
        thd = (const THD*) input_thd;
564

565
  	fprintf(f, "MySQL thread id %lu, query id %lu",
566
		thd->thread_id, (ulong) thd->query_id);
567 568 569 570
	if (thd->host) {
		putc(' ', f);
		fputs(thd->host, f);
	}
571

572 573 574 575
	if (thd->ip) {
		putc(' ', f);
		fputs(thd->ip, f);
	}
576

577
  	if (thd->user) {
578 579
		putc(' ', f);
		fputs(thd->user, f);
580 581
  	}

582
	if ((s = thd->proc_info)) {
583
		putc(' ', f);
584
		fputs(s, f);
585
	}
586

587
	if ((s = thd->query)) {
unknown's avatar
unknown committed
588
		/* determine the length of the query string */
unknown's avatar
unknown committed
589 590 591 592 593
		uint32 i, len;
		
		len = thd->query_length;

		if (len > 300) {
594
			len = 300;	/* ADDITIONAL SAFETY: print at most
unknown's avatar
unknown committed
595
					300 chars to reduce the probability of
596
					a seg fault if there is a race in
unknown's avatar
unknown committed
597 598 599
					thd->query_length in MySQL; after
					May 14, 2004 probably no race any more,
					but better be safe */
unknown's avatar
unknown committed
600
		}
unknown's avatar
unknown committed
601

602 603 604
                /* Use strmake to reduce the timeframe
                   for a race, compared to fwrite() */
		i= (uint) (strmake(buf, s, len) - buf);
unknown's avatar
unknown committed
605
		putc('\n', f);
unknown's avatar
unknown committed
606
		fwrite(buf, 1, i, f);
607
	}
608

609
	putc('\n', f);
610 611
}

612
/**********************************************************************
613
Get the variable length bounds of the given character set.
614 615 616 617

NOTE that the exact prototype of this function has to be in
/innobase/data/data0type.ic! */
extern "C"
618
void
619 620
innobase_get_cset_width(
/*====================*/
621 622 623
	ulint	cset,		/* in: MySQL charset-collation code */
	ulint*	mbminlen,	/* out: minimum length of a char (in bytes) */
	ulint*	mbmaxlen)	/* out: maximum length of a char (in bytes) */
624 625 626
{
	CHARSET_INFO*	cs;
	ut_ad(cset < 256);
627 628
	ut_ad(mbminlen);
	ut_ad(mbmaxlen);
629 630

	cs = all_charsets[cset];
631 632 633 634 635 636 637
	if (cs) {
		*mbminlen = cs->mbminlen;
		*mbmaxlen = cs->mbmaxlen;
	} else {
		ut_a(cset == 0);
		*mbminlen = *mbmaxlen = 0;
	}
638 639
}

640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669
/**********************************************************************
Case-insensitive comparison of two NUL-terminated UTF-8 strings; the
comparison is delegated to my_strcasecmp() with system_charset_info.

NOTE that the exact prototype of this function has to be in
/innobase/dict/dict0dict.c! */
extern "C"
int
innobase_strcasecmp(
/*================*/
				/* out: 0 if a=b, <0 if a<b, >0 if a>b */
	const char*	a,	/* in: first string to compare */
	const char*	b)	/* in: second string to compare */
{
	int	cmp = my_strcasecmp(system_charset_info, a, b);

	return(cmp);
}

/**********************************************************************
Converts a NUL-terminated UTF-8 string to lower case in place; the
conversion is delegated to my_casedn_str() with system_charset_info.

NOTE that the exact prototype of this function has to be in
/innobase/dict/dict0dict.c! */
extern "C"
void
innobase_casedn_str(
/*================*/
	char*	a)	/* in/out: string to put in lower case */
{
	my_casedn_str(system_charset_info, a);
}

670 671 672 673 674 675 676 677 678
/*************************************************************************
Creates a temporary file. */
extern "C"
int
innobase_mysql_tmpfile(void)
/*========================*/
			/* out: temporary file descriptor, or < 0 on error */
{
	char	filename[FN_REFLEN];
unknown's avatar
unknown committed
679
	int	fd2 = -1;
680
	File	fd = create_temp_file(filename, mysql_tmpdir, "ib",
681 682 683 684 685 686 687
#ifdef __WIN__
				O_BINARY | O_TRUNC | O_SEQUENTIAL |
				O_TEMPORARY | O_SHORT_LIVED |
#endif /* __WIN__ */
				O_CREAT | O_EXCL | O_RDWR,
				MYF(MY_WME));
	if (fd >= 0) {
unknown's avatar
unknown committed
688 689 690 691
#ifndef __WIN__
		/* On Windows, open files cannot be removed, but files can be
		created with the O_TEMPORARY flag to the same effect
		("delete on close"). */
692 693
		unlink(filename);
#endif /* !__WIN__ */
unknown's avatar
unknown committed
694 695 696 697 698 699 700 701 702 703 704 705
		/* Copy the file descriptor, so that the additional resources
		allocated by create_temp_file() can be freed by invoking
		my_close().

		Because the file descriptor returned by this function
		will be passed to fdopen(), it will be closed by invoking
		fclose(), which in turn will invoke close() instead of
		my_close(). */
		fd2 = dup(fd);
		if (fd2 < 0) {
			DBUG_PRINT("error",("Got error %d on dup",fd2));
			my_errno=errno;
706 707 708 709
                        my_error(EE_OUT_OF_FILERESOURCES,
                                 MYF(ME_BELL+ME_WAITTANG),
                                 filename, my_errno);
                }
unknown's avatar
unknown committed
710 711 712
		my_close(fd, MYF(MY_WME));
	}
	return(fd2);
713 714
}

715
/*************************************************************************
716 717
Gets the InnoDB transaction handle for a MySQL handler object, creates
an InnoDB transaction struct if the corresponding MySQL thread struct still
718
lacks one. */
719
static
720 721 722
trx_t*
check_trx_exists(
/*=============*/
723
			/* out: InnoDB transaction handle */
724 725 726 727
	THD*	thd)	/* in: user thread handle */
{
	trx_t*	trx;

unknown's avatar
unknown committed
728
	ut_ad(thd == current_thd);
unknown's avatar
unknown committed
729

730
        trx = (trx_t*) thd->ha_data[innobase_hton.slot];
731 732

	if (trx == NULL) {
unknown's avatar
unknown committed
733
	        DBUG_ASSERT(thd != NULL);
734
		trx = trx_allocate_for_mysql();
735

736
		trx->mysql_thd = thd;
737 738
		trx->mysql_query_str = &(thd->query);
                trx->active_trans = 0;
739

740 741 742 743
		/* Update the info whether we should skip XA steps that eat
		CPU time */
		trx->support_xa = (ibool)(thd->variables.innodb_support_xa);

744
                thd->ha_data[innobase_hton.slot] = trx;
unknown's avatar
unknown committed
745
	} else {
unknown's avatar
unknown committed
746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762
		if (trx->magic_n != TRX_MAGIC_N) {
			mem_analyze_corruption((byte*)trx);

			ut_a(0);
		}
	}

	if (thd->options & OPTION_NO_FOREIGN_KEY_CHECKS) {
		trx->check_foreigns = FALSE;
	} else {
		trx->check_foreigns = TRUE;
	}

	if (thd->options & OPTION_RELAXED_UNIQUE_CHECKS) {
		trx->check_unique_secondary = FALSE;
	} else {
		trx->check_unique_secondary = TRUE;
763 764 765 766 767 768
	}

	return(trx);
}

/*************************************************************************
769
Updates the user_thd field in a handle and also allocates a new InnoDB
770 771
transaction handle if needed, and updates the transaction fields in the
prebuilt struct. */
772
inline
773 774 775 776 777 778
int
ha_innobase::update_thd(
/*====================*/
			/* out: 0 or error code */
	THD*	thd)	/* in: thd to use the handle */
{
779 780
	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;
	trx_t*		trx;
unknown's avatar
unknown committed
781

782 783
	trx = check_trx_exists(thd);

784
	if (prebuilt->trx != trx) {
785

786
		row_update_prebuilt_trx(prebuilt, trx);
787 788 789
	}

	user_thd = thd;
790

791 792 793
	return(0);
}

unknown's avatar
unknown committed
794
/*************************************************************************
unknown's avatar
unknown committed
795 796 797 798 799
Registers that InnoDB takes part in an SQL statement, so that MySQL knows to
roll back the statement if the statement results in an error. This MUST be
called for every SQL statement that may be rolled back by MySQL. Calling this
several times to register the same statement is allowed, too. */
inline
unknown's avatar
unknown committed
800
void
unknown's avatar
unknown committed
801 802 803
innobase_register_stmt(
/*===================*/
	THD*	thd)	/* in: MySQL thd (connection) object */
804
{
unknown's avatar
unknown committed
805
        /* Register the statement */
unknown's avatar
unknown committed
806
        trans_register_ha(thd, FALSE, &innobase_hton);
unknown's avatar
unknown committed
807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825
}

/*************************************************************************
Registers an InnoDB transaction in MySQL, so that the MySQL XA code knows
to call the InnoDB prepare and commit, or rollback for the transaction. This
MUST be called for every transaction for which the user may call commit or
rollback. Calling this several times to register the same transaction is
allowed, too.
This function also registers the current SQL statement. */
inline
void
innobase_register_trx_and_stmt(
/*===========================*/
	THD*	thd)	/* in: MySQL thd (connection) object */
{
	/* The statement is always registered. NOTE that actually
	innobase_register_stmt() registers also the transaction in the
	AUTOCOMMIT=1 mode. */

	innobase_register_stmt(thd);

	if (!(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) {
		/* Autocommit mode: nothing more to register */

		return;
	}

	/* No autocommit mode, register for a transaction */
	trans_register_ha(thd, TRUE, &innobase_hton);
}
unknown's avatar
unknown committed
833 834 835 836 837 838 839 840 841 842

/*   BACKGROUND INFO: HOW THE MYSQL QUERY CACHE WORKS WITH INNODB
     ------------------------------------------------------------

1) The use of the query cache for TBL is disabled when there is an
uncommitted change to TBL.

2) When a change to TBL commits, InnoDB stores the current value of
its global trx id counter, let us denote it by INV_TRX_ID, to the table object
in the InnoDB data dictionary, and does only allow such transactions whose
id <= INV_TRX_ID to use the query cache.

3) When InnoDB does an INSERT/DELETE/UPDATE to a table TBL, or an implicit
modification because of an ON DELETE CASCADE, we invalidate the MySQL query
cache of TBL immediately.

How this is implemented inside InnoDB:

1) Since every modification always sets an IX type table lock on the InnoDB
table, it is easy to check if there can be uncommitted modifications for a
table: just check if there are locks in the lock list of the table.

2) When a transaction inside InnoDB commits, it reads the global trx id
counter and stores the value INV_TRX_ID to the tables on which it had a lock.

3) If there is an implicit table change from ON DELETE CASCADE or SET NULL,
InnoDB calls an invalidate method for the MySQL query cache for that table.

How this is implemented inside sql_cache.cc:

1) The query cache for an InnoDB table TBL is invalidated immediately at an
INSERT/UPDATE/DELETE, just like in the case of MyISAM. No need to delay
invalidation to the transaction commit.

2) To store or retrieve a value from the query cache of an InnoDB table TBL,
any query must first ask InnoDB's permission. We must pass the thd as a
parameter because InnoDB will look at the trx id, if any, associated with
that thd.

3) Use of the query cache for InnoDB tables is now allowed also when
AUTOCOMMIT==0 or we are inside BEGIN ... COMMIT. Thus transactions no longer
put restrictions on the use of the query cache.
*/

/**********************************************************************
The MySQL query cache uses this to check from InnoDB if the query cache at
the moment is allowed to operate on an InnoDB table. The SQL query must
be a non-locking SELECT.

The query cache is allowed to operate on certain query only if this function
returns TRUE for all tables in the query.

If thd is not in the autocommit state, this function also starts a new
transaction for thd if there is no active trx yet, and assigns a consistent
unknown's avatar
unknown committed
887 888 889 890 891 892 893 894
read view to it if there is no read view yet.

Why a deadlock of threads is not possible: the query cache calls this function
at the start of a SELECT processing. Then the calling thread cannot be
holding any InnoDB semaphores. The calling thread is holding the
query cache mutex, and this function will reserver the InnoDB kernel mutex.
Thus, the 'rank' in sync0sync.h of the MySQL query cache mutex is above
the InnoDB kernel mutex. */
unknown's avatar
unknown committed
895

unknown's avatar
unknown committed
896
my_bool
unknown's avatar
unknown committed
897 898 899 900 901 902 903 904 905 906 907 908
innobase_query_caching_of_table_permitted(
/*======================================*/
				/* out: TRUE if permitted, FALSE if not;
				note that the value FALSE does not mean
				we should invalidate the query cache:
				invalidation is called explicitly */
	THD*	thd,		/* in: thd of the user who is trying to
				store a result to the query cache or
				retrieve it */
	char*	full_name,	/* in: concatenation of database name,
				the null character '\0', and the table
				name */
unknown's avatar
unknown committed
909
	uint	full_name_len,	/* in: length of the full name, i.e.
unknown's avatar
unknown committed
910
				len(dbname) + len(tablename) + 1 */
unknown's avatar
unknown committed
911
        ulonglong *unused)      /* unused for this engine */
unknown's avatar
unknown committed
912 913 914 915 916 917 918 919 920
{
	ibool	is_autocommit;
	trx_t*	trx;
	char	norm_name[1000];

	ut_a(full_name_len < 999);

	if (thd->variables.tx_isolation == ISO_SERIALIZABLE) {
		/* In the SERIALIZABLE mode we add LOCK IN SHARE MODE to every
unknown's avatar
unknown committed
921
		plain SELECT if AUTOCOMMIT is not on. */
unknown's avatar
unknown committed
922 923 924 925
	
		return((my_bool)FALSE);
	}

unknown's avatar
unknown committed
926
        trx = check_trx_exists(thd);
unknown's avatar
unknown committed
927 928 929 930 931 932 933
	if (trx->has_search_latch) {
		ut_print_timestamp(stderr);
		fprintf(stderr,
"  InnoDB: Error: the calling thread is holding the adaptive search\n"
"InnoDB: latch though calling innobase_query_caching_of_table_permitted\n");
	}

unknown's avatar
unknown committed
934 935 936 937 938 939 940 941 942 943
	innobase_release_stat_resources(trx);

	if (!(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) {

		is_autocommit = TRUE;
	} else {
		is_autocommit = FALSE;

	}

unknown's avatar
unknown committed
944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961
	if (is_autocommit && trx->n_mysql_tables_in_use == 0) {
		/* We are going to retrieve the query result from the query
		cache. This cannot be a store operation to the query cache
		because then MySQL would have locks on tables already.

		TODO: if the user has used LOCK TABLES to lock the table,
		then we open a transaction in the call of row_.. below.
		That trx can stay open until UNLOCK TABLES. The same problem
		exists even if we do not use the query cache. MySQL should be
		modified so that it ALWAYS calls some cleanup function when
		the processing of a query ends!

		We can imagine we instantaneously serialize this consistent
		read trx to the current trx id counter. If trx2 would have
		changed the tables of a query result stored in the cache, and
		trx2 would have already committed, making the result obsolete,
		then trx2 would have already invalidated the cache. Thus we
		can trust the result in the cache is ok for this query. */
unknown's avatar
unknown committed
962 963 964 965 966 967 968 969 970 971 972 973

		return((my_bool)TRUE);
	}
	
	/* Normalize the table name to InnoDB format */

	memcpy(norm_name, full_name, full_name_len);

	norm_name[strlen(norm_name)] = '/'; /* InnoDB uses '/' as the
					    separator between db and table */
	norm_name[full_name_len] = '\0';
#ifdef __WIN__
974
	innobase_casedn_str(norm_name);
unknown's avatar
unknown committed
975
#endif
unknown's avatar
unknown committed
976 977 978
	/* The call of row_search_.. will start a new transaction if it is
	not yet started */

979 980
        if (trx->active_trans == 0) {

unknown's avatar
unknown committed
981
                innobase_register_trx_and_stmt(thd);
982 983
                trx->active_trans = 1;
        }
unknown's avatar
unknown committed
984

unknown's avatar
unknown committed
985 986
	if (row_search_check_if_query_cache_permitted(trx, norm_name)) {

unknown's avatar
unknown committed
987
		/* printf("Query cache for %s permitted\n", norm_name); */
unknown's avatar
unknown committed
988 989 990 991

		return((my_bool)TRUE);
	}

unknown's avatar
unknown committed
992
	/* printf("Query cache for %s NOT permitted\n", norm_name); */
unknown's avatar
unknown committed
993 994 995 996 997 998 999 1000

	return((my_bool)FALSE);
}

/*********************************************************************
Invalidates the MySQL query cache for the table.
NOTE that the exact prototype of this function has to be in
/innobase/row/row0ins.c! */
1001
extern "C"
unknown's avatar
unknown committed
1002 1003 1004 1005 1006
void
innobase_invalidate_query_cache(
/*============================*/
	trx_t*	trx,		/* in: transaction which modifies the table */
	char*	full_name,	/* in: concatenation of database name, null
unknown's avatar
unknown committed
1007 1008 1009 1010 1011
				char '\0', table name, null char'\0';
				NOTE that in Windows this is always
				in LOWER CASE! */
	ulint	full_name_len)	/* in: full name length where also the null
				chars count */
unknown's avatar
unknown committed
1012
{
unknown's avatar
unknown committed
1013 1014 1015 1016
	/* Note that the sync0sync.h rank of the query cache mutex is just
	above the InnoDB kernel mutex. The caller of this function must not
	have latches of a lower rank. */

unknown's avatar
unknown committed
1017
	/* Argument TRUE below means we are using transactions */
1018
#ifdef HAVE_QUERY_CACHE
unknown's avatar
unknown committed
1019 1020 1021 1022
	query_cache.invalidate((THD*)(trx->mysql_thd),
					(const char*)full_name,
					(uint32)full_name_len,
					TRUE);
1023
#endif
unknown's avatar
unknown committed
1024
}
1025 1026

/*********************************************************************
1027 1028
Get the quote character to be used in SQL identifiers.
This definition must match the one in innobase/ut/ut0ut.c! */
1029
extern "C"
1030 1031 1032
int
mysql_get_identifier_quote_char(
/*============================*/
1033
				/* out: quote character to be
1034 1035 1036 1037 1038 1039 1040 1041 1042
				used in SQL identifiers; EOF if none */
	trx_t*		trx,	/* in: transaction */
	const char*	name,	/* in: name to print */
	ulint		namelen)/* in: length of name */
{
	if (!trx || !trx->mysql_thd) {
		return(EOF);
	}
	return(get_quote_char_for_identifier((THD*) trx->mysql_thd,
unknown's avatar
unknown committed
1043
						name, (int) namelen));
1044 1045
}

1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057
/**************************************************************************
Determines if the currently running transaction has been interrupted.
A NULL transaction, or a transaction with no MySQL thread handle attached,
is reported as not interrupted. */
extern "C"
ibool
trx_is_interrupted(
/*===============*/
			/* out: TRUE if interrupted */
	trx_t*	trx)	/* in: transaction */
{
	/* The && chain guards both pointer dereferences; the result is
	nonzero only when the killed field of the attached THD is set */
	return(trx && trx->mysql_thd && ((THD*) trx->mysql_thd)->killed);
}

1058 1059 1060 1061 1062 1063 1064 1065
/**************************************************************************
Obtain a pointer to the MySQL THD object, as in current_thd().  This
definition must match the one in sql/ha_innodb.cc! */
extern "C"
void*
innobase_current_thd(void)
/*======================*/
			/* out: MySQL THD object */
1066
{
1067
	return(current_thd);
unknown's avatar
unknown committed
1068 1069
}

1070 1071 1072 1073
/*********************************************************************
Call this when you have opened a new table handle in HANDLER, before you
call index_read_idx() etc. Actually, we can let the cursor stay open even
over a transaction commit! Then you should call this before every operation,
1074
fetch next etc. This function inits the necessary things even after a
1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094
transaction commit. */

void
ha_innobase::init_table_handle_for_HANDLER(void)
/*============================================*/
{
        row_prebuilt_t* prebuilt;

        /* If current thd does not yet have a trx struct, create one.
        If the current handle does not yet have a prebuilt struct, create
        one. Update the trx pointers in the prebuilt struct. Normally
        this operation is done in external_lock. */

        update_thd(current_thd);

        /* Initialize the prebuilt struct much like it would be inited in
        external_lock */

        prebuilt = (row_prebuilt_t*)innobase_prebuilt;

unknown's avatar
unknown committed
1095 1096
	innobase_release_stat_resources(prebuilt->trx);

1097 1098 1099 1100 1101 1102 1103 1104
        /* If the transaction is not started yet, start it */

        trx_start_if_not_started_noninline(prebuilt->trx);

        /* Assign a read view if the transaction does not have it yet */

        trx_assign_read_view(prebuilt->trx);

1105 1106
	/* Set the MySQL flag to mark that there is an active transaction */

1107 1108
        if (prebuilt->trx->active_trans == 0) {

unknown's avatar
unknown committed
1109
                innobase_register_trx_and_stmt(current_thd);
1110 1111 1112

                prebuilt->trx->active_trans = 1;
        }
1113

1114 1115 1116 1117 1118 1119 1120 1121 1122
        /* We did the necessary inits in this function, no need to repeat them
        in row_search_for_mysql */

        prebuilt->sql_stat_start = FALSE;

        /* We let HANDLER always to do the reads as consistent reads, even
        if the trx isolation level would have been specified as SERIALIZABLE */

        prebuilt->select_lock_type = LOCK_NONE;
unknown's avatar
unknown committed
1123
        prebuilt->stored_select_lock_type = LOCK_NONE;
1124 1125 1126

        /* Always fetch all columns in the index record */

unknown's avatar
unknown committed
1127
        prebuilt->hint_need_to_fetch_extra_cols = ROW_RETRIEVE_ALL_COLS;
1128 1129 1130 1131 1132

        /* We want always to fetch all columns in the whole row? Or do
	we???? */

        prebuilt->read_just_key = FALSE;
1133 1134

	prebuilt->used_in_HANDLER = TRUE;
1135 1136

	prebuilt->keep_other_fields_on_keyread = FALSE;
1137 1138
}

1139
/*************************************************************************
1140
Opens an InnoDB database. */
1141

unknown's avatar
unknown committed
1142
handlerton*
1143 1144
innobase_init(void)
/*===============*/
1145
			/* out: TRUE if error */
1146
{
unknown's avatar
unknown committed
1147
	static char	current_dir[3];		/* Set if using current lib */
1148 1149
	int		err;
	bool		ret;
1150
	char 	        *default_path;
unknown's avatar
merge  
unknown committed
1151

1152 1153
  	DBUG_ENTER("innobase_init");

unknown's avatar
unknown committed
1154 1155
	ut_a(DATA_MYSQL_TRUE_VARCHAR == (ulint)MYSQL_TYPE_VARCHAR);

unknown's avatar
unknown committed
1156
  	os_innodb_umask = (ulint)my_umask;
unknown's avatar
unknown committed
1157

unknown's avatar
unknown committed
1158 1159 1160 1161 1162 1163
	/* First calculate the default path for innodb_data_home_dir etc.,
	in case the user has not given any value.

	Note that when using the embedded server, the datadirectory is not
	necessarily the current directory of this program. */

1164
	if (mysqld_embedded) {
unknown's avatar
unknown committed
1165
		default_path = mysql_real_data_home;
unknown's avatar
unknown committed
1166
		fil_path_to_mysql_datadir = mysql_real_data_home;
unknown's avatar
unknown committed
1167 1168 1169 1170 1171 1172
	} else {
	  	/* It's better to use current lib, to keep paths short */
	  	current_dir[0] = FN_CURLIB;
	  	current_dir[1] = FN_LIBCHAR;
	  	current_dir[2] = 0;
	  	default_path = current_dir;
unknown's avatar
unknown committed
1173 1174
	}

unknown's avatar
unknown committed
1175 1176
	ut_a(default_path);

unknown's avatar
unknown committed
1177 1178 1179 1180 1181 1182
	if (specialflag & SPECIAL_NO_PRIOR) {
	        srv_set_thread_priorities = FALSE;
	} else {
	        srv_set_thread_priorities = TRUE;
	        srv_query_thread_priority = QUERY_PRIOR;
	}
unknown's avatar
unknown committed
1183

unknown's avatar
unknown committed
1184 1185
	/* Set InnoDB initialization parameters according to the values
	read from MySQL .cnf file */
unknown's avatar
unknown committed
1186

unknown's avatar
unknown committed
1187
	/*--------------- Data files -------------------------*/
1188

unknown's avatar
unknown committed
1189
	/* The default dir for data files is the datadir of MySQL */
unknown's avatar
unknown committed
1190 1191

	srv_data_home = (innobase_data_home_dir ? innobase_data_home_dir :
unknown's avatar
unknown committed
1192
			 default_path);
unknown's avatar
unknown committed
1193

unknown's avatar
unknown committed
1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209
	/* Set default InnoDB data file size to 10 MB and let it be
  	auto-extending. Thus users can use InnoDB in >= 4.0 without having
	to specify any startup options. */

	if (!innobase_data_file_path) {
  		innobase_data_file_path = (char*) "ibdata1:10M:autoextend";
	}

	/* Since InnoDB edits the argument in the next call, we make another
	copy of it: */

	internal_innobase_data_file_path = my_strdup(innobase_data_file_path,
						   MYF(MY_WME));

	ret = (bool) srv_parse_data_file_paths_and_sizes(
				internal_innobase_data_file_path,
unknown's avatar
unknown committed
1210 1211 1212 1213 1214 1215
				&srv_data_file_names,
				&srv_data_file_sizes,
				&srv_data_file_is_raw_partition,
				&srv_n_data_files,
				&srv_auto_extend_last_data_file,
				&srv_last_file_size_max);
1216
	if (ret == FALSE) {
unknown's avatar
unknown committed
1217 1218
	  	sql_print_error(
			"InnoDB: syntax error in innodb_data_file_path");
unknown's avatar
unknown committed
1219 1220
	  	my_free(internal_innobase_data_file_path,
						MYF(MY_ALLOW_ZERO_PTR));
1221
	  	DBUG_RETURN(0);
unknown's avatar
unknown committed
1222
	}
1223

unknown's avatar
unknown committed
1224 1225 1226 1227 1228 1229 1230
	/* -------------- Log files ---------------------------*/

	/* The default dir for log files is the datadir of MySQL */
	
	if (!innobase_log_group_home_dir) {
	  	innobase_log_group_home_dir = default_path;
	}
unknown's avatar
unknown committed
1231 1232

#ifdef UNIV_LOG_ARCHIVE	  	
unknown's avatar
unknown committed
1233 1234 1235 1236 1237 1238 1239
	/* Since innodb_log_arch_dir has no relevance under MySQL,
	starting from 4.0.6 we always set it the same as
	innodb_log_group_home_dir: */

	innobase_log_arch_dir = innobase_log_group_home_dir;

	srv_arch_dir = innobase_log_arch_dir;
unknown's avatar
unknown committed
1240
#endif /* UNIG_LOG_ARCHIVE */
unknown's avatar
unknown committed
1241

unknown's avatar
unknown committed
1242 1243 1244
	ret = (bool)
		srv_parse_log_group_home_dirs(innobase_log_group_home_dir,
						&srv_log_group_home_dirs);
unknown's avatar
unknown committed
1245

unknown's avatar
unknown committed
1246 1247 1248 1249
	if (ret == FALSE || innobase_mirrored_log_groups != 1) {
		fprintf(stderr,
		"InnoDB: syntax error in innodb_log_group_home_dir\n"
		"InnoDB: or a wrong number of mirrored log groups\n");
unknown's avatar
unknown committed
1250

unknown's avatar
unknown committed
1251 1252
	  	my_free(internal_innobase_data_file_path,
						MYF(MY_ALLOW_ZERO_PTR));
1253
		DBUG_RETURN(0);
unknown's avatar
unknown committed
1254
	}
unknown's avatar
unknown committed
1255

unknown's avatar
unknown committed
1256 1257 1258
	/* --------------------------------------------------*/

	srv_file_flush_method_str = innobase_unix_file_flush_method;
1259

unknown's avatar
unknown committed
1260
	srv_n_log_groups = (ulint) innobase_mirrored_log_groups;
1261
	srv_n_log_files = (ulint) innobase_log_files_in_group;
unknown's avatar
unknown committed
1262 1263
	srv_log_file_size = (ulint) innobase_log_file_size;

unknown's avatar
unknown committed
1264
#ifdef UNIV_LOG_ARCHIVE
unknown's avatar
unknown committed
1265
	srv_log_archive_on = (ulint) innobase_log_archive;
unknown's avatar
unknown committed
1266
#endif /* UNIV_LOG_ARCHIVE */
unknown's avatar
unknown committed
1267
	srv_log_buffer_size = (ulint) innobase_log_buffer_size;
unknown's avatar
unknown committed
1268
	srv_flush_log_at_trx_commit = (ulint) innobase_flush_log_at_trx_commit;
1269

unknown's avatar
unknown committed
1270 1271
        /* We set srv_pool_size here in units of 1 kB. InnoDB internally
        changes the value so that it becomes the number of database pages. */
unknown's avatar
unknown committed
1272 1273

        if (innobase_buffer_pool_awe_mem_mb == 0) {
unknown's avatar
unknown committed
1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288
                /* Careful here: we first convert the signed long int to ulint
                and only after that divide */
 
                srv_pool_size = ((ulint) innobase_buffer_pool_size) / 1024;
        } else {
                srv_use_awe = TRUE;
                srv_pool_size = (ulint)
                                (1024 * innobase_buffer_pool_awe_mem_mb);
                srv_awe_window_size = (ulint) innobase_buffer_pool_size;
 
                /* Note that what the user specified as
                innodb_buffer_pool_size is actually the AWE memory window
                size in this case, and the real buffer pool size is
                determined by .._awe_mem_mb. */
        }
unknown's avatar
unknown committed
1289

unknown's avatar
unknown committed
1290 1291 1292
	srv_mem_pool_size = (ulint) innobase_additional_mem_pool_size;

	srv_n_file_io_threads = (ulint) innobase_file_io_threads;
1293

1294
	srv_lock_wait_timeout = (ulint) innobase_lock_wait_timeout;
unknown's avatar
Merge  
unknown committed
1295 1296
	srv_force_recovery = (ulint) innobase_force_recovery;

unknown's avatar
unknown committed
1297 1298
	srv_use_doublewrite_buf = (ibool) innobase_use_doublewrite;
	srv_use_checksums = (ibool) innobase_use_checksums;
1299

unknown's avatar
unknown committed
1300 1301
	os_use_large_pages = (ibool) innobase_use_large_pages;
	os_large_page_size = (ulint) innobase_large_page_size;
1302
  
unknown's avatar
unknown committed
1303
	srv_file_per_table = (ibool) innobase_file_per_table;
1304
        srv_locks_unsafe_for_binlog = (ibool) innobase_locks_unsafe_for_binlog;
unknown's avatar
unknown committed
1305 1306

	srv_max_n_open_files = (ulint) innobase_open_files;
1307
	srv_innodb_status = (ibool) innobase_create_status_file;
unknown's avatar
unknown committed
1308

1309
	srv_print_verbose_log = mysqld_embedded ? 0 : 1;
unknown's avatar
unknown committed
1310

unknown's avatar
unknown committed
1311
	/* Store the default charset-collation number of this MySQL
1312
	installation */
unknown's avatar
unknown committed
1313

1314
	data_mysql_default_charset_coll = (ulint)default_charset_info->number;
unknown's avatar
unknown committed
1315

1316 1317
	ut_a(DATA_MYSQL_LATIN1_SWEDISH_CHARSET_COLL ==
					my_charset_latin1.number);
1318

unknown's avatar
unknown committed
1319 1320 1321 1322
	/* Store the latin1_swedish_ci character ordering table to InnoDB. For
	non-latin1_swedish_ci charsets we use the MySQL comparison functions,
	and consequently we do not need to know the ordering internally in
	InnoDB. */
unknown's avatar
unknown committed
1323

1324
	ut_a(0 == strcmp((char*)my_charset_latin1.name,
unknown's avatar
unknown committed
1325 1326
						(char*)"latin1_swedish_ci"));
	memcpy(srv_latin1_ordering, my_charset_latin1.sort_order, 256);
1327

1328 1329 1330 1331 1332 1333 1334 1335
	/* Since we in this module access directly the fields of a trx
        struct, and due to different headers and flags it might happen that
	mutex_t has a different size in this module and in InnoDB
	modules, we check at run time that the size is the same in
	these compilation modules. */

	srv_sizeof_trx_t_in_ha_innodb_cc = sizeof(trx_t);

unknown's avatar
unknown committed
1336
	err = innobase_start_or_create_for_mysql();
1337 1338

	if (err != DB_SUCCESS) {
unknown's avatar
unknown committed
1339 1340
	  	my_free(internal_innobase_data_file_path,
						MYF(MY_ALLOW_ZERO_PTR));
1341
		DBUG_RETURN(0);
1342
	}
unknown's avatar
unknown committed
1343 1344 1345

	(void) hash_init(&innobase_open_tables,system_charset_info, 32, 0, 0,
			 		(hash_get_key) innobase_get_key, 0, 0);
unknown's avatar
unknown committed
1346 1347
        pthread_mutex_init(&innobase_share_mutex, MY_MUTEX_INIT_FAST);
        pthread_mutex_init(&prepare_commit_mutex, MY_MUTEX_INIT_FAST);
1348
	innodb_inited= 1;
unknown's avatar
unknown committed
1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364

	/* If this is a replication slave and we needed to do a crash recovery,
	set the master binlog position to what InnoDB internally knew about
	how far we got transactions durable inside InnoDB. There is a
	problem here: if the user used also MyISAM tables, InnoDB might not
	know the right position for them.

	THIS DOES NOT WORK CURRENTLY because replication seems to initialize
	glob_mi also after innobase_init. */
	
/*	if (trx_sys_mysql_master_log_pos != -1) {
		ut_memcpy(glob_mi.log_file_name, trx_sys_mysql_master_log_name,
				1 + ut_strlen(trx_sys_mysql_master_log_name));
		glob_mi.pos = trx_sys_mysql_master_log_pos;
	}
*/
1365
	DBUG_RETURN(&innobase_hton);
1366 1367 1368
}

/***********************************************************************
1369
Closes an InnoDB database. */
1370

1371
bool
1372 1373
innobase_end(void)
/*==============*/
1374
				/* out: TRUE if error */
1375
{
1376
	int	err= 0;
1377 1378 1379

	DBUG_ENTER("innobase_end");

1380 1381 1382 1383 1384
#ifdef __NETWARE__ 	/* some special cleanup for NetWare */
	if (nw_panic) {
		set_panic_flag_for_netware();
	}
#endif
unknown's avatar
unknown committed
1385
	if (innodb_inited) {
1386

1387
	        srv_fast_shutdown = (ulint) innobase_fast_shutdown;
unknown's avatar
unknown committed
1388 1389 1390 1391 1392 1393 1394
	  	innodb_inited = 0;
	  	if (innobase_shutdown_for_mysql() != DB_SUCCESS) {
	    		err = 1;
		}
	  	hash_free(&innobase_open_tables);
	  	my_free(internal_innobase_data_file_path,
						MYF(MY_ALLOW_ZERO_PTR));
unknown's avatar
unknown committed
1395 1396
                pthread_mutex_destroy(&innobase_share_mutex);
                pthread_mutex_destroy(&prepare_commit_mutex);
1397
	}
1398

1399
  	DBUG_RETURN(err);
1400 1401 1402
}

/********************************************************************
unknown's avatar
unknown committed
1403 1404
Flushes InnoDB logs to disk and makes a checkpoint. Really, a commit flushes
the logs, and the name of this function should be innobase_checkpoint. */
1405

1406
bool
1407 1408
innobase_flush_logs(void)
/*=====================*/
1409
				/* out: TRUE if error */
1410
{
1411
  	bool 	result = 0;
1412 1413 1414

  	DBUG_ENTER("innobase_flush_logs");

unknown's avatar
unknown committed
1415
	log_buffer_flush_to_disk();
1416

1417 1418 1419 1420
  	DBUG_RETURN(result);
}

/*********************************************************************
1421
Commits a transaction in an InnoDB database. */
1422

unknown's avatar
unknown committed
1423 1424 1425 1426 1427
void
innobase_commit_low(
/*================*/
	trx_t*	trx)	/* in: transaction handle */
{
unknown's avatar
unknown committed
1428
        if (trx->conc_state == TRX_NOT_STARTED) {
unknown's avatar
unknown committed
1429

unknown's avatar
unknown committed
1430 1431
                return;
        }
unknown's avatar
unknown committed
1432

unknown's avatar
unknown committed
1433 1434 1435 1436
#ifdef HAVE_REPLICATION
        THD *thd=current_thd;

        if (thd && thd->slave_thread) {
1437
                /* Update the replication position info inside InnoDB */
unknown's avatar
unknown committed
1438

1439
                trx->mysql_master_log_file_name
1440
                                        = active_mi->rli.group_master_log_name;
unknown's avatar
unknown committed
1441 1442
                trx->mysql_master_log_pos = ((ib_longlong)
                                active_mi->rli.future_group_master_log_pos);
1443
        }
unknown's avatar
SCRUM  
unknown committed
1444
#endif /* HAVE_REPLICATION */
unknown's avatar
unknown committed
1445

unknown's avatar
unknown committed
1446
	trx_commit_for_mysql(trx);
unknown's avatar
unknown committed
1447 1448
}

1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485
/*********************************************************************
Makes sure the thd has an InnoDB transaction struct, starts an InnoDB
transaction if one is not yet started, and assigns a consistent read
snapshot to the transaction if it does not have one yet. Finally registers
the transaction with MySQL if it was not yet marked active. */

int
innobase_start_trx_and_assign_read_view(
/*====================================*/
			/* out: 0 */
	THD*	thd)	/* in: MySQL thread handle of the user for whom
			the transaction should be started */
{
	trx_t*	trx;

	DBUG_ENTER("innobase_start_trx_and_assign_read_view");

	/* Fetch the trx struct of thd; one is created if missing */
	trx = check_trx_exists(thd);

	/* Play safe: give up a possible FIFO ticket and the search latch.
	The kernel mutex will be reserved below, and the latching order
	requires that the search system latch is released before that. */
	innobase_release_stat_resources(trx);

	/* Start the transaction unless it is already running, then give it
	a read view unless it already has one */
	trx_start_if_not_started_noninline(trx);
	trx_assign_read_view(trx);

	if (!trx->active_trans) {
		/* Tell MySQL that there is now an active transaction.
		NOTE(review): this registers current_thd rather than the
		thd parameter; presumably they are the same thread -
		confirm against the callers. */
		innobase_register_trx_and_stmt(current_thd);

		trx->active_trans = 1;
	}

	DBUG_RETURN(0);
}

unknown's avatar
unknown committed
1496
/*********************************************************************
unknown's avatar
unknown committed
1497 1498
Commits a transaction in an InnoDB database or marks an SQL statement
ended. */
unknown's avatar
unknown committed
1499 1500
static
int
1501 1502
innobase_commit(
/*============*/
unknown's avatar
unknown committed
1503
			/* out: 0 */
unknown's avatar
unknown committed
1504
	THD*	thd,	/* in: MySQL thread handle of the user for whom
1505
			the transaction should be committed */
1506 1507
        bool    all)    /* in: TRUE - commit transaction
                               FALSE - the current SQL statement ended */
1508
{
1509
	trx_t*		trx;
1510 1511 1512 1513

  	DBUG_ENTER("innobase_commit");
  	DBUG_PRINT("trans", ("ending transaction"));

unknown's avatar
unknown committed
1514
	trx = check_trx_exists(thd);
1515

1516 1517 1518
	/* Update the info whether we should skip XA steps that eat CPU time */
	trx->support_xa = (ibool)(thd->variables.innodb_support_xa);

unknown's avatar
unknown committed
1519 1520 1521
	/* Release a possible FIFO ticket and search latch. Since we will
	reserve the kernel mutex, we have to release the search system latch
	first to obey the latching order. */
unknown's avatar
unknown committed
1522

unknown's avatar
unknown committed
1523
	innobase_release_stat_resources(trx);
unknown's avatar
unknown committed
1524

1525
        /* The flag trx->active_trans is set to 1 in
unknown's avatar
unknown committed
1526 1527 1528

	1. ::external_lock(),
	2. ::start_stmt(),
1529
	3. innobase_query_caching_of_table_permitted(),
unknown's avatar
unknown committed
1530
	4. innobase_savepoint(),
1531
	5. ::init_table_handle_for_HANDLER(),
1532 1533
	6. innobase_start_trx_and_assign_read_view(),
	7. ::transactional_table_lock()
unknown's avatar
unknown committed
1534 1535 1536 1537 1538

	and it is only set to 0 in a commit or a rollback. If it is 0 we know
	there cannot be resources to be freed and we could return immediately.
	For the time being, we play safe and do the cleanup though there should
	be nothing to clean up. */
unknown's avatar
unknown committed
1539

1540
        if (trx->active_trans == 0
unknown's avatar
unknown committed
1541 1542 1543
	    && trx->conc_state != TRX_NOT_STARTED) {
	    
	        fprintf(stderr,
1544
"InnoDB: Error: trx->active_trans == 0\n"
unknown's avatar
unknown committed
1545
"InnoDB: but trx->conc_state != TRX_NOT_STARTED\n");
unknown's avatar
unknown committed
1546
	}
unknown's avatar
unknown committed
1547

unknown's avatar
unknown committed
1548 1549
        if (all
	    || (!(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)))) {
unknown's avatar
unknown committed
1550
	        
1551
 		/* We were instructed to commit the whole transaction, or
unknown's avatar
unknown committed
1552 1553
		this is an SQL statement end and autocommit is on */

unknown's avatar
unknown committed
1554
                /* We need current binlog position for ibbackup to work.
unknown's avatar
unknown committed
1555 1556 1557 1558 1559
                Note, the position is current because of prepare_commit_mutex */
                trx->mysql_log_file_name = mysql_bin_log.get_log_fname();
                trx->mysql_log_offset =
                        (ib_longlong)mysql_bin_log.get_log_file()->pos_in_file;

unknown's avatar
unknown committed
1560
		innobase_commit_low(trx);
unknown's avatar
unknown committed
1561

unknown's avatar
unknown committed
1562 1563 1564 1565
                if (trx->active_trans == 2) {

                        pthread_mutex_unlock(&prepare_commit_mutex);
                }
1566
                trx->active_trans = 0;
unknown's avatar
unknown committed
1567

unknown's avatar
unknown committed
1568
	} else {
unknown's avatar
unknown committed
1569 1570 1571
	        /* We just mark the SQL statement ended and do not do a
		transaction commit */

unknown's avatar
unknown committed
1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582
		if (trx->auto_inc_lock) {
			/* If we had reserved the auto-inc lock for some
			table in this SQL statement we release it now */
		  	
			row_unlock_table_autoinc_for_mysql(trx);
		}
		/* Store the current undo_no of the transaction so that we
		know where to roll back if we have to roll back the next
		SQL statement */

		trx_mark_sql_stat_end(trx);
unknown's avatar
unknown committed
1583
	}
1584

unknown's avatar
unknown committed
1585 1586
	/* Tell the InnoDB server that there might be work for utility
	threads: */
1587 1588 1589

	srv_active_wake_master_thread();

unknown's avatar
unknown committed
1590
	DBUG_RETURN(0);
1591 1592
}

unknown's avatar
unknown committed
1593
/* TODO: put the MySQL-4.1 functionality back to 5.0. This is needed to get
InnoDB Hot Backup to work. */
unknown's avatar
unknown committed
1596

1597 1598 1599 1600
/*********************************************************************
This is called when MySQL writes the binlog entry for the current
transaction. Writes to the InnoDB tablespace info which tells where the
MySQL binlog entry for the current transaction ended. Also commits the
unknown's avatar
unknown committed
1601
transaction inside InnoDB but does NOT flush InnoDB log files to disk.
unknown's avatar
unknown committed
1602
To flush you have to call innobase_commit_complete(). We have separated
unknown's avatar
unknown committed
1603 1604
flushing to eliminate the bottleneck of LOCK_log in log.cc which disabled
InnoDB's group commit capability. */
1605 1606 1607 1608

int
innobase_report_binlog_offset_and_commit(
/*=====================================*/
unknown's avatar
unknown committed
1609
                                /* out: 0 */
1610
        THD*    thd,            /* in: user thread */
unknown's avatar
unknown committed
1611
        void*   trx_handle,     /* in: InnoDB trx handle */
1612 1613
        char*   log_file_name,  /* in: latest binlog file name */
        my_off_t end_offset)    /* in: the offset in the binlog file
unknown's avatar
unknown committed
1614
                                   up to which we wrote */
1615
{
unknown's avatar
unknown committed
1616 1617 1618
	trx_t*	trx;

	trx = (trx_t*)trx_handle;
1619

unknown's avatar
unknown committed
1620 1621
	ut_a(trx != NULL);

unknown's avatar
unknown committed
1622
	trx->mysql_log_file_name = log_file_name;
unknown's avatar
unknown committed
1623
	trx->mysql_log_offset = (ib_longlong)end_offset;
unknown's avatar
unknown committed
1624

unknown's avatar
unknown committed
1625
#ifdef HAVE_REPLICATION
unknown's avatar
unknown committed
1626 1627 1628 1629 1630 1631 1632 1633
        if (thd->variables.sync_replication) {
                /* Let us store the binlog file name and the position, so that
                we know how long to wait for the binlog to the replicated to
                the slave in synchronous replication. */

                if (trx->repl_wait_binlog_name == NULL) {

                        trx->repl_wait_binlog_name =
unknown's avatar
unknown committed
1634
                                  (char*)mem_alloc_noninline(FN_REFLEN + 100);
unknown's avatar
unknown committed
1635 1636
                }

unknown's avatar
unknown committed
1637
                ut_a(strlen(log_file_name) < FN_REFLEN + 100);
unknown's avatar
unknown committed
1638 1639 1640 1641 1642

                strcpy(trx->repl_wait_binlog_name, log_file_name);

                trx->repl_wait_binlog_pos = (ib_longlong)end_offset;
        }
unknown's avatar
unknown committed
1643
#endif /* HAVE_REPLICATION */
unknown's avatar
unknown committed
1644 1645
	trx->flush_log_later = TRUE;

unknown's avatar
unknown committed
1646
	innobase_commit(thd, TRUE);
unknown's avatar
unknown committed
1647 1648 1649 1650 1651 1652

	trx->flush_log_later = FALSE;

	return(0);
}

unknown's avatar
unknown committed
1653
#if 0
/***********************************************************************
Stores the binlog name and offset in the trx sys header inside a
mini-transaction and then flushes the log buffer to disk. This function is
currently compiled out. */

void
innobase_store_binlog_offset_and_flush_log(
/*=======================================*/
	char*		binlog_name,	/* in: binlog name */
	longlong	offset)		/* in: binlog offset */
{
	mtr_t	mtr;

	assert(binlog_name != NULL);

	/* Record the latest MySQL binlog name and offset in the trx sys
	header within one mini-transaction */

	mtr_start_noninline(&mtr);

	trx_sys_update_mysql_binlog_offset(binlog_name, offset,
					TRX_SYS_MYSQL_LOG_INFO, &mtr);

	mtr_commit(&mtr);

	/* Synchronous flush of the log buffer to disk */
	log_buffer_flush_to_disk();
}
#endif

unknown's avatar
unknown committed
1686 1687 1688 1689 1690 1691 1692 1693
/*********************************************************************
This is called after MySQL has written the binlog entry for the current
transaction. Flushes the InnoDB log files to disk if required. */

int
innobase_commit_complete(
/*=====================*/
                                /* out: 0 */
1694
        THD*    thd)            /* in: user thread */
unknown's avatar
unknown committed
1695 1696 1697
{
	trx_t*	trx;

1698
        trx = (trx_t*) thd->ha_data[innobase_hton.slot];
unknown's avatar
unknown committed
1699

1700
        if (trx && trx->active_trans) {
unknown's avatar
unknown committed
1701

unknown's avatar
unknown committed
1702
                trx->active_trans = 0;
unknown's avatar
unknown committed
1703

unknown's avatar
unknown committed
1704
                if (srv_flush_log_at_trx_commit == 0) {
unknown's avatar
unknown committed
1705

unknown's avatar
unknown committed
1706 1707
                        return(0);
                }
unknown's avatar
unknown committed
1708

unknown's avatar
unknown committed
1709
                trx_commit_complete_for_mysql(trx);
1710
        }
unknown's avatar
unknown committed
1711

unknown's avatar
unknown committed
1712
#ifdef HAVE_REPLICATION
unknown's avatar
unknown committed
1713 1714 1715 1716
        if (thd->variables.sync_replication
            && trx->repl_wait_binlog_name
            && innobase_repl_state != 0) {

unknown's avatar
unknown committed
1717 1718 1719 1720
		struct timespec abstime;
		int	cmp;
		int	ret;

unknown's avatar
unknown committed
1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807
                /* In synchronous replication, let us wait until the MySQL
                replication has sent the relevant binlog segment to the
                replication slave. */

                pthread_mutex_lock(&innobase_repl_cond_mutex);
try_again:
                if (innobase_repl_state == 0) {

                        pthread_mutex_unlock(&innobase_repl_cond_mutex);

                        return(0);
                }

                cmp = strcmp(innobase_repl_file_name,
                                        trx->repl_wait_binlog_name);
                if (cmp > 0
                    || (cmp == 0 && innobase_repl_pos
                                    >= (my_off_t)trx->repl_wait_binlog_pos)) {
                        /* We have already sent the relevant binlog to the
                        slave: no need to wait here */

                        pthread_mutex_unlock(&innobase_repl_cond_mutex);

/*                      printf("Binlog now sent\n"); */

                        return(0);
                }

                /* Let us update the info about the minimum binlog position
                of waiting threads in the innobase_repl_... variables */

                if (innobase_repl_wait_file_name_inited != 0) {
                        cmp = strcmp(trx->repl_wait_binlog_name,
                                        innobase_repl_wait_file_name);
                        if (cmp < 0
                            || (cmp == 0 && (my_off_t)trx->repl_wait_binlog_pos
                                         <= innobase_repl_wait_pos)) {
                                /* This thd has an even lower position, let
                                us update the minimum info */

                                strcpy(innobase_repl_wait_file_name,
                                        trx->repl_wait_binlog_name);

                                innobase_repl_wait_pos =
                                        trx->repl_wait_binlog_pos;
                        }
                } else {
                        strcpy(innobase_repl_wait_file_name,
                                                trx->repl_wait_binlog_name);

                        innobase_repl_wait_pos = trx->repl_wait_binlog_pos;

                        innobase_repl_wait_file_name_inited = 1;
                }
                set_timespec(abstime, thd->variables.sync_replication_timeout);

                /* Let us suspend this thread to wait on the condition;
                when replication has progressed far enough, we will release
                these waiting threads. The following call
                pthread_cond_timedwait also atomically unlocks
                innobase_repl_cond_mutex. */

                innobase_repl_n_wait_threads++;

/*              printf("Waiting for binlog to be sent\n"); */

                ret = pthread_cond_timedwait(&innobase_repl_cond,
                                        &innobase_repl_cond_mutex, &abstime);
                innobase_repl_n_wait_threads--;

                if (ret != 0) {
                        ut_print_timestamp(stderr);

                        fprintf(stderr,
"  InnoDB: Error: MySQL synchronous replication\n"
"InnoDB: was not able to send the binlog to the slave within the\n"
"InnoDB: timeout %lu. We assume that the slave has become inaccessible,\n"
"InnoDB: and switch off synchronous replication until the communication.\n"
"InnoDB: to the slave works again.\n",
				thd->variables.sync_replication_timeout);
                        fprintf(stderr,
"InnoDB: MySQL synchronous replication has sent binlog\n"
"InnoDB: to the slave up to file %s, position %lu\n", innobase_repl_file_name,
                                        (ulong)innobase_repl_pos);
                        fprintf(stderr,
"InnoDB: This transaction needs it to be sent up to\n"
"InnoDB: file %s, position %lu\n", trx->repl_wait_binlog_name,
1808
                                        (ulong)trx->repl_wait_binlog_pos);
unknown's avatar
unknown committed
1809 1810 1811 1812 1813 1814 1815 1816 1817 1818

                        innobase_repl_state = 0;

                        pthread_mutex_unlock(&innobase_repl_cond_mutex);

                        return(0);
                }

                goto try_again;
        }
unknown's avatar
unknown committed
1819
#endif // HAVE_REPLICATION
unknown's avatar
unknown committed
1820
	return(0);
1821 1822
}

unknown's avatar
unknown committed
1823
#ifdef HAVE_REPLICATION
/*********************************************************************
In synchronous replication, tells InnoDB up to which binlog position the
binlog has been sent to the slave. Replication is synchronous for one
slave only; for a thd serving any other slave this function does nothing.
Used in a replication master. If threads are waiting in a commit and the
reported position has reached the smallest position that is waited for,
all the waiting threads are woken up. */

int
innobase_repl_report_sent_binlog(
/*=============================*/
					/* out: 0 */
	THD*		thd,		/* in: thread doing the binlog
					communication to the slave */
	char*		log_file_name,	/* in: binlog file name */
	my_off_t	end_offset)	/* in: the offset in the binlog file
					up to which we sent the contents to
					the slave */
{
	int	cmp;
	ibool	wake_waiters	= 0;

	if (thd->server_id != thd->variables.sync_replication_slave_id) {
		/* Either synchronous replication is not switched on, or
		this thd sends the binlog to a slave that does not need
		synchronous replication: nothing to do */

		return(0);
	}

	pthread_mutex_lock(&innobase_repl_cond_mutex);

	if (innobase_repl_state == 0) {

		ut_print_timestamp(stderr);
		fprintf(stderr,
"  InnoDB: Switching MySQL synchronous replication on again at\n"
"InnoDB: binlog file %s, position %lu\n", log_file_name, (ulong)end_offset);

		innobase_repl_state = 1;
	}

	/* Only one thread sends the binlog to the slave that we want
	synchronous replication for, so the reported position should never
	move backwards. If it does, print an error to the .err log; the new
	position is stored regardless. */

	if (innobase_repl_file_name_inited) {
		cmp = strcmp(log_file_name, innobase_repl_file_name);

		if (cmp < 0 || (cmp == 0 && end_offset < innobase_repl_pos)) {

			ut_print_timestamp(stderr);
			fprintf(stderr,
"  InnoDB: Error: MySQL synchronous replication has sent binlog\n"
"InnoDB: to the slave up to file %s, position %lu\n", innobase_repl_file_name,
				(ulong)innobase_repl_pos);
			fprintf(stderr,
"InnoDB: but now MySQL reports that it sent the binlog only up to\n"
"InnoDB: file %s, position %lu\n", log_file_name, (ulong)end_offset);
		}
	}

	strcpy(innobase_repl_file_name, log_file_name);
	innobase_repl_pos = end_offset;
	innobase_repl_file_name_inited = 1;

	if (innobase_repl_n_wait_threads > 0) {
		/* See whether some thread waiting in a trx commit can now
		go on */

		cmp = strcmp(innobase_repl_file_name,
				innobase_repl_wait_file_name);

		if (cmp > 0
		    || (cmp == 0
			&& innobase_repl_pos >= innobase_repl_wait_pos)) {
			/* At least one waiting thread can proceed: all the
			waiters are released with a broadcast below, and the
			minimum wait position is invalidated */

			wake_waiters = 1;

			innobase_repl_wait_file_name_inited = 0;
		}
	}

	pthread_mutex_unlock(&innobase_repl_cond_mutex);

	if (wake_waiters) {

		pthread_cond_broadcast(&innobase_repl_cond);
	}

	return(0);
}
#endif /* HAVE_REPLICATION */
unknown's avatar
unknown committed
1922 1923

/*********************************************************************
unknown's avatar
unknown committed
1924
Rolls back a transaction or the latest SQL statement. */
1925

1926
static int
1927 1928 1929
innobase_rollback(
/*==============*/
			/* out: 0 or error number */
unknown's avatar
unknown committed
1930
	THD*	thd,	/* in: handle to the MySQL thread of the user
1931
			whose transaction should be rolled back */
1932 1933
        bool    all)    /* in: TRUE - commit transaction
                               FALSE - the current SQL statement ended */
1934 1935
{
	int	error = 0;
1936
	trx_t*	trx;
1937

1938 1939 1940
	DBUG_ENTER("innobase_rollback");
	DBUG_PRINT("trans", ("aborting transaction"));

1941
	trx = check_trx_exists(thd);
1942

1943 1944 1945
	/* Update the info whether we should skip XA steps that eat CPU time */
	trx->support_xa = (ibool)(thd->variables.innodb_support_xa);

unknown's avatar
unknown committed
1946 1947 1948 1949 1950 1951
	/* Release a possible FIFO ticket and search latch. Since we will
	reserve the kernel mutex, we have to release the search system latch
	first to obey the latching order. */

	innobase_release_stat_resources(trx);

unknown's avatar
unknown committed
1952
        if (trx->auto_inc_lock) {
unknown's avatar
unknown committed
1953 1954 1955 1956
		/* If we had reserved the auto-inc lock for some table (if
		we come here to roll back the latest SQL statement) we
		release it now before a possibly lengthy rollback */
		
unknown's avatar
unknown committed
1957 1958 1959
		row_unlock_table_autoinc_for_mysql(trx);
	}

unknown's avatar
unknown committed
1960 1961
        if (all
	    || (!(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)))) {
unknown's avatar
Merge  
unknown committed
1962

1963
		error = trx_rollback_for_mysql(trx);
1964
                trx->active_trans = 0;
unknown's avatar
unknown committed
1965
	} else {
1966
		error = trx_rollback_last_sql_stat_for_mysql(trx);
unknown's avatar
unknown committed
1967
	}
1968

unknown's avatar
unknown committed
1969 1970 1971
	DBUG_RETURN(convert_error_code_to_mysql(error, NULL));
}

1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004
/*********************************************************************
Rolls back a whole transaction, given its InnoDB trx handle. */

int
innobase_rollback_trx(
/*==================*/
			/* out: 0 or error number */
	trx_t*	trx)	/* in: transaction */
{
	int	err;

	DBUG_ENTER("innobase_rollback_trx");
	DBUG_PRINT("trans", ("aborting transaction"));

	/* Give up a possible FIFO ticket and the search latch. The kernel
	mutex will be reserved later, and the latching order requires the
	search system latch to be released first. */

	innobase_release_stat_resources(trx);

	if (trx->auto_inc_lock) {
		/* An auto-inc lock still reserved for some table is
		released before the rollback, which may take long */

		row_unlock_table_autoinc_for_mysql(trx);
	}

	err = trx_rollback_for_mysql(trx);

	DBUG_RETURN(convert_error_code_to_mysql(err, NULL));
}

unknown's avatar
unknown committed
2005 2006 2007
/*********************************************************************
Rolls back a transaction to a savepoint. */

2008
static int
unknown's avatar
unknown committed
2009 2010 2011 2012 2013 2014
innobase_rollback_to_savepoint(
/*===========================*/
				/* out: 0 if success, HA_ERR_NO_SAVEPOINT if
				no savepoint with the given name */
	THD*	thd,		/* in: handle to the MySQL thread of the user
				whose transaction should be rolled back */
2015
        void *savepoint)        /* in: savepoint data */
unknown's avatar
unknown committed
2016 2017 2018 2019
{
	ib_longlong mysql_binlog_cache_pos;
	int	    error = 0;
	trx_t*	    trx;
unknown's avatar
unknown committed
2020
        char 	    name[64];
unknown's avatar
unknown committed
2021 2022 2023 2024 2025

	DBUG_ENTER("innobase_rollback_to_savepoint");

	trx = check_trx_exists(thd);

unknown's avatar
unknown committed
2026 2027 2028
	/* Release a possible FIFO ticket and search latch. Since we will
	reserve the kernel mutex, we have to release the search system latch
	first to obey the latching order. */
unknown's avatar
unknown committed
2029 2030

	innobase_release_stat_resources(trx);
unknown's avatar
unknown committed
2031

2032
        /* TODO: use provided savepoint data area to store savepoint data */
unknown's avatar
unknown committed
2033 2034

        longlong2str((ulonglong)savepoint, name, 36);
2035

unknown's avatar
unknown committed
2036
        error = (int) trx_rollback_to_savepoint_for_mysql(trx, name,
unknown's avatar
unknown committed
2037
						&mysql_binlog_cache_pos);
unknown's avatar
unknown committed
2038
	DBUG_RETURN(convert_error_code_to_mysql(error, NULL));
2039 2040
}

unknown's avatar
unknown committed
2041 2042
/*********************************************************************
Release transaction savepoint name. */
unknown's avatar
unknown committed
2043 2044
static
int
unknown's avatar
unknown committed
2045
innobase_release_savepoint(
unknown's avatar
unknown committed
2046
/*=======================*/
unknown's avatar
unknown committed
2047 2048 2049 2050
				/* out: 0 if success, HA_ERR_NO_SAVEPOINT if
				no savepoint with the given name */
	THD*	thd,		/* in: handle to the MySQL thread of the user
				whose transaction should be rolled back */
unknown's avatar
unknown committed
2051
        void*	savepoint)      /* in: savepoint data */
unknown's avatar
unknown committed
2052 2053 2054
{
	int	    error = 0;
	trx_t*	    trx;
unknown's avatar
unknown committed
2055
        char 	    name[64];
unknown's avatar
unknown committed
2056

unknown's avatar
unknown committed
2057
	DBUG_ENTER("innobase_release_savepoint");
unknown's avatar
unknown committed
2058 2059 2060

	trx = check_trx_exists(thd);

unknown's avatar
unknown committed
2061
        /* TODO: use provided savepoint data area to store savepoint data */
unknown's avatar
unknown committed
2062 2063

        longlong2str((ulonglong)savepoint, name, 36);
2064

unknown's avatar
unknown committed
2065
	error = (int) trx_release_savepoint_for_mysql(trx, name);
unknown's avatar
unknown committed
2066 2067 2068 2069

	DBUG_RETURN(convert_error_code_to_mysql(error, NULL));
}

2070
/*********************************************************************
unknown's avatar
unknown committed
2071
Sets a transaction savepoint. */
unknown's avatar
unknown committed
2072 2073
static
int
unknown's avatar
unknown committed
2074 2075 2076 2077
innobase_savepoint(
/*===============*/
				/* out: always 0, that is, always succeeds */
	THD*	thd,		/* in: handle to the MySQL thread */
unknown's avatar
unknown committed
2078
        void*	savepoint)      /* in: savepoint data */
unknown's avatar
unknown committed
2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092
{
	int	error = 0;
	trx_t*	trx;

	DBUG_ENTER("innobase_savepoint");

	if (!(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) {
		/* In the autocommit state there is no sense to set a
		savepoint: we return immediate success */
	        DBUG_RETURN(0);
	}

	trx = check_trx_exists(thd);

unknown's avatar
unknown committed
2093 2094 2095 2096 2097 2098
	/* Release a possible FIFO ticket and search latch. Since we will
	reserve the kernel mutex, we have to release the search system latch
	first to obey the latching order. */

	innobase_release_stat_resources(trx);

2099 2100
        /* cannot happen outside of transaction */
        DBUG_ASSERT(trx->active_trans);
unknown's avatar
unknown committed
2101

2102
        /* TODO: use provided savepoint data area to store savepoint data */
2103
        char name[64];
unknown's avatar
unknown committed
2104
        longlong2str((ulonglong)savepoint,name,36);
2105

unknown's avatar
unknown committed
2106
        error = (int) trx_savepoint_for_mysql(trx, name, (ib_longlong)0);
unknown's avatar
unknown committed
2107 2108 2109 2110

	DBUG_RETURN(convert_error_code_to_mysql(error, NULL));
}

2111
/*********************************************************************
unknown's avatar
unknown committed
2112
Frees a possible InnoDB trx object associated with the current THD. */
2113

2114
static int
2115 2116
innobase_close_connection(
/*======================*/
unknown's avatar
unknown committed
2117 2118
			/* out: 0 or error number */
	THD*	thd)	/* in: handle to the MySQL thread of the user
2119
			whose resources should be free'd */
2120
{
2121
        trx_free_for_mysql((trx_t*)thd->ha_data[innobase_hton.slot]);
unknown's avatar
unknown committed
2122
	return(0);
2123
}
2124 2125 2126


/*****************************************************************************
2127
** InnoDB database tables
2128 2129
*****************************************************************************/

2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149
/********************************************************************
Reports the record format of the table, read from the comp flag of the
table object attached to the prebuilt struct of this handle. */
enum row_type
ha_innobase::get_row_type() const
/*=============================*/
			/* out: ROW_TYPE_REDUNDANT or ROW_TYPE_COMPACT;
			ROW_TYPE_NOT_USED if the handle has no prebuilt
			struct or no table (ut_ad(0) is hit first) */
{
	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;

	if (!prebuilt || !prebuilt->table) {
		/* Not expected to happen */
		ut_ad(0);

		return(ROW_TYPE_NOT_USED);
	}

	return(prebuilt->table->comp ? ROW_TYPE_COMPACT : ROW_TYPE_REDUNDANT);
}

2150
/********************************************************************
unknown's avatar
unknown committed
2151
Gives the file extension of an InnoDB single-table tablespace. */
unknown's avatar
unknown committed
2152 2153 2154 2155
static const char* ha_innobase_exts[] = {
  ".ibd",
  NullS
};
2156 2157 2158 2159

const char**
ha_innobase::bas_ext() const
/*========================*/
unknown's avatar
unknown committed
2160
				/* out: file extension string */
2161
{
unknown's avatar
unknown committed
2162
  return ha_innobase_exts;
2163 2164
}

unknown's avatar
unknown committed
2165

2166 2167 2168
/*********************************************************************
Normalizes a table name string. A normalized name consists of the
database name catenated to '/' and table name. An example:
unknown's avatar
unknown committed
2169 2170
test/mytable. On Windows normalization puts both the database name and the
table name always to lower case. */
2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184
static
void
normalize_table_name(
/*=================*/
	char*		norm_name,	/* out: normalized name as a
					null-terminated string */
	const char*	name)		/* in: table name string */
{
	char*	name_ptr;
	char*	db_ptr;
	char*	ptr;

	/* Scan name from the end */

unknown's avatar
unknown committed
2185
	ptr = strend(name)-1;
2186 2187 2188 2189 2190 2191 2192

	while (ptr >= name && *ptr != '\\' && *ptr != '/') {
		ptr--;
	}

	name_ptr = ptr + 1;

unknown's avatar
unknown committed
2193
	DBUG_ASSERT(ptr > name);
2194 2195

	ptr--;
2196

2197 2198 2199 2200 2201 2202 2203 2204 2205
	while (ptr >= name && *ptr != '\\' && *ptr != '/') {
		ptr--;
	}

	db_ptr = ptr + 1;

	memcpy(norm_name, db_ptr, strlen(name) + 1 - (db_ptr - name));

	norm_name[name_ptr - db_ptr - 1] = '/';
unknown's avatar
unknown committed
2206 2207

#ifdef __WIN__
unknown's avatar
unknown committed
2208
	innobase_casedn_str(norm_name);
unknown's avatar
unknown committed
2209
#endif
2210
}
2211

2212
/*********************************************************************
unknown's avatar
unknown committed
2213
Creates and opens a handle to a table which already exists in an InnoDB
2214 2215 2216 2217 2218 2219 2220 2221
database. */

int
ha_innobase::open(
/*==============*/
					/* out: 1 if error, 0 if success */
	const char*	name,		/* in: table name */
	int 		mode,		/* in: not used */
unknown's avatar
unknown committed
2222
	uint 		test_if_locked)	/* in: not used */
2223
{
2224 2225
	dict_table_t*	ib_table;
  	char		norm_name[1000];
2226
	THD*		thd;
2227 2228 2229 2230 2231 2232

	DBUG_ENTER("ha_innobase::open");

	UT_NOT_USED(mode);
	UT_NOT_USED(test_if_locked);

2233
	thd = current_thd;
2234 2235
	normalize_table_name(norm_name, name);

2236 2237
	user_thd = NULL;

unknown's avatar
unknown committed
2238 2239
	last_query_id = (ulong)-1;

unknown's avatar
unknown committed
2240 2241 2242 2243
	if (!(share=get_share(name))) {

		DBUG_RETURN(1);
	}
2244

2245 2246 2247 2248
	/* Create buffers for packing the fields of a record. Why
	table->reclength did not work here? Obviously, because char
	fields when packed actually became 1 byte longer, when we also
	stored the string length as the first byte. */
2249

unknown's avatar
unknown committed
2250 2251
	upd_and_key_val_buff_len =
				table->s->reclength + table->s->max_key_length
2252
							+ MAX_REF_PARTS * 3;
2253
	if (!(mysql_byte*) my_multi_malloc(MYF(MY_WME),
unknown's avatar
unknown committed
2254 2255
				     &upd_buff, upd_and_key_val_buff_len,
				     &key_val_buff, upd_and_key_val_buff_len,
2256
				     NullS)) {
2257
	  	free_share(share);
unknown's avatar
unknown committed
2258

2259
	  	DBUG_RETURN(1);
2260 2261
  	}

2262
	/* Get pointer to a table object in InnoDB dictionary cache */
2263

2264 2265 2266
	ib_table = dict_table_get_and_increment_handle_count(
				      		     norm_name, NULL);
 	if (NULL == ib_table) {
unknown's avatar
unknown committed
2267 2268 2269 2270 2271 2272 2273 2274 2275
	        ut_print_timestamp(stderr);
	        fprintf(stderr, "  InnoDB error:\n"
"Cannot find table %s from the internal data dictionary\n"
"of InnoDB though the .frm file for the table exists. Maybe you\n"
"have deleted and recreated InnoDB data files but have forgotten\n"
"to delete the corresponding .frm files of InnoDB tables, or you\n"
"have moved .frm files to another database?\n"
"Look from section 15.1 of http://www.innodb.com/ibman.html\n"
"how you can resolve the problem.\n",
2276
			  norm_name);
unknown's avatar
unknown committed
2277 2278 2279
	        free_share(share);
    		my_free((char*) upd_buff, MYF(0));
    		my_errno = ENOENT;
2280

2281
    		DBUG_RETURN(HA_ERR_NO_SUCH_TABLE);
unknown's avatar
unknown committed
2282 2283
  	}

2284
 	if (ib_table->ibd_file_missing && !thd->tablespace_op) {
unknown's avatar
unknown committed
2285 2286 2287 2288 2289 2290 2291 2292 2293
	        ut_print_timestamp(stderr);
	        fprintf(stderr, "  InnoDB error:\n"
"MySQL is trying to open a table handle but the .ibd file for\n"
"table %s does not exist.\n"
"Have you deleted the .ibd file from the database directory under\n"
"the MySQL datadir, or have you used DISCARD TABLESPACE?\n"
"Look from section 15.1 of http://www.innodb.com/ibman.html\n"
"how you can resolve the problem.\n",
			  norm_name);
2294
	        free_share(share);
2295
    		my_free((char*) upd_buff, MYF(0));
2296
    		my_errno = ENOENT;
unknown's avatar
unknown committed
2297

2298
    		DBUG_RETURN(HA_ERR_NO_SUCH_TABLE);
2299 2300
  	}

2301
	innobase_prebuilt = row_create_prebuilt(ib_table);
2302

unknown's avatar
unknown committed
2303 2304
	((row_prebuilt_t*)innobase_prebuilt)->mysql_row_len =
							table->s->reclength;
2305

unknown's avatar
unknown committed
2306 2307
	/* Looks like MySQL-3.23 sometimes has primary key number != 0 */

2308
 	primary_key = table->s->primary_key;
unknown's avatar
unknown committed
2309
	key_used_on_scan = primary_key;
2310

unknown's avatar
unknown committed
2311 2312 2313 2314 2315
	/* Allocate a buffer for a 'row reference'. A row reference is
	a string of bytes of length ref_length which uniquely specifies
        a row in our table. Note that MySQL may also compare two row
        references for equality by doing a simple memcmp on the strings
        of length ref_length! */
2316

unknown's avatar
unknown committed
2317
  	if (!row_table_got_default_clust_index(ib_table)) {
unknown's avatar
unknown committed
2318 2319 2320 2321 2322
	        if (primary_key >= MAX_KEY) {
	                fprintf(stderr,
		    "InnoDB: Error: table %s has a primary key in InnoDB\n"
		    "InnoDB: data dictionary, but not in MySQL!\n", name);
		}
2323 2324 2325

		((row_prebuilt_t*)innobase_prebuilt)
				->clust_index_was_generated = FALSE;
unknown's avatar
unknown committed
2326 2327 2328 2329 2330
 		/* MySQL allocates the buffer for ref. key_info->key_length
		includes space for all key columns + one byte for each column
		that may be NULL. ref_length must be as exact as possible to
		save space, because all row reference buffers are allocated
		based on ref_length. */
unknown's avatar
unknown committed
2331
 
unknown's avatar
unknown committed
2332
  		ref_length = table->key_info[primary_key].key_length;
2333
	} else {
unknown's avatar
unknown committed
2334 2335 2336
	        if (primary_key != MAX_KEY) {
	                fprintf(stderr,
		    "InnoDB: Error: table %s has no primary key in InnoDB\n"
unknown's avatar
unknown committed
2337 2338 2339 2340 2341 2342 2343 2344
		    "InnoDB: data dictionary, but has one in MySQL!\n"
		    "InnoDB: If you created the table with a MySQL\n"
                    "InnoDB: version < 3.23.54 and did not define a primary\n"
                    "InnoDB: key, but defined a unique key with all non-NULL\n"
                    "InnoDB: columns, then MySQL internally treats that key\n"
                    "InnoDB: as the primary key. You can fix this error by\n"
		    "InnoDB: dump + DROP + CREATE + reimport of the table.\n",
				name);
unknown's avatar
unknown committed
2345 2346
		}

2347 2348 2349
		((row_prebuilt_t*)innobase_prebuilt)
				->clust_index_was_generated = TRUE;

unknown's avatar
unknown committed
2350
  		ref_length = DATA_ROW_ID_LEN;
unknown's avatar
unknown committed
2351

unknown's avatar
unknown committed
2352 2353 2354 2355 2356 2357 2358
		/* If we automatically created the clustered index, then
		MySQL does not know about it, and MySQL must NOT be aware
		of the index used on scan, to make it avoid checking if we
		update the column of the index. That is why we assert below
		that key_used_on_scan is the undefined value MAX_KEY.
		The column is the row id in the automatical generation case,
		and it will never be updated anyway. */
unknown's avatar
unknown committed
2359 2360 2361 2362 2363
	       
		if (key_used_on_scan != MAX_KEY) {
	                fprintf(stderr,
"InnoDB: Warning: table %s key_used_on_scan is %lu even though there is no\n"
"InnoDB: primary key inside InnoDB.\n",
2364
				name, (ulong)key_used_on_scan);
unknown's avatar
unknown committed
2365
		}
2366
	}
2367

unknown's avatar
unknown committed
2368 2369 2370
	block_size = 16 * 1024;	/* Index block size in InnoDB: used by MySQL
				in query optimization */

unknown's avatar
Merge  
unknown committed
2371
	/* Init table lock structure */
2372
	thr_lock_data_init(&share->lock,&lock,(void*) 0);
2373 2374

  	info(HA_STATUS_NO_LOCK | HA_STATUS_VARIABLE | HA_STATUS_CONST);
2375

2376 2377 2378 2379
  	DBUG_RETURN(0);
}

/**********************************************************************
2380
Closes a handle to an InnoDB table. */
2381 2382 2383 2384 2385 2386 2387 2388 2389 2390

int
ha_innobase::close(void)
/*====================*/
				/* out: error number */
{
  	DBUG_ENTER("ha_innobase::close");

	row_prebuilt_free((row_prebuilt_t*) innobase_prebuilt);

2391
    	my_free((char*) upd_buff, MYF(0));
2392 2393
        free_share(share);

2394
	/* Tell InnoDB server that there might be work for
2395 2396 2397 2398 2399 2400 2401
	utility threads: */

	srv_active_wake_master_thread();

  	DBUG_RETURN(0);
}

unknown's avatar
unknown committed
2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465
/* The following accessor functions should really be inside MySQL code! */

/******************************************************************
Gets the offset of a field in a MySQL row, computed as the distance
of field->ptr from the start of table->record[0]. */
inline
uint
get_field_offset(
/*=============*/
			/* out: offset */
	TABLE*	table,	/* in: MySQL table object */
	Field*	field)	/* in: MySQL field object */
{
	const char*	row_start = (char*) table->record[0];

	return((uint) (field->ptr - row_start));
}

/******************************************************************
Checks if a field in a record is SQL NULL. The position of the null
bit is taken from field->null_ptr, relative to table->record[0], and
then looked up at the same offset in the given record. */
inline
uint
field_in_record_is_null(
/*====================*/
			/* out: 1 if NULL, 0 otherwise; always 0 for
			a field that has no null_ptr */
	TABLE*	table,	/* in: MySQL table object */
	Field*	field,	/* in: MySQL field object */
	char*	record)	/* in: a row in MySQL format */
{
	if (!field->null_ptr) {
		/* The field has no null bit */

		return(0);
	}

	const int	null_offset = (uint) ((char*) field->null_ptr
					- (char*) table->record[0]);

	return((record[null_offset] & field->null_bit) ? 1 : 0);
}

/******************************************************************
Sets a field in a record to SQL NULL by turning on its null bit. The
position of the null bit is taken from field->null_ptr, relative to
table->record[0]. Note that field->null_ptr is not checked here. */
inline
void
set_field_in_record_to_null(
/*========================*/
	TABLE*	table,	/* in: MySQL table object */
	Field*	field,	/* in: MySQL field object */
	char*	record)	/* in/out: a row in MySQL format */
{
	const int	null_offset = (uint) ((char*) field->null_ptr
					- (char*) table->record[0]);

	record[null_offset] |= field->null_bit;
}

2466 2467
extern "C" {
/*****************************************************************
unknown's avatar
unknown committed
2468 2469 2470 2471
InnoDB uses this function to compare two data fields for which the data type
is such that we must use MySQL code to compare them. NOTE that the prototype
of this function is in rem0cmp.c in InnoDB source code! If you change this
function, remember to update the prototype there! */
2472 2473 2474

int
innobase_mysql_cmp(
2475
/*===============*/
2476 2477
					/* out: 1, 0, -1, if a is greater,
					equal, less than b, respectively */
2478
	int		mysql_type,	/* in: MySQL type */
unknown's avatar
unknown committed
2479
	uint		charset_number,	/* in: number of the charset */
2480 2481 2482 2483 2484 2485 2486
	unsigned char*	a,		/* in: data field */
	unsigned int	a_length,	/* in: data field length,
					not UNIV_SQL_NULL */
	unsigned char*	b,		/* in: data field */
	unsigned int	b_length)	/* in: data field length,
					not UNIV_SQL_NULL */
{
unknown's avatar
unknown committed
2487
	CHARSET_INFO*		charset;
2488
	enum_field_types	mysql_tp;
2489
	int                     ret;
2490

unknown's avatar
unknown committed
2491 2492
	DBUG_ASSERT(a_length != UNIV_SQL_NULL);
	DBUG_ASSERT(b_length != UNIV_SQL_NULL);
2493 2494 2495 2496 2497

	mysql_tp = (enum_field_types) mysql_type;

	switch (mysql_tp) {

2498
        case MYSQL_TYPE_BIT:
2499
	case MYSQL_TYPE_STRING:
2500
	case MYSQL_TYPE_VAR_STRING:
unknown's avatar
unknown committed
2501 2502 2503 2504
	case FIELD_TYPE_TINY_BLOB:
	case FIELD_TYPE_MEDIUM_BLOB:
	case FIELD_TYPE_BLOB:
	case FIELD_TYPE_LONG_BLOB:
2505
        case MYSQL_TYPE_VARCHAR:
unknown's avatar
unknown committed
2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525
		/* Use the charset number to pick the right charset struct for
		the comparison. Since the MySQL function get_charset may be
		slow before Bar removes the mutex operation there, we first
		look at 2 common charsets directly. */

		if (charset_number == default_charset_info->number) {
			charset = default_charset_info;
		} else if (charset_number == my_charset_latin1.number) {
			charset = &my_charset_latin1;
		} else {
			charset = get_charset(charset_number, MYF(MY_WME));

			if (charset == NULL) {
				fprintf(stderr,
"InnoDB: fatal error: InnoDB needs charset %lu for doing a comparison,\n"
"InnoDB: but MySQL cannot find that charset.\n", (ulong)charset_number);
				ut_a(0);
			}
		}

unknown's avatar
unknown committed
2526 2527 2528 2529 2530 2531
                /* Starting from 4.1.3, we use strnncollsp() in comparisons of
                non-latin1_swedish_ci strings. NOTE that the collation order
                changes then: 'b\0\0...' is ordered BEFORE 'b  ...'. Users
                having indexes on such data need to rebuild their tables! */

                ret = charset->coll->strnncollsp(charset,
unknown's avatar
unknown committed
2532
                                  a, a_length,
2533
                                                 b, b_length, 0);
2534
		if (ret < 0) {
2535
		        return(-1);
2536
		} else if (ret > 0) {
2537
		        return(1);
2538
		} else {
2539
		        return(0);
2540
	        }
2541 2542 2543 2544 2545 2546 2547 2548 2549
	default:
		assert(0);
	}

	return(0);
}
}

/******************************************************************
unknown's avatar
unknown committed
2550 2551 2552
Converts a MySQL type to an InnoDB type. Note that this function returns
the 'mtype' of InnoDB. InnoDB differentiates between MySQL's old <= 4.1
VARCHAR and the new true VARCHAR in >= 5.0.3 by the 'prtype'. */
2553 2554
inline
ulint
2555 2556
get_innobase_type_from_mysql_type(
/*==============================*/
unknown's avatar
unknown committed
2557 2558 2559 2560 2561
				/* out: DATA_BINARY, DATA_VARCHAR, ... */
	ulint*	unsigned_flag,	/* out: DATA_UNSIGNED if an 'unsigned type';
				at least ENUM and SET, and unsigned integer
				types are 'unsigned types' */
	Field*	field)		/* in: MySQL field */
2562
{
unknown's avatar
unknown committed
2563 2564 2565
	/* The following asserts try to check that the MySQL type code fits in
	8 bits: this is used in ibuf and also when DATA_NOT_NULL is ORed to
	the type */
2566

unknown's avatar
unknown committed
2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593
	DBUG_ASSERT((ulint)FIELD_TYPE_STRING < 256);
	DBUG_ASSERT((ulint)FIELD_TYPE_VAR_STRING < 256);
	DBUG_ASSERT((ulint)FIELD_TYPE_DOUBLE < 256);
	DBUG_ASSERT((ulint)FIELD_TYPE_FLOAT < 256);
	DBUG_ASSERT((ulint)FIELD_TYPE_DECIMAL < 256);

	if (field->flags & UNSIGNED_FLAG) {

		*unsigned_flag = DATA_UNSIGNED;
	} else {
		*unsigned_flag = 0;
	}

	if (field->real_type() == FIELD_TYPE_ENUM
	    || field->real_type() == FIELD_TYPE_SET) {

		/* MySQL has field->type() a string type for these, but the
		data is actually internally stored as an unsigned integer
		code! */

		*unsigned_flag = DATA_UNSIGNED; /* MySQL has its own unsigned
						flag set to zero, even though
						internally this is an unsigned
						integer type */
		return(DATA_INT);
	}

2594
	switch (field->type()) {
unknown's avatar
unknown committed
2595 2596
	        /* NOTE that we only allow string types in DATA_MYSQL
		and DATA_VARMYSQL */
unknown's avatar
unknown committed
2597 2598 2599
                case MYSQL_TYPE_VAR_STRING: /* old <= 4.1 VARCHAR */
                case MYSQL_TYPE_VARCHAR:    /* new >= 5.0.3 true VARCHAR */
					if (field->binary()) {
2600 2601
						return(DATA_BINARY);
					} else if (strcmp(
unknown's avatar
unknown committed
2602 2603
						  field->charset()->name,
						 "latin1_swedish_ci") == 0) {
2604
						return(DATA_VARCHAR);
2605 2606
					} else {
						return(DATA_VARMYSQL);
2607
					}
2608
                case MYSQL_TYPE_BIT:
2609
		case MYSQL_TYPE_STRING: if (field->binary()) {
2610 2611 2612

						return(DATA_FIXBINARY);
					} else if (strcmp(
unknown's avatar
unknown committed
2613 2614
						   field->charset()->name,
						   "latin1_swedish_ci") == 0) {
2615
						return(DATA_CHAR);
2616 2617
					} else {
						return(DATA_MYSQL);
2618
					}
unknown's avatar
unknown committed
2619
                case FIELD_TYPE_NEWDECIMAL:
unknown's avatar
unknown committed
2620
                                        return(DATA_FIXBINARY);
2621 2622 2623 2624 2625 2626 2627 2628 2629
		case FIELD_TYPE_LONG:
		case FIELD_TYPE_LONGLONG:
		case FIELD_TYPE_TINY:
		case FIELD_TYPE_SHORT:
		case FIELD_TYPE_INT24:
		case FIELD_TYPE_DATE:
		case FIELD_TYPE_DATETIME:
		case FIELD_TYPE_YEAR:
		case FIELD_TYPE_NEWDATE:
2630 2631 2632
		case FIELD_TYPE_TIME:
		case FIELD_TYPE_TIMESTAMP:
					return(DATA_INT);
2633
		case FIELD_TYPE_FLOAT:
2634
					return(DATA_FLOAT);
2635
		case FIELD_TYPE_DOUBLE:
2636
					return(DATA_DOUBLE);
2637
		case FIELD_TYPE_DECIMAL:
2638 2639 2640 2641 2642 2643
					return(DATA_DECIMAL);
		case FIELD_TYPE_TINY_BLOB:
		case FIELD_TYPE_MEDIUM_BLOB:
		case FIELD_TYPE_BLOB:
		case FIELD_TYPE_LONG_BLOB:
					return(DATA_BLOB);
2644 2645 2646 2647 2648 2649
		default:
					assert(0);
	}

	return(0);
}
2650

unknown's avatar
unknown committed
2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676
/***********************************************************************
Writes an unsigned integer value < 64k to 2 bytes, in the little-endian
storage format: the low-order byte is stored first. */
inline
void
innobase_write_to_2_little_endian(
/*==============================*/
	byte*	buf,	/* out: where to store; 2 bytes are written */
	ulint	val)	/* in: value to write, must be < 64k */
{
	ut_a(val < 256 * 256);

	const ulint	low_byte	= val & 0xFF;
	const ulint	high_byte	= val >> 8;

	buf[0] = (byte) low_byte;
	buf[1] = (byte) high_byte;
}

/***********************************************************************
Reads an unsigned integer value < 64k from 2 bytes, in the little-endian
storage format: the low-order byte is read first. */
inline
uint
innobase_read_from_2_little_endian(
/*===============================*/
			/* out: value */
	const mysql_byte*	buf)	/* in: from where to read */
{
	const ulint	low_byte	= (ulint) buf[0];
	const ulint	high_byte	= (ulint) buf[1];

	return((uint) (low_byte | (high_byte << 8)));
}

2680
/***********************************************************************
2681
Stores a key value for a row to a buffer. */
2682 2683 2684 2685 2686 2687 2688

uint
ha_innobase::store_key_val_for_row(
/*===============================*/
				/* out: key value length as stored in buff */
	uint 		keynr,	/* in: key number */
	char*		buff,	/* in/out: buffer for the key value (in MySQL
2689 2690
				format) */
	uint		buff_len,/* in: buffer length */
2691
	const mysql_byte* record)/* in: row in MySQL format */
2692 2693 2694 2695 2696
{
	KEY*		key_info 	= table->key_info + keynr;
  	KEY_PART_INFO*	key_part	= key_info->key_part;
  	KEY_PART_INFO*	end		= key_part + key_info->key_parts;
	char*		buff_start	= buff;
unknown's avatar
unknown committed
2697 2698 2699 2700 2701
	enum_field_types mysql_type;
	Field*		field;
	ulint		blob_len;
	byte*		blob_data;
	ibool		is_null;
2702

2703 2704
  	DBUG_ENTER("store_key_val_for_row");

unknown's avatar
unknown committed
2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718
	/* The format for storing a key field in MySQL is the following:

	1. If the column can be NULL, then in the first byte we put 1 if the
	field value is NULL, 0 otherwise.

	2. If the column is of a BLOB type (it must be a column prefix field
	in this case), then we put the length of the data in the field to the
	next 2 bytes, in the little-endian format. If the field is SQL NULL,
	then these 2 bytes are set to 0. Note that the length of data in the
	field is <= column prefix length.

	3. In a column prefix field, prefix_len next bytes are reserved for
	data. In a normal field the max field length next bytes are reserved
	for data. For a VARCHAR(n) the max field length is n. If the stored
unknown's avatar
unknown committed
2719
	value is the SQL NULL then these data bytes are set to 0.
unknown's avatar
unknown committed
2720

unknown's avatar
unknown committed
2721 2722 2723 2724 2725 2726
	4. We always use a 2 byte length for a true >= 5.0.3 VARCHAR. Note that
	in the MySQL row format, the length is stored in 1 or 2 bytes,
	depending on the maximum allowed length. But in the MySQL key value
	format, the length always takes 2 bytes.

	We have to zero-fill the buffer so that MySQL is able to use a
2727 2728
	simple memcmp to compare two key values to determine if they are
	equal. MySQL does this to compare contents of two 'ref' values. */
unknown's avatar
unknown committed
2729

2730
	bzero(buff, buff_len);
unknown's avatar
unknown committed
2731

2732
  	for (; key_part != end; key_part++) {
unknown's avatar
unknown committed
2733
	        is_null = FALSE;
2734 2735 2736 2737

    		if (key_part->null_bit) {
      			if (record[key_part->null_offset]
						& key_part->null_bit) {
unknown's avatar
unknown committed
2738 2739 2740 2741 2742 2743
				*buff = 1;
				is_null = TRUE;
      			} else {
				*buff = 0;
			}
			buff++;
2744
    		}
2745

unknown's avatar
unknown committed
2746 2747 2748
		field = key_part->field;
		mysql_type = field->type();

unknown's avatar
unknown committed
2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767
		if (mysql_type == MYSQL_TYPE_VARCHAR) {
						/* >= 5.0.3 true VARCHAR */
			ulint	lenlen;
			ulint	len;
			byte*	data;

			if (is_null) {
				buff += key_part->length + 2;
				
				continue;
			}

			lenlen = (ulint)
				(((Field_varstring*)field)->length_bytes);

			data = row_mysql_read_true_varchar(&len, 
				(byte*) (record
				+ (ulint)get_field_offset(table, field)),
				lenlen);
unknown's avatar
unknown committed
2768 2769 2770

			/* In a column prefix index, we may need to truncate
			the stored value: */
unknown's avatar
unknown committed
2771
		
unknown's avatar
unknown committed
2772 2773 2774 2775
			if (len > key_part->length) {
			        len = key_part->length;
			}

unknown's avatar
unknown committed
2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792
			/* The length in a key value is always stored in 2
			bytes */

			row_mysql_store_true_var_len((byte*)buff, len, 2);
			buff += 2;

			memcpy(buff, data, len);

			/* Note that we always reserve the maximum possible
			length of the true VARCHAR in the key value, though
			only len first bytes after the 2 length bytes contain
			actual data. The rest of the space was reset to zero
			in the bzero() call above. */

			buff += key_part->length;

		} else if (mysql_type == FIELD_TYPE_TINY_BLOB
unknown's avatar
unknown committed
2793 2794 2795
		    || mysql_type == FIELD_TYPE_MEDIUM_BLOB
		    || mysql_type == FIELD_TYPE_BLOB
		    || mysql_type == FIELD_TYPE_LONG_BLOB) {
2796

2797
			ut_a(key_part->key_part_flag & HA_PART_KEY_SEG);
unknown's avatar
unknown committed
2798 2799

		        if (is_null) {
unknown's avatar
unknown committed
2800
				buff += key_part->length + 2;
unknown's avatar
unknown committed
2801
				 
unknown's avatar
unknown committed
2802
				continue;
unknown's avatar
unknown committed
2803 2804 2805 2806
			}
		    
		        blob_data = row_mysql_read_blob_ref(&blob_len,
				(byte*) (record
unknown's avatar
unknown committed
2807
				+ (ulint)get_field_offset(table, field)),
unknown's avatar
unknown committed
2808 2809
					(ulint) field->pack_length());

unknown's avatar
unknown committed
2810 2811
			ut_a(get_field_offset(table, field)
						     == key_part->offset);
unknown's avatar
unknown committed
2812 2813 2814 2815 2816

			/* All indexes on BLOB and TEXT are column prefix
			indexes, and we may need to truncate the data to be
			stored in the kay value: */

unknown's avatar
unknown committed
2817 2818 2819 2820 2821 2822 2823
			if (blob_len > key_part->length) {
			        blob_len = key_part->length;
			}

			/* MySQL reserves 2 bytes for the length and the
			storage of the number is little-endian */

unknown's avatar
unknown committed
2824 2825
			innobase_write_to_2_little_endian(
					(byte*)buff, (ulint)blob_len);
unknown's avatar
unknown committed
2826 2827 2828 2829
			buff += 2;

			memcpy(buff, blob_data, blob_len);

unknown's avatar
unknown committed
2830 2831 2832
			/* Note that we always reserve the maximum possible
			length of the BLOB prefix in the key value. */

unknown's avatar
unknown committed
2833 2834
			buff += key_part->length;
		} else {
unknown's avatar
unknown committed
2835 2836 2837 2838 2839
			/* Here we handle all other data types except the
			true VARCHAR, BLOB and TEXT. Note that the column
			value we store may be also in a column prefix
			index. */

unknown's avatar
unknown committed
2840 2841 2842 2843 2844
		        if (is_null) {
				 buff += key_part->length;
				 
				 continue;
			}
unknown's avatar
unknown committed
2845

unknown's avatar
unknown committed
2846 2847 2848 2849
			memcpy(buff, record + key_part->offset,
							key_part->length);
			buff += key_part->length;
		}
2850 2851
  	}

2852
	ut_a(buff <= buff_start + buff_len);
unknown's avatar
unknown committed
2853 2854

	DBUG_RETURN((uint)(buff - buff_start));
2855 2856 2857
}

/******************************************************************
unknown's avatar
unknown committed
2858 2859
Builds a 'template' to the prebuilt struct. The template is used in fast
retrieval of just those column values MySQL needs in its processing. */
unknown's avatar
unknown committed
2860
static
2861
void
2862 2863 2864 2865 2866 2867 2868 2869 2870
build_template(
/*===========*/
	row_prebuilt_t*	prebuilt,	/* in: prebuilt struct */
	THD*		thd,		/* in: current user thread, used
					only if templ_type is
					ROW_MYSQL_REC_FIELDS */
	TABLE*		table,		/* in: MySQL table */
	ulint		templ_type)	/* in: ROW_MYSQL_WHOLE_ROW or
					ROW_MYSQL_REC_FIELDS */
2871
{
2872 2873
	dict_index_t*	index;
	dict_index_t*	clust_index;
2874
	mysql_row_templ_t* templ;
2875
	Field*		field;
2876 2877
	ulint		n_fields;
	ulint		n_requested_fields	= 0;
unknown's avatar
Merge  
unknown committed
2878
	ibool		fetch_all_in_key	= FALSE;
2879
	ibool		fetch_primary_key_cols	= FALSE;
2880
	ulint		i;
2881 2882
	/* byte offset of the end of last requested column */
	ulint		mysql_prefix_len	= 0;
2883

unknown's avatar
unknown committed
2884 2885 2886 2887
	if (prebuilt->select_lock_type == LOCK_X) {
		/* We always retrieve the whole clustered index record if we
		use exclusive row level locks, for example, if the read is
		done in an UPDATE statement. */
unknown's avatar
unknown committed
2888

unknown's avatar
unknown committed
2889 2890 2891
	        templ_type = ROW_MYSQL_WHOLE_ROW;
	}

unknown's avatar
unknown committed
2892 2893 2894
	if (templ_type == ROW_MYSQL_REC_FIELDS) {
	     if (prebuilt->hint_need_to_fetch_extra_cols
						== ROW_RETRIEVE_ALL_COLS) {
2895

unknown's avatar
unknown committed
2896 2897
		/* We know we must at least fetch all columns in the key, or
		all columns in the table */
unknown's avatar
unknown committed
2898

unknown's avatar
Merge  
unknown committed
2899
		if (prebuilt->read_just_key) {
unknown's avatar
unknown committed
2900
			/* MySQL has instructed us that it is enough to
2901 2902 2903 2904 2905
			fetch the columns in the key; looks like MySQL
			can set this flag also when there is only a
			prefix of the column in the key: in that case we
			retrieve the whole column from the clustered
			index */
unknown's avatar
unknown committed
2906

unknown's avatar
Merge  
unknown committed
2907 2908 2909 2910
			fetch_all_in_key = TRUE;
		} else {
			templ_type = ROW_MYSQL_WHOLE_ROW;
		}
unknown's avatar
unknown committed
2911 2912
	    } else if (prebuilt->hint_need_to_fetch_extra_cols
						== ROW_RETRIEVE_PRIMARY_KEY) {
unknown's avatar
unknown committed
2913 2914 2915 2916 2917
		/* We must at least fetch all primary key cols. Note that if
		the clustered index was internally generated by InnoDB on the
		row id (no primary key was defined), then
		row_search_for_mysql() will always retrieve the row id to a
		special buffer in the prebuilt struct. */
unknown's avatar
unknown committed
2918 2919 2920

		fetch_primary_key_cols = TRUE;
	    }
2921 2922
	}

unknown's avatar
unknown committed
2923
	clust_index = dict_table_get_first_index_noninline(prebuilt->table);
unknown's avatar
unknown committed
2924

2925
	if (templ_type == ROW_MYSQL_REC_FIELDS) {
unknown's avatar
unknown committed
2926
		index = prebuilt->index;
2927 2928
	} else {
		index = clust_index;
2929
	}
2930

2931 2932 2933 2934 2935 2936 2937
	if (index == clust_index) {
		prebuilt->need_to_access_clustered = TRUE;
	} else {
		prebuilt->need_to_access_clustered = FALSE;
		/* Below we check column by column if we need to access
		the clustered index */
	}
2938

2939
	n_fields = (ulint)table->s->fields; /* number of columns */
2940 2941 2942 2943 2944 2945

	if (!prebuilt->mysql_template) {
		prebuilt->mysql_template = (mysql_row_templ_t*)
						mem_alloc_noninline(
					n_fields * sizeof(mysql_row_templ_t));
	}
2946

2947
	prebuilt->template_type = templ_type;
2948
	prebuilt->null_bitmap_len = table->s->null_bytes;
2949

2950 2951
	prebuilt->templ_contains_blob = FALSE;

unknown's avatar
unknown committed
2952 2953
	/* Note that in InnoDB, i is the column number. MySQL calls columns
	'fields'. */
2954
	for (i = 0; i < n_fields; i++) {
2955
		templ = prebuilt->mysql_template + n_requested_fields;
2956 2957
		field = table->field[i];

2958 2959 2960 2961 2962
		if (UNIV_LIKELY(templ_type == ROW_MYSQL_REC_FIELDS)) {
			/* Decide which columns we should fetch
			and which we can skip. */
			register const ibool	index_contains_field =
				dict_index_contains_col_or_prefix(index, i);
2963

2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989
			if (!index_contains_field && prebuilt->read_just_key) {
				/* If this is a 'key read', we do not need
				columns that are not in the key */

				goto skip_field;
			}

			if (index_contains_field && fetch_all_in_key) {
				/* This field is needed in the query */

				goto include_field;
			}

			if (thd->query_id == field->query_id) {
				/* This field is needed in the query */

				goto include_field;
			}

			if (fetch_primary_key_cols
			    && dict_table_col_in_clustered_key(index->table,
									i)) {
				/* This field is needed in the query */

				goto include_field;
			}
unknown's avatar
unknown committed
2990 2991

			/* This field is not needed in the query, skip it */
2992 2993 2994

			goto skip_field;
		}
2995
include_field:
2996
		n_requested_fields++;
2997

2998
		templ->col_no = i;
2999

3000 3001 3002
		if (index == clust_index) {
			templ->rec_field_no = (index->table->cols + i)
								->clust_pos;
3003
		} else {
3004 3005
			templ->rec_field_no = dict_index_get_nth_col_pos(
								index, i);
3006 3007
		}

3008 3009 3010 3011 3012 3013 3014 3015
		if (templ->rec_field_no == ULINT_UNDEFINED) {
			prebuilt->need_to_access_clustered = TRUE;
		}

		if (field->null_ptr) {
			templ->mysql_null_byte_offset =
				(ulint) ((char*) field->null_ptr
					- (char*) table->record[0]);
3016

3017 3018 3019 3020
			templ->mysql_null_bit_mask = (ulint) field->null_bit;
		} else {
			templ->mysql_null_bit_mask = 0;
		}
3021

unknown's avatar
unknown committed
3022 3023 3024
		templ->mysql_col_offset = (ulint)
					get_field_offset(table, field);

3025
		templ->mysql_col_len = (ulint) field->pack_length();
3026 3027 3028 3029 3030
		if (mysql_prefix_len < templ->mysql_col_offset
				+ templ->mysql_col_len) {
			mysql_prefix_len = templ->mysql_col_offset
				+ templ->mysql_col_len;
		}
unknown's avatar
unknown committed
3031
		templ->type = index->table->cols[i].type.mtype;
unknown's avatar
unknown committed
3032 3033 3034 3035 3036 3037 3038
		templ->mysql_type = (ulint)field->type();

		if (templ->mysql_type == DATA_MYSQL_TRUE_VARCHAR) {
			templ->mysql_length_bytes = (ulint)
				    (((Field_varstring*)field)->length_bytes);
		}
	
3039 3040
		templ->charset = dtype_get_charset_coll_noninline(
				index->table->cols[i].type.prtype);
3041 3042
		templ->mbminlen = index->table->cols[i].type.mbminlen;
		templ->mbmaxlen = index->table->cols[i].type.mbmaxlen;
unknown's avatar
unknown committed
3043 3044
		templ->is_unsigned = index->table->cols[i].type.prtype
							& DATA_UNSIGNED;
3045 3046
		if (templ->type == DATA_BLOB) {
			prebuilt->templ_contains_blob = TRUE;
3047
		}
3048 3049 3050
skip_field:
		;
	}
3051

3052
	prebuilt->n_template = n_requested_fields;
3053
	prebuilt->mysql_prefix_len = mysql_prefix_len;
3054

unknown's avatar
unknown committed
3055
	if (index != clust_index && prebuilt->need_to_access_clustered) {
3056 3057 3058 3059
		/* Change rec_field_no's to correspond to the clustered index
		record */
		for (i = 0; i < n_requested_fields; i++) {
			templ = prebuilt->mysql_template + i;
3060

3061 3062 3063
			templ->rec_field_no =
			    (index->table->cols + templ->col_no)->clust_pos;
		}
3064
	}
3065 3066 3067
}

/************************************************************************
3068
Stores a row in an InnoDB database, to the table specified in this
3069 3070 3071 3072 3073
handle. */

int
ha_innobase::write_row(
/*===================*/
3074 3075
				/* out: error code */
	mysql_byte* 	record)	/* in: a row in MySQL format */
3076
{
3077
	row_prebuilt_t* prebuilt = (row_prebuilt_t*)innobase_prebuilt;
3078
  	int 		error;
3079
	longlong	auto_inc;
unknown's avatar
unknown committed
3080
	longlong	dummy;
unknown's avatar
unknown committed
3081
	ibool           auto_inc_used= FALSE;
unknown's avatar
unknown committed
3082

3083
  	DBUG_ENTER("ha_innobase::write_row");
3084

unknown's avatar
unknown committed
3085
	if (prebuilt->trx !=
3086
                        (trx_t*) current_thd->ha_data[innobase_hton.slot]) {
unknown's avatar
unknown committed
3087 3088
		fprintf(stderr,
"InnoDB: Error: the transaction object for the table handle is at\n"
3089 3090
"InnoDB: %p, but for the current thread it is at %p\n",
			prebuilt->trx,
3091
                        (trx_t*) current_thd->ha_data[innobase_hton.slot]);
3092 3093 3094 3095 3096 3097
		fputs("InnoDB: Dump of 200 bytes around prebuilt: ", stderr);
		ut_print_buf(stderr, ((const byte*)prebuilt) - 100, 200);
		fputs("\n"
			"InnoDB: Dump of 200 bytes around transaction.all: ",
			stderr);
		ut_print_buf(stderr,
unknown's avatar
unknown committed
3098 3099
           	 ((byte*)(&(current_thd->ha_data[innobase_hton.slot]))) - 100,
								200);
3100 3101
		putc('\n', stderr);
		ut_error;
unknown's avatar
unknown committed
3102
	}
unknown's avatar
unknown committed
3103

3104 3105
  	statistic_increment(current_thd->status_var.ha_write_count,
			    &LOCK_status);
3106

3107 3108
        if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_INSERT)
                table->timestamp_field->set_time();
3109

3110
	if ((user_thd->lex->sql_command == SQLCOM_ALTER_TABLE
unknown's avatar
unknown committed
3111
	    || user_thd->lex->sql_command == SQLCOM_OPTIMIZE
3112 3113
	    || user_thd->lex->sql_command == SQLCOM_CREATE_INDEX
	    || user_thd->lex->sql_command == SQLCOM_DROP_INDEX)
unknown's avatar
unknown committed
3114
	    && num_write_row >= 10000) {
3115 3116 3117 3118 3119 3120 3121 3122
		/* ALTER TABLE is COMMITted at every 10000 copied rows.
		The IX table lock for the original table has to be re-issued.
		As this method will be called on a temporary table where the
		contents of the original table is being copied to, it is
		a bit tricky to determine the source table.  The cursor
		position in the source table need not be adjusted after the
		intermediate COMMIT, since writes by other transactions are
		being blocked by a MySQL table lock TL_WRITE_ALLOW_READ. */
3123

3124
		dict_table_t*	src_table;
unknown's avatar
unknown committed
3125
		ulint		mode;
3126

3127
		num_write_row = 0;
3128

unknown's avatar
unknown committed
3129 3130
		/* Commit the transaction.  This will release the table
		locks, so they have to be acquired again. */
3131 3132 3133 3134 3135 3136

		/* Altering an InnoDB table */
		/* Get the source table. */
		src_table = lock_get_src_table(
				prebuilt->trx, prebuilt->table, &mode);
		if (!src_table) {
unknown's avatar
unknown committed
3137
no_commit:
3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150
			/* Unknown situation: do not commit */
			/*
			ut_print_timestamp(stderr);
			fprintf(stderr,
				"  InnoDB error: ALTER TABLE is holding lock"
				" on %lu tables!\n",
				prebuilt->trx->mysql_n_tables_locked);
			*/
			;
		} else if (src_table == prebuilt->table) {
			/* Source table is not in InnoDB format:
			no need to re-acquire locks on it. */

3151
			/* Altering to InnoDB format */
unknown's avatar
unknown committed
3152
                        innobase_commit(user_thd, 1);
3153
			/* Note that this transaction is still active. */
3154
			prebuilt->trx->active_trans = 1;
3155 3156
			/* We will need an IX lock on the destination table. */
		        prebuilt->sql_stat_start = TRUE;
3157 3158 3159
		} else {
			/* Ensure that there are no other table locks than
			LOCK_IX and LOCK_AUTO_INC on the destination table. */
unknown's avatar
unknown committed
3160

3161 3162
			if (!lock_is_table_exclusive(prebuilt->table,
							prebuilt->trx)) {
3163 3164 3165 3166 3167
				goto no_commit;
			}

			/* Commit the transaction.  This will release the table
			locks, so they have to be acquired again. */
unknown's avatar
unknown committed
3168
                        innobase_commit(user_thd, 1);
3169
			/* Note that this transaction is still active. */
3170
			prebuilt->trx->active_trans = 1;
3171
			/* Re-acquire the table lock on the source table. */
3172
			row_lock_table_for_mysql(prebuilt, src_table, mode);
3173 3174 3175
			/* We will need an IX lock on the destination table. */
		        prebuilt->sql_stat_start = TRUE;
		}
3176 3177
	}

unknown's avatar
unknown committed
3178 3179
	num_write_row++;

unknown's avatar
unknown committed
3180 3181 3182
	if (last_query_id != user_thd->query_id) {
	        prebuilt->sql_stat_start = TRUE;
                last_query_id = user_thd->query_id;
unknown's avatar
unknown committed
3183 3184

		innobase_release_stat_resources(prebuilt->trx);
unknown's avatar
unknown committed
3185 3186
	}

3187
  	if (table->next_number_field && record == table->record[0]) {
unknown's avatar
unknown committed
3188 3189
		/* This is the case where the table has an
		auto-increment column */
unknown's avatar
unknown committed
3190 3191 3192 3193 3194 3195 3196 3197 3198 3199 3200 3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214

		/* Initialize the auto-inc counter if it has not been
		initialized yet */

		if (0 == dict_table_autoinc_peek(prebuilt->table)) {

			/* This call initializes the counter */
		        error = innobase_read_and_init_auto_inc(&dummy);

			if (error) {
				/* Deadlock or lock wait timeout */

				goto func_exit;
			}

			/* We have to set sql_stat_start to TRUE because
			the above call probably has called a select, and
			has reset that flag; row_insert_for_mysql has to
			know to set the IX intention lock on the table,
			something it only does at the start of each
			statement */

			prebuilt->sql_stat_start = TRUE;
		}

unknown's avatar
unknown committed
3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234
		/* We have to use the transactional lock mechanism on the
		auto-inc counter of the table to ensure that replication and
		roll-forward of the binlog exactly imitates also the given
		auto-inc values. The lock is released at each SQL statement's
		end. This lock also prevents a race where two threads would
		call ::get_auto_increment() simultaneously. */

		error = row_lock_table_autoinc_for_mysql(prebuilt);

		if (error != DB_SUCCESS) {
			/* Deadlock or lock wait timeout */

			error = convert_error_code_to_mysql(error, user_thd);

			goto func_exit;
		}

		/* We must use the handler code to update the auto-increment
                value to be sure that we increment it correctly. */

3235
    		update_auto_increment();
unknown's avatar
unknown committed
3236
                auto_inc_used = 1;
unknown's avatar
unknown committed
3237

3238
	}
3239

3240 3241 3242 3243
	if (prebuilt->mysql_template == NULL
			|| prebuilt->template_type != ROW_MYSQL_WHOLE_ROW) {
		/* Build the template used in converting quickly between
		the two database formats */
3244

3245 3246
		build_template(prebuilt, NULL, table, ROW_MYSQL_WHOLE_ROW);
	}
3247

3248
	innodb_srv_conc_enter_innodb(prebuilt->trx);
unknown's avatar
Merge  
unknown committed
3249

3250
	error = row_insert_for_mysql((byte*) record, prebuilt);
unknown's avatar
Merge  
unknown committed
3251

3252
	if (error == DB_SUCCESS && auto_inc_used) {
unknown's avatar
Merge  
unknown committed
3253

unknown's avatar
unknown committed
3254
        	/* Fetch the value that was set in the autoincrement field */
unknown's avatar
unknown committed
3255

unknown's avatar
unknown committed
3256
          	auto_inc = table->next_number_field->val_int();
3257

unknown's avatar
unknown committed
3258
          	if (auto_inc != 0) {
unknown's avatar
unknown committed
3259 3260
			/* This call will update the counter according to the
			value that was inserted in the table */
3261

unknown's avatar
unknown committed
3262 3263
            		dict_table_autoinc_update(prebuilt->table, auto_inc);
          	}
3264
        }
3265

unknown's avatar
unknown committed
3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282
        /* A REPLACE command and LOAD DATA INFILE REPLACE handle a duplicate
        key error themselves, and we must update the autoinc counter if we are
        performing those statements. */

        if (error == DB_DUPLICATE_KEY && auto_inc_used
            && (user_thd->lex->sql_command == SQLCOM_REPLACE
                || user_thd->lex->sql_command == SQLCOM_REPLACE_SELECT
                || (user_thd->lex->sql_command == SQLCOM_LOAD
                    && user_thd->lex->duplicates == DUP_REPLACE))) {

                auto_inc = table->next_number_field->val_int();

                if (auto_inc != 0) {
                        dict_table_autoinc_update(prebuilt->table, auto_inc);
                }
        }

unknown's avatar
unknown committed
3283
	innodb_srv_conc_exit_innodb(prebuilt->trx);
unknown's avatar
Merge  
unknown committed
3284

unknown's avatar
unknown committed
3285
	error = convert_error_code_to_mysql(error, user_thd);
3286

3287
	/* Tell InnoDB server that there might be work for
3288
	utility threads: */
3289
func_exit:
3290
	innobase_active_small();
3291 3292 3293 3294

  	DBUG_RETURN(error);
}

3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305
/**************************************************************************
Checks which fields have changed in a row and stores information
of them to an update vector. */
static
int
calc_row_difference(
/*================*/
					/* out: error number or 0 */
	upd_t*		uvect,		/* in/out: update vector */
	mysql_byte* 	old_row,	/* in: old row in MySQL format */
	mysql_byte* 	new_row,	/* in: new row in MySQL format */
unknown's avatar
unknown committed
3306 3307
	struct st_table* table,		/* in: table in MySQL data
					dictionary */
3308
	mysql_byte*	upd_buff,	/* in: buffer to use */
unknown's avatar
unknown committed
3309
	ulint		buff_len,	/* in: buffer length */
3310
	row_prebuilt_t*	prebuilt,	/* in: InnoDB prebuilt struct */
3311 3312
	THD*		thd)		/* in: user thread */
{
unknown's avatar
unknown committed
3313
	mysql_byte*	original_upd_buff = upd_buff;
3314
	Field*		field;
unknown's avatar
unknown committed
3315
	enum_field_types field_mysql_type;
3316 3317 3318
	uint		n_fields;
	ulint		o_len;
	ulint		n_len;
unknown's avatar
unknown committed
3319
	ulint		col_pack_len;
unknown's avatar
unknown committed
3320
	byte*		new_mysql_row_col;
unknown's avatar
unknown committed
3321 3322 3323
	byte*	        o_ptr;
        byte*	        n_ptr;
        byte*	        buf;
3324
	upd_field_t*	ufield;
3325
	ulint		col_type;
3326
	ulint		n_changed = 0;
unknown's avatar
unknown committed
3327
	dfield_t	dfield;
3328
	uint		i;
3329

3330
	n_fields = table->s->fields;
3331

3332
	/* We use upd_buff to convert changed fields */
unknown's avatar
unknown committed
3333
	buf = (byte*) upd_buff;
3334

3335 3336 3337
	for (i = 0; i < n_fields; i++) {
		field = table->field[i];

3338
		/* if (thd->query_id != field->query_id) { */
3339 3340
			/* TODO: check that these fields cannot have
			changed! */
3341

3342 3343
		/*	goto skip_field;
		}*/
3344

unknown's avatar
unknown committed
3345 3346
		o_ptr = (byte*) old_row + get_field_offset(table, field);
		n_ptr = (byte*) new_row + get_field_offset(table, field);
unknown's avatar
unknown committed
3347
		
unknown's avatar
unknown committed
3348 3349 3350
		/* Use new_mysql_row_col and col_pack_len save the values */

		new_mysql_row_col = n_ptr;
unknown's avatar
unknown committed
3351
		col_pack_len = field->pack_length();
unknown's avatar
unknown committed
3352

unknown's avatar
unknown committed
3353 3354
		o_len = col_pack_len;
		n_len = col_pack_len;
3355

unknown's avatar
unknown committed
3356 3357 3358
		/* We use o_ptr and n_ptr to dig up the actual data for
		comparison. */ 

unknown's avatar
unknown committed
3359 3360
		field_mysql_type = field->type();
	
unknown's avatar
unknown committed
3361
		col_type = prebuilt->table->cols[i].type.mtype;
3362 3363 3364 3365 3366 3367

		switch (col_type) {

		case DATA_BLOB:
			o_ptr = row_mysql_read_blob_ref(&o_len, o_ptr, o_len);
			n_ptr = row_mysql_read_blob_ref(&n_len, n_ptr, n_len);
unknown's avatar
unknown committed
3368

3369
			break;
unknown's avatar
unknown committed
3370

3371 3372 3373
		case DATA_VARCHAR:
		case DATA_BINARY:
		case DATA_VARMYSQL:
unknown's avatar
unknown committed
3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390
			if (field_mysql_type == MYSQL_TYPE_VARCHAR) {
				/* This is a >= 5.0.3 type true VARCHAR where
				the real payload data length is stored in
				1 or 2 bytes */
			
				o_ptr = row_mysql_read_true_varchar(
						&o_len, o_ptr,
				    (ulint)
				    (((Field_varstring*)field)->length_bytes));
								
				n_ptr = row_mysql_read_true_varchar(
						&n_len, n_ptr,
				    (ulint)
				    (((Field_varstring*)field)->length_bytes));
			}

			break;
3391 3392 3393
		default:
			;
		}
3394

3395
		if (field->null_ptr) {
unknown's avatar
unknown committed
3396 3397
			if (field_in_record_is_null(table, field,
							(char*) old_row)) {
3398 3399
				o_len = UNIV_SQL_NULL;
			}
3400

unknown's avatar
unknown committed
3401 3402
			if (field_in_record_is_null(table, field,
							(char*) new_row)) {
3403 3404 3405 3406 3407 3408 3409 3410 3411
				n_len = UNIV_SQL_NULL;
			}
		}

		if (o_len != n_len || (o_len != UNIV_SQL_NULL &&
					0 != memcmp(o_ptr, n_ptr, o_len))) {
			/* The field has changed */

			ufield = uvect->fields + n_changed;
unknown's avatar
unknown committed
3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422
	
			/* Let us use a dummy dfield to make the conversion
			from the MySQL column format to the InnoDB format */

			dfield.type = (prebuilt->table->cols + i)->type;

			if (n_len != UNIV_SQL_NULL) {
				buf = row_mysql_store_col_in_innobase_format(
						&dfield,
						(byte*)buf,
						TRUE,
unknown's avatar
unknown committed
3423
						new_mysql_row_col,
unknown's avatar
unknown committed
3424 3425
						col_pack_len,
						prebuilt->table->comp);
unknown's avatar
unknown committed
3426 3427
				ufield->new_val.data = dfield.data;
				ufield->new_val.len = dfield.len;
unknown's avatar
unknown committed
3428 3429 3430 3431
			} else {
				ufield->new_val.data = NULL;
				ufield->new_val.len = UNIV_SQL_NULL;
			}
3432 3433

			ufield->exp = NULL;
unknown's avatar
unknown committed
3434
			ufield->field_no = prebuilt->table->cols[i].clust_pos;
3435 3436 3437 3438 3439 3440 3441
			n_changed++;
		}
	}

	uvect->n_fields = n_changed;
	uvect->info_bits = 0;

unknown's avatar
unknown committed
3442 3443
	ut_a(buf <= (byte*)original_upd_buff + buff_len);

3444 3445 3446 3447 3448 3449 3450
	return(0);
}

/**************************************************************************
Updates a row given as a parameter to a new value. Note that we are given
whole rows, not just the fields which are updated: this incurs some
overhead for CPU when we check which fields are actually updated.
3451
TODO: currently InnoDB does not prevent the 'Halloween problem':
3452 3453
in a searched update a single row can get updated several times
if its index columns are updated! */
3454

3455 3456 3457 3458
int
ha_innobase::update_row(
/*====================*/
					/* out: error number or 0 */
3459 3460
	const mysql_byte* 	old_row,/* in: old row in MySQL format */
	mysql_byte* 		new_row)/* in: new row in MySQL format */
3461 3462 3463 3464 3465
{
	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;
	upd_t*		uvect;
	int		error = 0;

3466
	DBUG_ENTER("ha_innobase::update_row");
3467

unknown's avatar
unknown committed
3468
	ut_ad(prebuilt->trx ==
3469
                (trx_t*) current_thd->ha_data[innobase_hton.slot]);
unknown's avatar
unknown committed
3470

3471 3472
        if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_UPDATE)
                table->timestamp_field->set_time();
3473

unknown's avatar
unknown committed
3474 3475 3476
	if (last_query_id != user_thd->query_id) {
	        prebuilt->sql_stat_start = TRUE;
                last_query_id = user_thd->query_id;
unknown's avatar
unknown committed
3477 3478

		innobase_release_stat_resources(prebuilt->trx);
unknown's avatar
unknown committed
3479 3480
	}

3481 3482 3483 3484 3485
	if (prebuilt->upd_node) {
		uvect = prebuilt->upd_node->update;
	} else {
		uvect = row_get_prebuilt_update_vector(prebuilt);
	}
3486 3487 3488 3489

	/* Build an update vector from the modified fields in the rows
	(uses upd_buff of the handle) */

3490
	calc_row_difference(uvect, (mysql_byte*) old_row, new_row, table,
unknown's avatar
unknown committed
3491 3492 3493
			upd_buff, (ulint)upd_and_key_val_buff_len,
			prebuilt, user_thd);

3494 3495 3496
	/* This is not a delete */
	prebuilt->upd_node->is_delete = FALSE;

unknown's avatar
unknown committed
3497
	assert(prebuilt->template_type == ROW_MYSQL_WHOLE_ROW);
3498

unknown's avatar
unknown committed
3499
	innodb_srv_conc_enter_innodb(prebuilt->trx);
unknown's avatar
Merge  
unknown committed
3500

3501
	error = row_update_for_mysql((byte*) old_row, prebuilt);
3502

unknown's avatar
unknown committed
3503
	innodb_srv_conc_exit_innodb(prebuilt->trx);
unknown's avatar
Merge  
unknown committed
3504

unknown's avatar
unknown committed
3505
	error = convert_error_code_to_mysql(error, user_thd);
3506

3507
	/* Tell InnoDB server that there might be work for
3508 3509
	utility threads: */

3510
	innobase_active_small();
3511 3512 3513 3514 3515 3516 3517 3518 3519 3520

	DBUG_RETURN(error);
}

/**************************************************************************
Deletes a row given as the parameter. */

int
ha_innobase::delete_row(
/*====================*/
3521 3522
					/* out: error number or 0 */
	const mysql_byte* record)	/* in: a row in MySQL format */
3523 3524 3525 3526
{
	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;
	int		error = 0;

3527
	DBUG_ENTER("ha_innobase::delete_row");
3528

unknown's avatar
unknown committed
3529
	ut_ad(prebuilt->trx ==
3530
                (trx_t*) current_thd->ha_data[innobase_hton.slot]);
unknown's avatar
unknown committed
3531

unknown's avatar
unknown committed
3532 3533 3534
	if (last_query_id != user_thd->query_id) {
	        prebuilt->sql_stat_start = TRUE;
                last_query_id = user_thd->query_id;
unknown's avatar
unknown committed
3535 3536

		innobase_release_stat_resources(prebuilt->trx);
unknown's avatar
unknown committed
3537 3538
	}

3539 3540 3541
	if (!prebuilt->upd_node) {
		row_get_prebuilt_update_vector(prebuilt);
	}
3542 3543

	/* This is a delete */
3544

3545
	prebuilt->upd_node->is_delete = TRUE;
3546

unknown's avatar
unknown committed
3547
	innodb_srv_conc_enter_innodb(prebuilt->trx);
unknown's avatar
Merge  
unknown committed
3548

3549
	error = row_update_for_mysql((byte*) record, prebuilt);
3550

unknown's avatar
unknown committed
3551
	innodb_srv_conc_exit_innodb(prebuilt->trx);
unknown's avatar
Merge  
unknown committed
3552

unknown's avatar
unknown committed
3553
	error = convert_error_code_to_mysql(error, user_thd);
3554

3555
	/* Tell the InnoDB server that there might be work for
3556 3557
	utility threads: */

3558
	innobase_active_small();
3559 3560 3561 3562

	DBUG_RETURN(error);
}

3563
/**************************************************************************
unknown's avatar
unknown committed
3564 3565 3566
Removes a new lock set on a row. This can be called after a row has been read
in the processing of an UPDATE or a DELETE query, if the option
innodb_locks_unsafe_for_binlog is set. */
3567 3568 3569 3570 3571 3572 3573 3574 3575 3576 3577 3578 3579 3580 3581 3582 3583

void
ha_innobase::unlock_row(void)
/*=========================*/
{
	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;

	DBUG_ENTER("ha_innobase::unlock_row");

	if (last_query_id != user_thd->query_id) {
		ut_print_timestamp(stderr);
		fprintf(stderr,
"  InnoDB: Error: last_query_id is %lu != user_thd_query_id is %lu\n",
			(ulong)last_query_id, (ulong)user_thd->query_id);
		mem_analyze_corruption((byte *) prebuilt->trx);
		ut_error;
	}
unknown's avatar
unknown committed
3584 3585 3586 3587
	
	if (srv_locks_unsafe_for_binlog) {
		row_unlock_for_mysql(prebuilt, FALSE);
	}
3588 3589
}

3590 3591 3592 3593 3594 3595 3596 3597 3598 3599 3600 3601
/**********************************************************************
Prepares the handle for using an index by making the given index the
active one. */

int
ha_innobase::index_init(
/*====================*/
			/* out: 0 or error number */
	uint 	keynr)	/* in: key (index) number */
{
	DBUG_ENTER("index_init");

	/* The result of the index switch is passed on to the caller */
	const int	result = change_active_index(keynr);

	DBUG_RETURN(result);
}

/**********************************************************************
3608
Currently does nothing. */
3609 3610 3611 3612 3613 3614 3615

int
ha_innobase::index_end(void)
/*========================*/
{
	int 	error	= 0;
  	DBUG_ENTER("index_end");
unknown's avatar
unknown committed
3616
        active_index=MAX_KEY;
3617 3618 3619 3620 3621
  	DBUG_RETURN(error);
}

/*************************************************************************
Converts a search mode flag understood by MySQL to a flag understood
3622
by InnoDB. */
3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636
inline
ulint
convert_search_mode_to_innobase(
/*============================*/
	enum ha_rkey_function	find_flag)
{
	switch (find_flag) {
  		case HA_READ_KEY_EXACT:		return(PAGE_CUR_GE);
  			/* the above does not require the index to be UNIQUE */
  		case HA_READ_KEY_OR_NEXT:	return(PAGE_CUR_GE);
		case HA_READ_KEY_OR_PREV:	return(PAGE_CUR_LE);
		case HA_READ_AFTER_KEY:		return(PAGE_CUR_G);
		case HA_READ_BEFORE_KEY:	return(PAGE_CUR_L);
		case HA_READ_PREFIX:		return(PAGE_CUR_GE);
unknown's avatar
unknown committed
3637
	        case HA_READ_PREFIX_LAST:       return(PAGE_CUR_LE);
unknown's avatar
unknown committed
3638 3639 3640
                case HA_READ_PREFIX_LAST_OR_PREV:return(PAGE_CUR_LE);
		  /* In MySQL-4.0 HA_READ_PREFIX and HA_READ_PREFIX_LAST always
		  pass a complete-field prefix of a key value as the search
unknown's avatar
unknown committed
3641 3642 3643 3644 3645
		  tuple. I.e., it is not allowed that the last field would
		  just contain n first bytes of the full field value.
		  MySQL uses a 'padding' trick to convert LIKE 'abc%'
		  type queries so that it can use as a search tuple
		  a complete-field-prefix of a key value. Thus, the InnoDB
unknown's avatar
unknown committed
3646 3647 3648 3649 3650 3651 3652
		  search mode PAGE_CUR_LE_OR_EXTENDS is never used.
		  TODO: when/if MySQL starts to use also partial-field
		  prefixes, we have to deal with stripping of spaces
		  and comparison of non-latin1 char type fields in
		  innobase_mysql_cmp() to get PAGE_CUR_LE_OR_EXTENDS to
		  work correctly. */

3653 3654 3655 3656 3657
		default:			assert(0);
	}

	return(0);
}
3658

unknown's avatar
unknown committed
3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686 3687 3688 3689 3690 3691 3692 3693 3694 3695 3696 3697 3698 3699 3700 3701 3702 3703 3704 3705 3706 3707
/*
   BACKGROUND INFO: HOW A SELECT SQL QUERY IS EXECUTED
   ---------------------------------------------------
The following does not cover all the details, but explains how we determine
the start of a new SQL statement, and what is associated with it.

For each table in the database the MySQL interpreter may have several
table handle instances in use, also in a single SQL query. For each table
handle instance there is an InnoDB  'prebuilt' struct which contains most
of the InnoDB data associated with this table handle instance.

  A) if the user has not explicitly set any MySQL table level locks:

  1) MySQL calls ::external_lock to set an 'intention' table level lock on
the table of the handle instance. There we set
prebuilt->sql_stat_start = TRUE. The flag sql_stat_start should be set
true if we are taking this table handle instance to use in a new SQL
statement issued by the user. We also increment trx->n_mysql_tables_in_use.

  2) If prebuilt->sql_stat_start == TRUE we 'pre-compile' the MySQL search
instructions to prebuilt->template of the table handle instance in
::index_read. The template is used to save CPU time in large joins.

  3) In row_search_for_mysql, if prebuilt->sql_stat_start is true, we
allocate a new consistent read view for the trx if it does not yet have one,
or in the case of a locking read, set an InnoDB 'intention' table level
lock on the table.

  4) We do the SELECT. MySQL may repeatedly call ::index_read for the
same table handle instance, if it is a join.

  5) When the SELECT ends, MySQL removes its intention table level locks
in ::external_lock. When trx->n_mysql_tables_in_use drops to zero,
 (a) we execute a COMMIT there if the autocommit is on,
 (b) we also release possible 'SQL statement level resources' InnoDB may
have for this SQL statement. The MySQL interpreter does NOT execute
autocommit for pure read transactions, though it should. That is why the
table handler in that case has to execute the COMMIT in ::external_lock.

  B) If the user has explicitly set MySQL table level locks, then MySQL
does NOT call ::external_lock at the start of the statement. To determine
when we are at the start of a new SQL statement we at the start of
::index_read also compare the query id to the latest query id where the
table handle instance was used. If it has changed, we know we are at the
start of a new SQL statement. Since the query id can theoretically
wrap around, we use this test only as a secondary way of determining the
start of a new SQL statement. */


3708 3709 3710 3711 3712 3713 3714 3715 3716
/**************************************************************************
Positions an index cursor to the index specified in the handle. Fetches the
row if any. */

int
ha_innobase::index_read(
/*====================*/
					/* out: 0, HA_ERR_KEY_NOT_FOUND,
					or error number */
3717
	mysql_byte*		buf,	/* in/out: buffer for the returned
3718
					row */
3719
	const mysql_byte* 	key_ptr,/* in: key value; if this is NULL
3720
					we position the cursor at the
unknown's avatar
unknown committed
3721 3722 3723
					start or end of index; this can
					also contain an InnoDB row id, in
					which case key_len is the InnoDB
unknown's avatar
unknown committed
3724 3725 3726 3727
					row id length; the key value can
					also be a prefix of a full key value,
					and the last column can be a prefix
					of a full column */
3728
	uint			key_len,/* in: key value length */
3729 3730 3731 3732 3733 3734 3735 3736 3737 3738
	enum ha_rkey_function find_flag)/* in: search flags from my_base.h */
{
	row_prebuilt_t* prebuilt	= (row_prebuilt_t*) innobase_prebuilt;
	ulint		mode;
	dict_index_t*	index;
	ulint		match_mode 	= 0;
	int 		error;
	ulint		ret;

  	DBUG_ENTER("index_read");
unknown's avatar
unknown committed
3739

unknown's avatar
unknown committed
3740
	ut_ad(prebuilt->trx ==
3741
                (trx_t*) current_thd->ha_data[innobase_hton.slot]);
unknown's avatar
unknown committed
3742

3743 3744
  	statistic_increment(current_thd->status_var.ha_read_key_count,
			    &LOCK_status);
3745

unknown's avatar
unknown committed
3746 3747 3748
	if (last_query_id != user_thd->query_id) {
	        prebuilt->sql_stat_start = TRUE;
                last_query_id = user_thd->query_id;
unknown's avatar
unknown committed
3749 3750

		innobase_release_stat_resources(prebuilt->trx);
unknown's avatar
unknown committed
3751 3752
	}

3753
	index = prebuilt->index;
3754

unknown's avatar
unknown committed
3755 3756
	/* Note that if the index for which the search template is built is not
        necessarily prebuilt->index, but can also be the clustered index */
3757

3758 3759 3760 3761
	if (prebuilt->sql_stat_start) {
		build_template(prebuilt, user_thd, table,
							ROW_MYSQL_REC_FIELDS);
	}
3762 3763

	if (key_ptr) {
unknown's avatar
unknown committed
3764 3765 3766
	        /* Convert the search key value to InnoDB format into
		prebuilt->search_tuple */

3767
		row_sel_convert_mysql_key_to_innobase(prebuilt->search_tuple,
unknown's avatar
unknown committed
3768 3769 3770 3771
					(byte*) key_val_buff,
					(ulint)upd_and_key_val_buff_len,
					index,
					(byte*) key_ptr,
3772
					(ulint) key_len, prebuilt->trx);
3773 3774 3775 3776 3777 3778
	} else {
		/* We position the cursor to the last or the first entry
		in the index */

 		dtuple_set_n_fields(prebuilt->search_tuple, 0);
	}
3779

3780 3781 3782 3783 3784 3785 3786 3787 3788 3789 3790 3791
	mode = convert_search_mode_to_innobase(find_flag);

	match_mode = 0;

	if (find_flag == HA_READ_KEY_EXACT) {
		match_mode = ROW_SEL_EXACT;

	} else if (find_flag == HA_READ_PREFIX
				|| find_flag == HA_READ_PREFIX_LAST) {
		match_mode = ROW_SEL_EXACT_PREFIX;
	}

unknown's avatar
unknown committed
3792
	last_match_mode = (uint) match_mode;
3793

unknown's avatar
unknown committed
3794
	innodb_srv_conc_enter_innodb(prebuilt->trx);
unknown's avatar
Merge  
unknown committed
3795

unknown's avatar
unknown committed
3796
	ret = row_search_for_mysql((byte*) buf, mode, prebuilt, match_mode, 0);
3797

unknown's avatar
unknown committed
3798
	innodb_srv_conc_exit_innodb(prebuilt->trx);
unknown's avatar
Merge  
unknown committed
3799

3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811
	if (ret == DB_SUCCESS) {
		error = 0;
		table->status = 0;

	} else if (ret == DB_RECORD_NOT_FOUND) {
		error = HA_ERR_KEY_NOT_FOUND;
		table->status = STATUS_NOT_FOUND;

	} else if (ret == DB_END_OF_INDEX) {
		error = HA_ERR_KEY_NOT_FOUND;
		table->status = STATUS_NOT_FOUND;
	} else {
unknown's avatar
unknown committed
3812
		error = convert_error_code_to_mysql((int) ret, user_thd);
3813 3814
		table->status = STATUS_NOT_FOUND;
	}
3815

3816 3817 3818
	DBUG_RETURN(error);
}

unknown's avatar
unknown committed
3819 3820 3821
/***********************************************************************
The following functions works like index_read, but it find the last
row with the current key value or prefix. */
3822 3823

int
unknown's avatar
unknown committed
3824 3825 3826 3827 3828 3829 3830 3831 3832
ha_innobase::index_read_last(
/*=========================*/
			           /* out: 0, HA_ERR_KEY_NOT_FOUND, or an
				   error code */
        mysql_byte*       buf,     /* out: fetched row */
        const mysql_byte* key_ptr, /* in: key value, or a prefix of a full
				   key value */
	uint              key_len) /* in: length of the key val or prefix
				   in bytes */
3833
{
unknown's avatar
unknown committed
3834
        return(index_read(buf, key_ptr, key_len, HA_READ_PREFIX_LAST));
3835 3836
}

3837
/************************************************************************
unknown's avatar
unknown committed
3838
Changes the active index of a handle. */
3839 3840 3841 3842

int
ha_innobase::change_active_index(
/*=============================*/
3843 3844 3845
			/* out: 0 or error code */
	uint 	keynr)	/* in: use this index; MAX_KEY means always clustered
			index, even if it was internally generated by
3846
			InnoDB */
3847
{
unknown's avatar
unknown committed
3848 3849
	row_prebuilt_t* prebuilt	= (row_prebuilt_t*) innobase_prebuilt;
	KEY*		key=0;
3850 3851
	statistic_increment(current_thd->status_var.ha_read_key_count,
			    &LOCK_status);
unknown's avatar
unknown committed
3852
	DBUG_ENTER("change_active_index");
3853

unknown's avatar
unknown committed
3854 3855
	ut_ad(user_thd == current_thd);
	ut_ad(prebuilt->trx ==
3856
             (trx_t*) current_thd->ha_data[innobase_hton.slot]);
unknown's avatar
unknown committed
3857

unknown's avatar
unknown committed
3858
	active_index = keynr;
3859

3860
	if (keynr != MAX_KEY && table->s->keys > 0) {
unknown's avatar
unknown committed
3861
		key = table->key_info + active_index;
3862

unknown's avatar
unknown committed
3863
		prebuilt->index = dict_table_get_index_noninline(
unknown's avatar
unknown committed
3864 3865
						     prebuilt->table,
						     key->name);
unknown's avatar
unknown committed
3866 3867
        } else {
		prebuilt->index = dict_table_get_first_index_noninline(
unknown's avatar
unknown committed
3868
							   prebuilt->table);
unknown's avatar
unknown committed
3869
	}
3870

unknown's avatar
unknown committed
3871 3872 3873 3874 3875 3876
	if (!prebuilt->index) {
	       sql_print_error(
"Innodb could not find key n:o %u with name %s from dict cache for table %s",
	      keynr, key ? key->name : "NULL", prebuilt->table->name);
	      DBUG_RETURN(1);
	}
3877

unknown's avatar
unknown committed
3878
	assert(prebuilt->search_tuple != 0);
unknown's avatar
Merge  
unknown committed
3879

unknown's avatar
unknown committed
3880
	dtuple_set_n_fields(prebuilt->search_tuple, prebuilt->index->n_fields);
3881

unknown's avatar
unknown committed
3882
	dict_index_copy_types(prebuilt->search_tuple, prebuilt->index,
3883
			prebuilt->index->n_fields);
3884

unknown's avatar
unknown committed
3885 3886 3887 3888 3889
	/* MySQL changes the active index for a handle also during some
	queries, for example SELECT MAX(a), SUM(a) first retrieves the MAX()
	and then calculates the sum. Previously we played safe and used
	the flag ROW_MYSQL_WHOLE_ROW below, but that caused unnecessary
	copying. Starting from MySQL-4.1 we use a more efficient flag here. */
3890

unknown's avatar
unknown committed
3891
	build_template(prebuilt, user_thd, table, ROW_MYSQL_REC_FIELDS);
3892

unknown's avatar
unknown committed
3893
	DBUG_RETURN(0);
3894 3895 3896 3897 3898 3899 3900 3901 3902 3903 3904
}

/**************************************************************************
Positions an index cursor to the index specified in keynr. Fetches the
row if any. */
/* ??? This is only used to read whole keys ??? */

int
ha_innobase::index_read_idx(
/*========================*/
					/* out: error number or 0 */
3905
	mysql_byte*	buf,		/* in/out: buffer for the returned
3906 3907
					row */
	uint 		keynr,		/* in: use this index */
3908
	const mysql_byte* key,		/* in: key value; if this is NULL
3909 3910 3911 3912 3913
					we position the cursor at the
					start or end of index */
	uint		key_len,	/* in: key value length */
	enum ha_rkey_function find_flag)/* in: search flags from my_base.h */
{
unknown's avatar
Merge  
unknown committed
3914 3915 3916 3917
	if (change_active_index(keynr)) {

		return(1);
	}
3918 3919 3920 3921 3922 3923 3924 3925 3926 3927 3928 3929 3930

	return(index_read(buf, key, key_len, find_flag));
}

/***************************************************************************
Reads the next or previous row from a cursor, which must have previously been
positioned using index_read. */

int
ha_innobase::general_fetch(
/*=======================*/
				/* out: 0, HA_ERR_END_OF_FILE, or error
				number */
3931
	mysql_byte* 	buf,	/* in/out: buffer for next row in MySQL
3932 3933 3934 3935 3936 3937 3938 3939
				format */
	uint 	direction,	/* in: ROW_SEL_NEXT or ROW_SEL_PREV */
	uint	match_mode)	/* in: 0, ROW_SEL_EXACT, or
				ROW_SEL_EXACT_PREFIX */
{
	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;
	ulint		ret;
	int		error	= 0;
3940

3941
	DBUG_ENTER("general_fetch");
3942

unknown's avatar
unknown committed
3943
	ut_ad(prebuilt->trx ==
3944
             (trx_t*) current_thd->ha_data[innobase_hton.slot]);
unknown's avatar
unknown committed
3945

unknown's avatar
unknown committed
3946
	innodb_srv_conc_enter_innodb(prebuilt->trx);
unknown's avatar
unknown committed
3947

unknown's avatar
Merge  
unknown committed
3948 3949
	ret = row_search_for_mysql((byte*)buf, 0, prebuilt, match_mode,
								direction);
unknown's avatar
unknown committed
3950
	innodb_srv_conc_exit_innodb(prebuilt->trx);
3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963

	if (ret == DB_SUCCESS) {
		error = 0;
		table->status = 0;

	} else if (ret == DB_RECORD_NOT_FOUND) {
		error = HA_ERR_END_OF_FILE;
		table->status = STATUS_NOT_FOUND;

	} else if (ret == DB_END_OF_INDEX) {
		error = HA_ERR_END_OF_FILE;
		table->status = STATUS_NOT_FOUND;
	} else {
unknown's avatar
unknown committed
3964
		error = convert_error_code_to_mysql((int) ret, user_thd);
3965 3966
		table->status = STATUS_NOT_FOUND;
	}
3967

3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979
	DBUG_RETURN(error);
}

/***************************************************************************
Reads the next row from a cursor, which must have previously been
positioned using index_read. */

int
ha_innobase::index_next(
/*====================*/
				/* out: 0, HA_ERR_END_OF_FILE, or error
				number */
3980
	mysql_byte* 	buf)	/* in/out: buffer for next row in MySQL
3981 3982
				format */
{
3983 3984
  	statistic_increment(current_thd->status_var.ha_read_next_count,
			    &LOCK_status);
3985

3986 3987 3988 3989 3990 3991 3992 3993 3994 3995 3996
	return(general_fetch(buf, ROW_SEL_NEXT, 0));
}

/***********************************************************************
Reads the next row matching to the key value given as the parameter. */

int
ha_innobase::index_next_same(
/*=========================*/
				/* out: 0, HA_ERR_END_OF_FILE, or error
				number */
3997 3998
	mysql_byte* 	buf,	/* in/out: buffer for the row */
	const mysql_byte* key,	/* in: key value */
3999 4000
	uint 		keylen)	/* in: key value length */
{
4001 4002
  	statistic_increment(current_thd->status_var.ha_read_next_count,
			    &LOCK_status);
4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015

	return(general_fetch(buf, ROW_SEL_NEXT, last_match_mode));
}

/***************************************************************************
Reads the previous row from a cursor, which must have previously been
positioned using index_read. */

int
ha_innobase::index_prev(
/*====================*/
				/* out: 0, HA_ERR_END_OF_FILE, or error
				number */
4016
	mysql_byte* 	buf)	/* in/out: buffer for previous row in MySQL
4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028
				format */
{
	return(general_fetch(buf, ROW_SEL_PREV, 0));
}

/************************************************************************
Positions a cursor on the first record in an index and reads the
corresponding row to buf. */

int
ha_innobase::index_first(
/*=====================*/
4029
				/* out: 0, HA_ERR_END_OF_FILE,
4030 4031
				or error code */
	mysql_byte*	buf)	/* in/out: buffer for the row */
4032 4033 4034 4035
{
	int	error;

  	DBUG_ENTER("index_first");
4036 4037
  	statistic_increment(current_thd->status_var.ha_read_first_count,
			    &LOCK_status);
4038 4039 4040

  	error = index_read(buf, NULL, 0, HA_READ_AFTER_KEY);

4041 4042 4043 4044 4045 4046
        /* MySQL does not seem to allow this to return HA_ERR_KEY_NOT_FOUND */

  	if (error == HA_ERR_KEY_NOT_FOUND) {
  		error = HA_ERR_END_OF_FILE;
  	}

4047 4048 4049 4050 4051 4052 4053 4054 4055 4056
  	DBUG_RETURN(error);
}

/************************************************************************
Positions a cursor on the last record in an index and reads the
corresponding row to buf. */

int
ha_innobase::index_last(
/*====================*/
4057 4058
				/* out: 0, HA_ERR_END_OF_FILE, or error code */
	mysql_byte*	buf)	/* in/out: buffer for the row */
4059 4060 4061
{
	int	error;

4062
  	DBUG_ENTER("index_last");
4063 4064
  	statistic_increment(current_thd->status_var.ha_read_last_count,
			    &LOCK_status);
4065 4066 4067

  	error = index_read(buf, NULL, 0, HA_READ_BEFORE_KEY);

4068
        /* MySQL does not seem to allow this to return HA_ERR_KEY_NOT_FOUND */
4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083

  	if (error == HA_ERR_KEY_NOT_FOUND) {
  		error = HA_ERR_END_OF_FILE;
  	}

  	DBUG_RETURN(error);
}

/********************************************************************
Initialize a table scan. */

int
ha_innobase::rnd_init(
/*==================*/
			/* out: 0 or error number */
4084
	bool	scan)	/* in: ???????? */
4085
{
unknown's avatar
Merge  
unknown committed
4086
	int	err;
unknown's avatar
unknown committed
4087

4088
	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;
4089

unknown's avatar
unknown committed
4090 4091 4092
	/* Store the active index value so that we can restore the original
	value after a scan */

4093
	if (prebuilt->clust_index_was_generated) {
unknown's avatar
Merge  
unknown committed
4094
		err = change_active_index(MAX_KEY);
4095
	} else {
unknown's avatar
Merge  
unknown committed
4096
		err = change_active_index(primary_key);
4097
	}
4098

4099
  	start_of_scan = 1;
4100

unknown's avatar
Merge  
unknown committed
4101
 	return(err);
4102 4103 4104
}

/*********************************************************************
unknown's avatar
unknown committed
4105
Ends a table scan. */
4106 4107 4108 4109 4110 4111

int
ha_innobase::rnd_end(void)
/*======================*/
				/* out: 0 or error number */
{
unknown's avatar
unknown committed
4112
	return(index_end());
4113 4114 4115 4116 4117 4118 4119 4120 4121 4122
}

/*********************************************************************
Reads the next row in a table scan (also used to read the FIRST row
in a table scan). */

int
ha_innobase::rnd_next(
/*==================*/
			/* out: 0, HA_ERR_END_OF_FILE, or error number */
4123
	mysql_byte* buf)/* in/out: returns the row in this buffer,
4124 4125
			in MySQL format */
{
4126
	int	error;
4127 4128

  	DBUG_ENTER("rnd_next");
4129 4130
  	statistic_increment(current_thd->status_var.ha_read_rnd_next_count,
			    &LOCK_status);
4131

4132
  	if (start_of_scan) {
4133 4134 4135 4136
		error = index_first(buf);
		if (error == HA_ERR_KEY_NOT_FOUND) {
			error = HA_ERR_END_OF_FILE;
		}
4137
		start_of_scan = 0;
4138
	} else {
4139
		error = general_fetch(buf, ROW_SEL_NEXT, 0);
4140
	}
4141

4142 4143 4144 4145
  	DBUG_RETURN(error);
}

/**************************************************************************
unknown's avatar
unknown committed
4146
Fetches a row from the table based on a row reference. */
4147

4148 4149 4150
int
ha_innobase::rnd_pos(
/*=================*/
4151 4152 4153
				/* out: 0, HA_ERR_KEY_NOT_FOUND,
				or error code */
	mysql_byte* 	buf,	/* in/out: buffer for the row */
unknown's avatar
unknown committed
4154 4155 4156 4157 4158
	mysql_byte*	pos)	/* in: primary key value of the row in the
				MySQL format, or the row id if the clustered
				index was internally generated by InnoDB;
				the length of data in pos has to be
				ref_length */
4159
{
4160 4161 4162
	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;
	int		error;
	uint		keynr	= active_index;
4163
	DBUG_ENTER("rnd_pos");
unknown's avatar
unknown committed
4164
	DBUG_DUMP("key", (char*) pos, ref_length);
unknown's avatar
unknown committed
4165

4166 4167
	statistic_increment(current_thd->status_var.ha_read_rnd_count,
			    &LOCK_status);
4168

unknown's avatar
unknown committed
4169
	ut_ad(prebuilt->trx ==
4170
                (trx_t*) current_thd->ha_data[innobase_hton.slot]);
unknown's avatar
unknown committed
4171

4172 4173 4174 4175
	if (prebuilt->clust_index_was_generated) {
		/* No primary key was defined for the table and we
		generated the clustered index from the row id: the
		row reference is the row id, not any key value
unknown's avatar
unknown committed
4176
		that MySQL knows of */
4177

unknown's avatar
Merge  
unknown committed
4178
		error = change_active_index(MAX_KEY);
4179
	} else {
unknown's avatar
Merge  
unknown committed
4180
		error = change_active_index(primary_key);
4181
	}
4182

unknown's avatar
Merge  
unknown committed
4183
	if (error) {
unknown's avatar
unknown committed
4184
	        DBUG_PRINT("error", ("Got error: %ld", error));
unknown's avatar
Merge  
unknown committed
4185 4186
		DBUG_RETURN(error);
	}
unknown's avatar
unknown committed
4187

unknown's avatar
unknown committed
4188 4189 4190 4191
	/* Note that we assume the length of the row reference is fixed
        for the table, and it is == ref_length */

	error = index_read(buf, pos, ref_length, HA_READ_KEY_EXACT);
unknown's avatar
unknown committed
4192 4193 4194

	if (error) {
		DBUG_PRINT("error", ("Got error: %ld", error));
unknown's avatar
unknown committed
4195
	}
unknown's avatar
unknown committed
4196

4197
	change_active_index(keynr);
4198

4199 4200 4201 4202
  	DBUG_RETURN(error);
}

/*************************************************************************
4203
Stores a reference to the current row to 'ref' field of the handle. Note
unknown's avatar
unknown committed
4204 4205
that in the case where we have generated the clustered index for the
table, the function parameter is illogical: we MUST ASSUME that 'record'
unknown's avatar
unknown committed
4206
is the current 'position' of the handle, because if row ref is actually
4207
the row id internally generated in InnoDB, then 'record' does not contain
4208 4209
it. We just guess that the row id must be for the record where the handle
was positioned the last time. */
4210 4211 4212 4213

void
ha_innobase::position(
/*==================*/
4214
	const mysql_byte*	record)	/* in: row in MySQL format */
4215
{
4216 4217
	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;
	uint		len;
4218

unknown's avatar
unknown committed
4219
	ut_ad(prebuilt->trx ==
4220
                (trx_t*) current_thd->ha_data[innobase_hton.slot]);
unknown's avatar
unknown committed
4221

4222 4223 4224 4225
	if (prebuilt->clust_index_was_generated) {
		/* No primary key was defined for the table and we
		generated the clustered index from row id: the
		row reference will be the row id, not any key value
unknown's avatar
unknown committed
4226
		that MySQL knows of */
4227 4228 4229 4230 4231

		len = DATA_ROW_ID_LEN;

		memcpy(ref, prebuilt->row_id, len);
	} else {
4232 4233
		len = store_key_val_for_row(primary_key, (char*)ref,
							 ref_length, record);
4234
	}
4235

unknown's avatar
unknown committed
4236 4237
	/* We assume that the 'ref' value len is always fixed for the same
	table. */
unknown's avatar
unknown committed
4238
  
unknown's avatar
unknown committed
4239
	if (len != ref_length) {
unknown's avatar
unknown committed
4240
		fprintf(stderr,
unknown's avatar
unknown committed
4241
	 "InnoDB: Error: stored ref len is %lu, but table ref len is %lu\n",
4242
		  (ulong)len, (ulong)ref_length);
unknown's avatar
unknown committed
4243
	}
4244 4245 4246
}

/*********************************************************************
4247
Creates a table definition to an InnoDB database. */
4248 4249 4250 4251
static
int
create_table_def(
/*=============*/
4252
	trx_t*		trx,		/* in: InnoDB transaction handle */
4253 4254
	TABLE*		form,		/* in: information on table
					columns and indexes */
unknown's avatar
unknown committed
4255
	const char*	table_name,	/* in: table name */
unknown's avatar
unknown committed
4256
	const char*	path_of_temp_table,/* in: if this is a table explicitly
unknown's avatar
unknown committed
4257 4258 4259 4260 4261 4262 4263
					created by the user with the
					TEMPORARY keyword, then this
					parameter is the dir path where the
					table should be placed if we create
					an .ibd file for it (no .ibd extension
					in the path, though); otherwise this
					is NULL */
unknown's avatar
unknown committed
4264
	ibool		comp)		/* in: TRUE=compact record format */
4265 4266 4267 4268 4269 4270
{
	Field*		field;
	dict_table_t*	table;
	ulint		n_cols;
  	int 		error;
  	ulint		col_type;
unknown's avatar
unknown committed
4271
	ulint		col_len;
4272 4273
  	ulint		nulls_allowed;
	ulint		unsigned_type;
unknown's avatar
unknown committed
4274
	ulint		binary_type;
unknown's avatar
unknown committed
4275
	ulint		long_true_varchar;
unknown's avatar
unknown committed
4276
	ulint		charset_no;
4277
  	ulint		i;
4278

4279 4280 4281
  	DBUG_ENTER("create_table_def");
  	DBUG_PRINT("enter", ("table_name: %s", table_name));

4282
	n_cols = form->s->fields;
4283

unknown's avatar
unknown committed
4284 4285
	/* We pass 0 as the space id, and determine at a lower level the space
	id where to store the table */
4286

unknown's avatar
unknown committed
4287
	table = dict_mem_table_create(table_name, 0, n_cols, comp);
4288

unknown's avatar
unknown committed
4289 4290 4291 4292 4293
	if (path_of_temp_table) {
		table->dir_path_of_temp_table =
			mem_heap_strdup(table->heap, path_of_temp_table);
	}

4294 4295 4296
	for (i = 0; i < n_cols; i++) {
		field = form->field[i];

unknown's avatar
unknown committed
4297 4298
		col_type = get_innobase_type_from_mysql_type(&unsigned_type,
									field);
4299 4300 4301 4302 4303 4304
		if (field->null_ptr) {
			nulls_allowed = 0;
		} else {
			nulls_allowed = DATA_NOT_NULL;
		}

unknown's avatar
unknown committed
4305
		if (field->binary()) {
unknown's avatar
unknown committed
4306 4307 4308 4309 4310
			binary_type = DATA_BINARY_TYPE;
		} else {
			binary_type = 0;
		}

unknown's avatar
unknown committed
4311 4312 4313 4314 4315 4316
		charset_no = 0;	

		if (dtype_is_string_type(col_type)) {

			charset_no = (ulint)field->charset()->number;

unknown's avatar
unknown committed
4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332 4333 4334 4335 4336 4337 4338
			ut_a(charset_no < 256); /* in data0type.h we assume
						that the number fits in one
						byte */
		}

		ut_a(field->type() < 256); /* we assume in dtype_form_prtype()
					   that this fits in one byte */
		col_len = field->pack_length();

		/* The MySQL pack length contains 1 or 2 bytes length field
		for a true VARCHAR. Let us subtract that, so that the InnoDB
		column length in the InnoDB data dictionary is the real
		maximum byte length of the actual data. */
	
		long_true_varchar = 0;

		if (field->type() == MYSQL_TYPE_VARCHAR) {
			col_len -= ((Field_varstring*)field)->length_bytes;

			if (((Field_varstring*)field)->length_bytes == 2) {
				long_true_varchar = DATA_LONG_TRUE_VARCHAR;
			}
unknown's avatar
unknown committed
4339 4340
		}

unknown's avatar
unknown committed
4341 4342 4343 4344 4345 4346 4347 4348 4349 4350
		dict_mem_table_add_col(table,
					(char*) field->field_name,
					col_type,
					dtype_form_prtype( 
					    (ulint)field->type()
					     | nulls_allowed | unsigned_type
					     | binary_type | long_true_varchar,
					    charset_no),
					col_len,
					0);
4351 4352 4353 4354
	}

	error = row_create_table_for_mysql(table, trx);

unknown's avatar
unknown committed
4355
	error = convert_error_code_to_mysql(error, NULL);
4356 4357 4358 4359 4360

	DBUG_RETURN(error);
}

/*********************************************************************
4361
Creates an index in an InnoDB database. */
4362 4363
static
int
4364 4365
create_index(
/*=========*/
4366
	trx_t*		trx,		/* in: InnoDB transaction handle */
4367 4368 4369 4370 4371
	TABLE*		form,		/* in: information on table
					columns and indexes */
	const char*	table_name,	/* in: table name */
	uint		key_num)	/* in: index number */
{
unknown's avatar
unknown committed
4372
	Field*		field;
4373
	dict_index_t*	index;
4374
  	int 		error;
4375 4376 4377 4378
	ulint		n_fields;
	KEY*		key;
	KEY_PART_INFO*	key_part;
	ulint		ind_type;
unknown's avatar
unknown committed
4379 4380
	ulint		col_type;
	ulint		prefix_len;
unknown's avatar
unknown committed
4381
	ulint		is_unsigned;
4382
  	ulint		i;
unknown's avatar
unknown committed
4383
  	ulint		j;
4384

4385
  	DBUG_ENTER("create_index");
4386

4387 4388 4389
	key = form->key_info + key_num;

    	n_fields = key->key_parts;
4390

4391 4392
    	ind_type = 0;

4393
    	if (key_num == form->s->primary_key) {
4394 4395
		ind_type = ind_type | DICT_CLUSTERED;
	}
4396

4397 4398 4399 4400
	if (key->flags & HA_NOSAME ) {
		ind_type = ind_type | DICT_UNIQUE;
	}

unknown's avatar
unknown committed
4401 4402
	/* We pass 0 as the space id, and determine at a lower level the space
	id where to store the table */
4403 4404 4405 4406 4407 4408

	index = dict_mem_index_create((char*) table_name, key->name, 0,
						ind_type, n_fields);
	for (i = 0; i < n_fields; i++) {
		key_part = key->key_part + i;

4409
		/* (The flag HA_PART_KEY_SEG denotes in MySQL a column prefix
unknown's avatar
unknown committed
4410 4411 4412 4413 4414 4415
		field in an index: we only store a specified number of first
		bytes of the column to the index field.) The flag does not
		seem to be properly set by MySQL. Let us fall back on testing
		the length of the key part versus the column. */
		
		field = NULL;
4416
		for (j = 0; j < form->s->fields; j++) {
unknown's avatar
unknown committed
4417 4418 4419

			field = form->field[j];

4420 4421 4422
			if (0 == innobase_strcasecmp(
					field->field_name,
					key_part->field->field_name)) {
unknown's avatar
unknown committed
4423 4424 4425 4426 4427 4428
				/* Found the corresponding column */

				break;
			}
		}

4429
		ut_a(j < form->s->fields);
unknown's avatar
unknown committed
4430

unknown's avatar
unknown committed
4431 4432
		col_type = get_innobase_type_from_mysql_type(
					&is_unsigned, key_part->field);
unknown's avatar
unknown committed
4433 4434

		if (DATA_BLOB == col_type
unknown's avatar
unknown committed
4435 4436 4437 4438 4439
		    || (key_part->length < field->pack_length()
			&& field->type() != MYSQL_TYPE_VARCHAR)
		    || (field->type() == MYSQL_TYPE_VARCHAR
			&& key_part->length < field->pack_length()
			          - ((Field_varstring*)field)->length_bytes)) {
unknown's avatar
unknown committed
4440

unknown's avatar
unknown committed
4441 4442 4443 4444 4445 4446 4447 4448
		        prefix_len = key_part->length;

			if (col_type == DATA_INT
			    || col_type == DATA_FLOAT
			    || col_type == DATA_DOUBLE
			    || col_type == DATA_DECIMAL) {
			        fprintf(stderr,
"InnoDB: error: MySQL is trying to create a column prefix index field\n"
unknown's avatar
unknown committed
4449 4450
"InnoDB: on an inappropriate data type. Table name %s, column name %s.\n",
				  table_name, key_part->field->field_name);
unknown's avatar
unknown committed
4451 4452 4453 4454 4455
			        
			        prefix_len = 0;
			}
		} else {
		        prefix_len = 0;
unknown's avatar
unknown committed
4456 4457
		}

4458 4459
		/* We assume all fields should be sorted in ascending
		order, hence the '0': */
unknown's avatar
unknown committed
4460

4461
		dict_mem_index_add_field(index,
unknown's avatar
unknown committed
4462 4463
				(char*) key_part->field->field_name,
				0, prefix_len);
4464 4465 4466 4467
	}

	error = row_create_index_for_mysql(index, trx);

unknown's avatar
unknown committed
4468
	error = convert_error_code_to_mysql(error, NULL);
4469 4470 4471 4472 4473

	DBUG_RETURN(error);
}

/*********************************************************************
4474
Creates an index to an InnoDB table when the user has defined no
4475
primary index. */
4476 4477
static
int
4478 4479
create_clustered_index_when_no_primary(
/*===================================*/
4480
	trx_t*		trx,		/* in: InnoDB transaction handle */
4481 4482 4483
	const char*	table_name)	/* in: table name */
{
	dict_index_t*	index;
4484 4485
  	int 		error;

unknown's avatar
unknown committed
4486 4487
	/* We pass 0 as the space id, and determine at a lower level the space
	id where to store the table */
4488

unknown's avatar
unknown committed
4489 4490 4491
	index = dict_mem_index_create((char*) table_name,
				      (char*) "GEN_CLUST_INDEX",
				      0, DICT_CLUSTERED, 0);
4492 4493
	error = row_create_index_for_mysql(index, trx);

unknown's avatar
unknown committed
4494
	error = convert_error_code_to_mysql(error, NULL);
4495

4496
	return(error);
4497 4498 4499
}

/*********************************************************************
4500
Creates a new table to an InnoDB database. */
4501 4502 4503 4504 4505 4506 4507 4508

int
ha_innobase::create(
/*================*/
					/* out: error number */
	const char*	name,		/* in: table name */
	TABLE*		form,		/* in: information on table
					columns and indexes */
4509 4510 4511
	HA_CREATE_INFO*	create_info)	/* in: more information of the
					created table, contains also the
					create statement string */
4512 4513 4514
{
	int		error;
	dict_table_t*	innobase_table;
unknown's avatar
unknown committed
4515
	trx_t*		parent_trx;
4516
	trx_t*		trx;
unknown's avatar
unknown committed
4517
	int		primary_key_no;
4518
	uint		i;
unknown's avatar
unknown committed
4519 4520
	char		name2[FN_REFLEN];
	char		norm_name[FN_REFLEN];
unknown's avatar
unknown committed
4521
	THD		*thd= current_thd;
4522
	ib_longlong     auto_inc_value;
4523

4524 4525
  	DBUG_ENTER("ha_innobase::create");

unknown's avatar
unknown committed
4526
	DBUG_ASSERT(thd != NULL);
unknown's avatar
unknown committed
4527

4528
	if (form->s->fields > 1000) {
unknown's avatar
unknown committed
4529 4530 4531
		/* The limit probably should be REC_MAX_N_FIELDS - 3 = 1020,
		but we play safe here */

unknown's avatar
unknown committed
4532
	        DBUG_RETURN(HA_ERR_TO_BIG_ROW);
unknown's avatar
unknown committed
4533 4534
	} 

unknown's avatar
unknown committed
4535 4536 4537
	/* Get the transaction associated with the current thd, or create one
	if not yet created */
	
unknown's avatar
unknown committed
4538
	parent_trx = check_trx_exists(current_thd);
unknown's avatar
unknown committed
4539 4540 4541 4542 4543 4544

	/* In case MySQL calls this in the middle of a SELECT query, release
	possible adaptive hash latch to avoid deadlocks of threads */

	trx_search_latch_release_if_reserved(parent_trx);	
	
4545
	trx = trx_allocate_for_mysql();
unknown's avatar
unknown committed
4546 4547 4548
		
	trx->mysql_thd = thd;
	trx->mysql_query_str = &((*thd).query);
4549

unknown's avatar
unknown committed
4550
	if (thd->options & OPTION_NO_FOREIGN_KEY_CHECKS) {
unknown's avatar
unknown committed
4551 4552 4553
		trx->check_foreigns = FALSE;
	}

unknown's avatar
unknown committed
4554
	if (thd->options & OPTION_RELAXED_UNIQUE_CHECKS) {
unknown's avatar
unknown committed
4555 4556 4557
		trx->check_unique_secondary = FALSE;
	}

unknown's avatar
unknown committed
4558 4559 4560 4561 4562
	if (lower_case_table_names) {
		srv_lower_case_table_names = TRUE;
	} else {
		srv_lower_case_table_names = FALSE;
	}
unknown's avatar
unknown committed
4563

unknown's avatar
unknown committed
4564
	fn_format(name2, name, "", "", 2);	// Remove the .frm extension
4565 4566

	normalize_table_name(norm_name, name2);
4567

unknown's avatar
unknown committed
4568
	/* Latch the InnoDB data dictionary exclusively so that no deadlocks
unknown's avatar
unknown committed
4569
	or lock waits can happen in it during a table create operation.
unknown's avatar
unknown committed
4570
	Drop table etc. do this latching in row0mysql.c. */
unknown's avatar
unknown committed
4571

unknown's avatar
unknown committed
4572
	row_mysql_lock_data_dictionary(trx);
unknown's avatar
unknown committed
4573 4574

	/* Create the table definition in InnoDB */
4575

unknown's avatar
unknown committed
4576 4577
	error = create_table_def(trx, form, norm_name,
		create_info->options & HA_LEX_CREATE_TMP_TABLE ? name2 : NULL,
4578
		form->s->row_type != ROW_TYPE_REDUNDANT);
unknown's avatar
unknown committed
4579

unknown's avatar
unknown committed
4580
  	if (error) {
unknown's avatar
unknown committed
4581
		innobase_commit_low(trx);
4582

unknown's avatar
unknown committed
4583
		row_mysql_unlock_data_dictionary(trx);
4584 4585 4586 4587 4588 4589

  		trx_free_for_mysql(trx);

 		DBUG_RETURN(error);
 	}

4590 4591
	/* Look for a primary key */

4592 4593
	primary_key_no= (table->s->primary_key != MAX_KEY ?
			 (int) table->s->primary_key : 
unknown's avatar
unknown committed
4594
			 -1);
4595

4596 4597 4598
	/* Our function row_get_mysql_key_number_for_index assumes
	the primary key is always number 0, if it exists */

unknown's avatar
unknown committed
4599
	DBUG_ASSERT(primary_key_no == -1 || primary_key_no == 0);
4600

4601 4602
	/* Create the keys */

4603
	if (form->s->keys == 0 || primary_key_no == -1) {
4604 4605
		/* Create an index which is used as the clustered index;
		order the rows by their row id which is internally generated
4606
		by InnoDB */
4607

4608
		error = create_clustered_index_when_no_primary(trx,
4609
							norm_name);
4610
  		if (error) {
unknown's avatar
unknown committed
4611 4612
			innobase_commit_low(trx);

unknown's avatar
unknown committed
4613
			row_mysql_unlock_data_dictionary(trx);
4614

4615 4616 4617 4618
			trx_free_for_mysql(trx);

			DBUG_RETURN(error);
      		}
4619 4620 4621
	}

	if (primary_key_no != -1) {
4622
		/* In InnoDB the clustered index must always be created
4623
		first */
unknown's avatar
unknown committed
4624 4625
	    	if ((error = create_index(trx, form, norm_name,
					  (uint) primary_key_no))) {
unknown's avatar
unknown committed
4626 4627
			innobase_commit_low(trx);

unknown's avatar
unknown committed
4628
			row_mysql_unlock_data_dictionary(trx);
4629 4630 4631 4632 4633 4634 4635

  			trx_free_for_mysql(trx);

			DBUG_RETURN(error);
      		}
      	}

4636
	for (i = 0; i < form->s->keys; i++) {
4637 4638 4639

		if (i != (uint) primary_key_no) {

unknown's avatar
unknown committed
4640
    			if ((error = create_index(trx, form, norm_name, i))) {
4641

unknown's avatar
unknown committed
4642
			  	innobase_commit_low(trx);
4643

unknown's avatar
unknown committed
4644
				row_mysql_unlock_data_dictionary(trx);
4645 4646 4647 4648 4649

  				trx_free_for_mysql(trx);

				DBUG_RETURN(error);
      			}
4650
      		}
4651
  	}
4652

unknown's avatar
unknown committed
4653
	if (current_thd->query != NULL) {
unknown's avatar
unknown committed
4654
		LEX_STRING q;
unknown's avatar
unknown committed
4655

unknown's avatar
unknown committed
4656 4657 4658 4659 4660 4661 4662 4663 4664 4665 4666
		if (thd->convert_string(&q, system_charset_info,
					current_thd->query,
					current_thd->query_length,
					current_thd->charset())) {
			error = HA_ERR_OUT_OF_MEM;
		} else {
			error = row_table_add_foreign_constraints(trx,
					q.str, norm_name);

			error = convert_error_code_to_mysql(error, NULL);
		}
4667

4668 4669
		if (error) {
			innobase_commit_low(trx);
unknown's avatar
unknown committed
4670

4671
			row_mysql_unlock_data_dictionary(trx);
4672

4673
  			trx_free_for_mysql(trx);
4674

4675 4676
			DBUG_RETURN(error);
		}
4677 4678
	}

unknown's avatar
unknown committed
4679 4680
  	innobase_commit_low(trx);

unknown's avatar
unknown committed
4681
	row_mysql_unlock_data_dictionary(trx);
4682

unknown's avatar
Merge  
unknown committed
4683 4684 4685
	/* Flush the log to reduce probability that the .frm files and
	the InnoDB data dictionary get out-of-sync if the user runs
	with innodb_flush_log_at_trx_commit = 0 */
unknown's avatar
unknown committed
4686

unknown's avatar
unknown committed
4687
	log_buffer_flush_to_disk();
unknown's avatar
Merge  
unknown committed
4688

4689
	innobase_table = dict_table_get(norm_name, NULL);
4690

unknown's avatar
unknown committed
4691
	DBUG_ASSERT(innobase_table != 0);
4692

4693 4694
	if ((create_info->used_fields & HA_CREATE_USED_AUTO) &&
	   (create_info->auto_increment_value != 0)) {
4695

4696 4697
		/* Query was ALTER TABLE...AUTO_INCREMENT = x; or 
		CREATE TABLE ...AUTO_INCREMENT = x; Find out a table
4698 4699
		definition from the dictionary and get the current value
		of the auto increment field. Set a new value to the
4700 4701
		auto increment field if the value is greater than the
		maximum value in the column. */
4702

4703
		auto_inc_value = create_info->auto_increment_value;
4704 4705 4706
		dict_table_autoinc_initialize(innobase_table, auto_inc_value);
	}

4707
	/* Tell the InnoDB server that there might be work for
4708 4709 4710 4711 4712 4713 4714 4715 4716
	utility threads: */

	srv_active_wake_master_thread();

  	trx_free_for_mysql(trx);

	DBUG_RETURN(0);
}

unknown's avatar
unknown committed
4717 4718 4719 4720 4721 4722 4723 4724 4725 4726
/*********************************************************************
Discards the tablespace of the table of this handle, or imports a
tablespace for it, depending on the flag passed in. */

int
ha_innobase::discard_or_import_tablespace(
/*======================================*/
				/* out: the InnoDB result code converted
				with convert_error_code_to_mysql() */
	my_bool discard)	/* in: TRUE if discard, else import */
{
	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;
	dict_table_t*	ib_table;
	trx_t*		handle_trx;
	int		result;

	DBUG_ENTER("ha_innobase::discard_or_import_tablespace");

	/* Sanity checks: the prebuilt struct must carry a trx with the
	right magic number, and it must be the same trx that is stored
	in the InnoDB slot of the current thd */

	ut_a(prebuilt->trx && prebuilt->trx->magic_n == TRX_MAGIC_N);
	ut_a(prebuilt->trx ==
		(trx_t*) current_thd->ha_data[innobase_hton.slot]);

	ib_table = prebuilt->table;
	handle_trx = prebuilt->trx;

	result = discard
		? row_discard_tablespace_for_mysql(ib_table->name, handle_trx)
		: row_import_tablespace_for_mysql(ib_table->name, handle_trx);

	result = convert_error_code_to_mysql(result, NULL);

	DBUG_RETURN(result);
}

4751 4752 4753 4754 4755 4756 4757 4758 4759 4760 4761 4762 4763 4764 4765 4766 4767 4768 4769 4770 4771 4772 4773 4774 4775 4776 4777 4778 4779 4780 4781 4782 4783 4784 4785 4786 4787 4788 4789 4790
/*********************************************************************
Deletes all rows of an InnoDB table. */

int
ha_innobase::delete_all_rows(void)
/*==============================*/
				/* out: error number */
{
	row_prebuilt_t*	prebuilt	= (row_prebuilt_t*)innobase_prebuilt;
	int		error;
	trx_t*		trx;
	THD*		thd		= current_thd;

	DBUG_ENTER("ha_innobase::delete_all_rows");

	if (thd->lex->sql_command != SQLCOM_TRUNCATE) {
	fallback:
		/* We only handle TRUNCATE TABLE t as a special case.
		DELETE FROM t will have to use ha_innobase::delete_row(). */
		DBUG_RETURN(my_errno=HA_ERR_WRONG_COMMAND);
	}

	/* Get the transaction associated with the current thd, or create one
	if not yet created */

	trx = check_trx_exists(thd);

	/* Truncate the table in InnoDB */

	error = row_truncate_table_for_mysql(prebuilt->table, trx);
	if (error == DB_ERROR) {
		/* Cannot truncate; resort to ha_innobase::delete_row() */
		goto fallback;
	}

	error = convert_error_code_to_mysql(error, NULL);

	DBUG_RETURN(error);
}

4791
/*********************************************************************
4792
Drops a table from an InnoDB database. Before calling this function,
unknown's avatar
unknown committed
4793 4794
MySQL calls innobase_commit to commit the transaction of the current user.
Then the current user cannot have locks set on the table. Drop table
4795 4796
operation inside InnoDB will remove all locks any user has on the table
inside InnoDB. */
4797 4798 4799 4800

int
ha_innobase::delete_table(
/*======================*/
unknown's avatar
unknown committed
4801 4802
				/* out: error number */
	const char*	name)	/* in: table name */
4803 4804 4805
{
	ulint	name_len;
	int	error;
unknown's avatar
unknown committed
4806
	trx_t*	parent_trx;
4807
	trx_t*	trx;
unknown's avatar
unknown committed
4808
	THD     *thd= current_thd;
4809
	char	norm_name[1000];
unknown's avatar
unknown committed
4810

unknown's avatar
unknown committed
4811
 	DBUG_ENTER("ha_innobase::delete_table");
4812

unknown's avatar
unknown committed
4813 4814 4815
	/* Get the transaction associated with the current thd, or create one
	if not yet created */
	
unknown's avatar
unknown committed
4816
	parent_trx = check_trx_exists(current_thd);
unknown's avatar
unknown committed
4817 4818 4819 4820 4821 4822

	/* In case MySQL calls this in the middle of a SELECT query, release
	possible adaptive hash latch to avoid deadlocks of threads */

	trx_search_latch_release_if_reserved(parent_trx);	

unknown's avatar
unknown committed
4823 4824 4825 4826 4827 4828
	if (lower_case_table_names) {
		srv_lower_case_table_names = TRUE;
	} else {
		srv_lower_case_table_names = FALSE;
	}

4829 4830
	trx = trx_allocate_for_mysql();

unknown's avatar
unknown committed
4831 4832
	trx->mysql_thd = current_thd;
	trx->mysql_query_str = &((*current_thd).query);
unknown's avatar
unknown committed
4833

unknown's avatar
unknown committed
4834 4835 4836 4837 4838 4839 4840 4841
	if (thd->options & OPTION_NO_FOREIGN_KEY_CHECKS) {
		trx->check_foreigns = FALSE;
	}

	if (thd->options & OPTION_RELAXED_UNIQUE_CHECKS) {
		trx->check_unique_secondary = FALSE;
	}

4842 4843 4844
	name_len = strlen(name);

	assert(name_len < 1000);
4845

4846 4847
	/* Strangely, MySQL passes the table name without the '.frm'
	extension, in contrast to ::create */
4848

4849 4850
	normalize_table_name(norm_name, name);

4851
  	/* Drop the table in InnoDB */
4852

4853
	error = row_drop_table_for_mysql(norm_name, trx,
unknown's avatar
unknown committed
4854
		thd->lex->sql_command == SQLCOM_DROP_DB);
4855

unknown's avatar
Merge  
unknown committed
4856 4857 4858
	/* Flush the log to reduce probability that the .frm files and
	the InnoDB data dictionary get out-of-sync if the user runs
	with innodb_flush_log_at_trx_commit = 0 */
unknown's avatar
unknown committed
4859

unknown's avatar
unknown committed
4860
	log_buffer_flush_to_disk();
unknown's avatar
Merge  
unknown committed
4861

4862
	/* Tell the InnoDB server that there might be work for
4863 4864 4865 4866
	utility threads: */

	srv_active_wake_master_thread();

unknown's avatar
unknown committed
4867
  	innobase_commit_low(trx);
unknown's avatar
unknown committed
4868

4869 4870
  	trx_free_for_mysql(trx);

unknown's avatar
unknown committed
4871
	error = convert_error_code_to_mysql(error, NULL);
4872 4873 4874 4875

	DBUG_RETURN(error);
}

4876 4877 4878 4879 4880 4881 4882 4883 4884 4885 4886 4887 4888
/*********************************************************************
Removes all tables in the named database inside InnoDB. */

int
innobase_drop_database(
/*===================*/
			/* out: error number */
	char*	path)	/* in: database path; inside InnoDB the name
			of the last directory in the path is used as
			the database name: for example, in 'mysql/data/test'
			the database name is 'test' */
{
	ulint	len		= 0;
unknown's avatar
unknown committed
4889
	trx_t*	parent_trx;
4890 4891 4892
	trx_t*	trx;
	char*	ptr;
	int	error;
4893
	char*	namebuf;
unknown's avatar
unknown committed
4894

unknown's avatar
unknown committed
4895 4896 4897
	/* Get the transaction associated with the current thd, or create one
	if not yet created */
	
unknown's avatar
unknown committed
4898
	parent_trx = check_trx_exists(current_thd);
unknown's avatar
unknown committed
4899 4900 4901 4902 4903 4904

	/* In case MySQL calls this in the middle of a SELECT query, release
	possible adaptive hash latch to avoid deadlocks of threads */

	trx_search_latch_release_if_reserved(parent_trx);	

4905
	ptr = strend(path) - 2;
unknown's avatar
unknown committed
4906

4907 4908 4909 4910 4911 4912
	while (ptr >= path && *ptr != '\\' && *ptr != '/') {
		ptr--;
		len++;
	}

	ptr++;
unknown's avatar
unknown committed
4913
	namebuf = my_malloc((uint) len + 2, MYF(0));
4914 4915 4916 4917

	memcpy(namebuf, ptr, len);
	namebuf[len] = '/';
	namebuf[len + 1] = '\0';
unknown's avatar
unknown committed
4918
#ifdef  __WIN__
4919
	innobase_casedn_str(namebuf);
unknown's avatar
unknown committed
4920
#endif
4921
	trx = trx_allocate_for_mysql();
unknown's avatar
unknown committed
4922 4923
	trx->mysql_thd = current_thd;
	trx->mysql_query_str = &((*current_thd).query);
4924

unknown's avatar
unknown committed
4925 4926 4927 4928
	if (current_thd->options & OPTION_NO_FOREIGN_KEY_CHECKS) {
		trx->check_foreigns = FALSE;
	}

4929
  	error = row_drop_database_for_mysql(namebuf, trx);
4930
	my_free(namebuf, MYF(0));
4931

unknown's avatar
Merge  
unknown committed
4932 4933 4934
	/* Flush the log to reduce probability that the .frm files and
	the InnoDB data dictionary get out-of-sync if the user runs
	with innodb_flush_log_at_trx_commit = 0 */
unknown's avatar
unknown committed
4935

unknown's avatar
unknown committed
4936
	log_buffer_flush_to_disk();
unknown's avatar
Merge  
unknown committed
4937

4938 4939 4940 4941 4942
	/* Tell the InnoDB server that there might be work for
	utility threads: */

	srv_active_wake_master_thread();

unknown's avatar
unknown committed
4943
  	innobase_commit_low(trx);
4944 4945
  	trx_free_for_mysql(trx);

unknown's avatar
unknown committed
4946
	error = convert_error_code_to_mysql(error, NULL);
4947 4948 4949 4950

	return(error);
}

4951
/*************************************************************************
4952
Renames an InnoDB table. */
4953 4954 4955 4956 4957 4958 4959 4960 4961 4962 4963

int
ha_innobase::rename_table(
/*======================*/
				/* out: 0 or error code */
	const char*	from,	/* in: old name of the table */
	const char*	to)	/* in: new name of the table */
{
	ulint	name_len1;
	ulint	name_len2;
	int	error;
unknown's avatar
unknown committed
4964
	trx_t*	parent_trx;
4965
	trx_t*	trx;
4966 4967
	char	norm_from[1000];
	char	norm_to[1000];
4968

4969 4970
  	DBUG_ENTER("ha_innobase::rename_table");

unknown's avatar
unknown committed
4971 4972 4973
	/* Get the transaction associated with the current thd, or create one
	if not yet created */
	
unknown's avatar
unknown committed
4974
	parent_trx = check_trx_exists(current_thd);
unknown's avatar
unknown committed
4975 4976 4977 4978 4979 4980

	/* In case MySQL calls this in the middle of a SELECT query, release
	possible adaptive hash latch to avoid deadlocks of threads */

	trx_search_latch_release_if_reserved(parent_trx);	

unknown's avatar
unknown committed
4981 4982 4983 4984 4985 4986
	if (lower_case_table_names) {
		srv_lower_case_table_names = TRUE;
	} else {
		srv_lower_case_table_names = FALSE;
	}

4987
	trx = trx_allocate_for_mysql();
unknown's avatar
unknown committed
4988 4989
	trx->mysql_thd = current_thd;
	trx->mysql_query_str = &((*current_thd).query);
4990

4991 4992 4993 4994
	if (current_thd->options & OPTION_NO_FOREIGN_KEY_CHECKS) {
		trx->check_foreigns = FALSE;
	}

4995 4996 4997 4998 4999
	name_len1 = strlen(from);
	name_len2 = strlen(to);

	assert(name_len1 < 1000);
	assert(name_len2 < 1000);
5000

5001 5002 5003
	normalize_table_name(norm_from, from);
	normalize_table_name(norm_to, to);

5004
  	/* Rename the table in InnoDB */
5005

5006
  	error = row_rename_table_for_mysql(norm_from, norm_to, trx);
5007

unknown's avatar
Merge  
unknown committed
5008 5009 5010
	/* Flush the log to reduce probability that the .frm files and
	the InnoDB data dictionary get out-of-sync if the user runs
	with innodb_flush_log_at_trx_commit = 0 */
unknown's avatar
unknown committed
5011

unknown's avatar
unknown committed
5012
	log_buffer_flush_to_disk();
unknown's avatar
Merge  
unknown committed
5013

5014
	/* Tell the InnoDB server that there might be work for
5015 5016 5017 5018
	utility threads: */

	srv_active_wake_master_thread();

unknown's avatar
unknown committed
5019
  	innobase_commit_low(trx);
5020 5021
  	trx_free_for_mysql(trx);

unknown's avatar
unknown committed
5022
	error = convert_error_code_to_mysql(error, NULL);
5023 5024 5025 5026 5027 5028 5029 5030 5031 5032

	DBUG_RETURN(error);
}

/*************************************************************************
Estimates the number of index records in a range. Both range ends are
converted to InnoDB search tuples and passed to
btr_estimate_n_rows_in_range(). */

ha_rows
ha_innobase::records_in_range(
/*==========================*/
						/* out: estimated number of
						rows, never 0; HA_POS_ERROR
						if the work buffer cannot be
						allocated */
	uint 			keynr,		/* in: index number */
        key_range		*min_key,	/* in: start key value of the
                                                   range, may also be 0 */
	key_range		*max_key)	/* in: range end key val, may
                                                   also be 0 */
{
	row_prebuilt_t* prebuilt	= (row_prebuilt_t*) innobase_prebuilt;
	KEY*		key;
	dict_index_t*	index;
	mysql_byte*	key_val_buff2;
	ulint		buff2_len = table->s->reclength
      					+ table->s->max_key_length + 100;
	dtuple_t*	range_start;
	dtuple_t*	range_end;
	ib_longlong	n_rows;
	ulint		mode1;
	ulint		mode2;
	void*           heap1;
	void*           heap2;

   	DBUG_ENTER("records_in_range");

	/* Work buffer for the range end key; the range start key uses
	the key_val_buff of the handle. Allocate it before touching any
	other state so that a failure needs no cleanup. */

	key_val_buff2 = (mysql_byte*) my_malloc((uint) buff2_len,
								MYF(MY_WME));
	if (key_val_buff2 == NULL) {
		/* Without the buffer the key conversion below would write
		through a NULL pointer; report an error to the caller */

		DBUG_RETURN(HA_POS_ERROR);
	}

	prebuilt->trx->op_info = (char*)"estimating records in index range";

	/* In case MySQL calls this in the middle of a SELECT query, release
	possible adaptive hash latch to avoid deadlocks of threads */

	trx_search_latch_release_if_reserved(prebuilt->trx);

	active_index = keynr;

	key = table->key_info + active_index;

	index = dict_table_get_index_noninline(prebuilt->table, key->name);

	range_start = dtuple_create_for_mysql(&heap1, key->key_parts);
 	dict_index_copy_types(range_start, index, key->key_parts);

	range_end = dtuple_create_for_mysql(&heap2, key->key_parts);
 	dict_index_copy_types(range_end, index, key->key_parts);

	row_sel_convert_mysql_key_to_innobase(
				range_start, (byte*) key_val_buff,
				(ulint)upd_and_key_val_buff_len,
				index,
				(byte*) (min_key ? min_key->key :
                                         (const mysql_byte*) 0),
				(ulint) (min_key ? min_key->length : 0),
				prebuilt->trx);

	row_sel_convert_mysql_key_to_innobase(
				range_end, (byte*) key_val_buff2,
				buff2_len, index,
				(byte*) (max_key ? max_key->key :
                                         (const mysql_byte*) 0),
				(ulint) (max_key ? max_key->length : 0),
				prebuilt->trx);

	/* A missing range end is searched with HA_READ_KEY_EXACT */

	mode1 = convert_search_mode_to_innobase(min_key ? min_key->flag :
                                                HA_READ_KEY_EXACT);
	mode2 = convert_search_mode_to_innobase(max_key ? max_key->flag :
                                                HA_READ_KEY_EXACT);

	n_rows = btr_estimate_n_rows_in_range(index, range_start,
						mode1, range_end, mode2);
	dtuple_free_for_mysql(heap1);
	dtuple_free_for_mysql(heap2);

    	my_free((char*) key_val_buff2, MYF(0));

	prebuilt->trx->op_info = (char*)"";

	/* The MySQL optimizer seems to believe an estimate of 0 rows is
	always accurate and may return the result 'Empty set' based on that.
	The accuracy is not guaranteed, and even if it were, for a locking
	read we should anyway perform the search to set the next-key lock.
	Add 1 to the value to make sure MySQL does not make the assumption! */

	if (n_rows == 0) {
	        n_rows = 1;
	}

	DBUG_RETURN((ha_rows) n_rows);
}

5123 5124
/*************************************************************************
Gives an UPPER BOUND to the number of rows in a table. This is used in
unknown's avatar
unknown committed
5125
filesort.cc. */
5126 5127

ha_rows
unknown's avatar
unknown committed
5128
ha_innobase::estimate_rows_upper_bound(void)
5129
/*======================================*/
5130
			/* out: upper bound of rows */
5131 5132
{
	row_prebuilt_t* prebuilt	= (row_prebuilt_t*) innobase_prebuilt;
5133 5134
	dict_index_t*	index;
	ulonglong	estimate;
5135
	ulonglong	local_data_file_length;
unknown's avatar
unknown committed
5136

unknown's avatar
unknown committed
5137
 	DBUG_ENTER("estimate_rows_upper_bound");
5138

unknown's avatar
unknown committed
5139 5140 5141 5142 5143 5144
	/* We do not know if MySQL can call this function before calling
	external_lock(). To be safe, update the thd of the current table
	handle. */

	update_thd(current_thd);

unknown's avatar
unknown committed
5145 5146 5147
	prebuilt->trx->op_info = (char*)
	                         "calculating upper bound for table rows";

unknown's avatar
unknown committed
5148 5149 5150 5151
	/* In case MySQL calls this in the middle of a SELECT query, release
	possible adaptive hash latch to avoid deadlocks of threads */

	trx_search_latch_release_if_reserved(prebuilt->trx);
5152

5153
	index = dict_table_get_first_index_noninline(prebuilt->table);
unknown's avatar
unknown committed
5154

5155
	local_data_file_length = ((ulonglong) index->stat_n_leaf_pages)
5156
    							* UNIV_PAGE_SIZE;
5157

unknown's avatar
unknown committed
5158 5159
	/* Calculate a minimum length for a clustered index record and from
	that an upper bound for the number of rows. Since we only calculate
unknown's avatar
unknown committed
5160 5161
	new statistics in row0mysql.c when a table has grown by a threshold
	factor, we must add a safety factor 2 in front of the formula below. */
unknown's avatar
unknown committed
5162

unknown's avatar
unknown committed
5163 5164
	estimate = 2 * local_data_file_length /
					 dict_index_calc_min_rec_len(index);
unknown's avatar
unknown committed
5165

unknown's avatar
unknown committed
5166 5167
	prebuilt->trx->op_info = (char*)"";

unknown's avatar
unknown committed
5168
	DBUG_RETURN((ha_rows) estimate);
5169 5170
}

5171 5172 5173 5174 5175 5176 5177 5178 5179 5180 5181 5182
/*************************************************************************
Estimates how many seeks a read through the whole table takes. The value
is meant to be comparable to the number returned by records_in_range, so
that MySQL can decide whether to scan the table or to use keys. */

double
ha_innobase::scan_time()
/*====================*/
			/* out: estimated time measured in disk seeks */
{
	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;
	double		seeks;

	/* Since MySQL seems to favor table scans too much over index
	searches, we pretend that a sequential read takes the same time
	as a random disk read, that is, we do not divide the following
	by 10, which would be physically realistic. */

	seeks = (double) (prebuilt->table->stat_clustered_index_size);

	return(seeks);
}

unknown's avatar
unknown committed
5191 5192 5193
/**********************************************************************
Calculate the time it takes to read a set of ranges through an index
This enables us to optimise reads for clustered indexes. */
unknown's avatar
unknown committed
5194

unknown's avatar
unknown committed
5195 5196 5197 5198 5199 5200 5201
double
ha_innobase::read_time(
/*===================*/
			/* out: estimated time measured in disk seeks */
	uint    index,	/* in: key number */
	uint	ranges,	/* in: how many ranges */
	ha_rows rows)	/* in: estimated number of rows in the ranges */
unknown's avatar
unknown committed
5202
{
unknown's avatar
unknown committed
5203 5204 5205
	ha_rows total_rows;
	double  time_for_scan;
  
unknown's avatar
unknown committed
5206 5207 5208 5209
	if (index != table->s->primary_key) {
		/* Not clustered */		
	  	return(handler::read_time(index, ranges, rows));
	}
unknown's avatar
unknown committed
5210

unknown's avatar
unknown committed
5211
	if (rows <= 2) {
unknown's avatar
unknown committed
5212

unknown's avatar
unknown committed
5213 5214
		return((double) rows);
	}
unknown's avatar
unknown committed
5215 5216 5217 5218

	/* Assume that the read time is proportional to the scan time for all
	rows + at most one seek per range. */

unknown's avatar
unknown committed
5219
	time_for_scan = scan_time();
unknown's avatar
unknown committed
5220

unknown's avatar
unknown committed
5221
	if ((total_rows = estimate_rows_upper_bound()) < rows) {
unknown's avatar
unknown committed
5222

unknown's avatar
unknown committed
5223 5224
	  	return(time_for_scan);
	}
unknown's avatar
unknown committed
5225

unknown's avatar
unknown committed
5226
	return(ranges + (double) rows / (double) total_rows * time_for_scan);
unknown's avatar
unknown committed
5227 5228
}

5229 5230 5231 5232 5233 5234 5235 5236 5237 5238 5239 5240
/*************************************************************************
Returns statistics information of the table to the MySQL interpreter,
in various fields of the handle object. Which fields are filled in is
selected by the bits of the flag parameter: HA_STATUS_TIME,
HA_STATUS_VARIABLE, HA_STATUS_CONST and HA_STATUS_ERRKEY. */

void
ha_innobase::info(
/*==============*/
	uint flag)	/* in: what information MySQL requests */
{
	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;
	dict_table_t*	ib_table;
	dict_index_t*	index;
	ha_rows		rec_per_key;
	ulong		part_no;
	ulong		key_no;
	char		path[FN_REFLEN];
	os_file_stat_t	stat_info;

	DBUG_ENTER("info");

	/* If we are forcing recovery at a high level, we will suppress
	statistics calculation on tables, because that may crash the
	server if an index is badly corrupted. */

	if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) {

		DBUG_VOID_RETURN;
	}

	/* We do not know if MySQL can call this function before calling
	external_lock(). To be safe, update the thd of the current table
	handle. */

	update_thd(current_thd);

	prebuilt->trx->op_info = (char*)"returning various info to MySQL";

	/* In case MySQL calls this in the middle of a SELECT query, release
	possible adaptive hash latch to avoid deadlocks of threads */

	trx_search_latch_release_if_reserved(prebuilt->trx);

	ib_table = prebuilt->table;

	if (flag & HA_STATUS_TIME) {
		/* In sql_show we call with this flag: update then statistics
		so that they are up-to-date */

		prebuilt->trx->op_info = (char*)"updating table statistics";

		dict_update_statistics(ib_table);

		prebuilt->trx->op_info = (char*)
		                          "returning various info to MySQL";

		/* Build the name of the file whose status gives the create
		time: the .ibd file when the table is not in space 0,
		otherwise the file with the reg_ext extension */

		const char*	file_ext = (ib_table->space != 0)
						? ".ibd" : reg_ext;

		my_snprintf(path, sizeof(path), "%s/%s%s",
			    mysql_data_home, ib_table->name, file_ext);

		unpack_filename(path,path);

		/* Note that we do not know the access time of the table,
		nor the CHECK TABLE time, nor the UPDATE or INSERT time. */

		if (os_file_get_status(path,&stat_info)) {
			create_time = stat_info.ctime;
		}
	}

	if (flag & HA_STATUS_VARIABLE) {
		records = (ha_rows)ib_table->stat_n_rows;
		deleted = 0;
		data_file_length = ((ulonglong)
				ib_table->stat_clustered_index_size)
					* UNIV_PAGE_SIZE;
		index_file_length = ((ulonglong)
				ib_table->stat_sum_of_other_index_sizes)
					* UNIV_PAGE_SIZE;
		delete_length = 0;
		check_time = 0;

		/* Avoid dividing by a zero row count */

		mean_rec_length = 0;

		if (records != 0) {
			mean_rec_length = (ulong) (data_file_length / records);
		}
	}

	if (flag & HA_STATUS_CONST) {
		index = dict_table_get_first_index_noninline(ib_table);

		/* Skip the clustered index generated by InnoDB: it has no
		counterpart among the MySQL keys */

		if (prebuilt->clust_index_was_generated) {
			index = dict_table_get_next_index_noninline(index);
		}

		for (key_no = 0; key_no < table->s->keys; key_no++) {
			if (index == NULL) {
				ut_print_timestamp(stderr);
			        fprintf(stderr,
"  InnoDB: Error: table %s contains less indexes inside InnoDB\n"
"InnoDB: than are defined in the MySQL .frm file. Have you mixed up\n"
"InnoDB: .frm files from different installations? See section\n"
"InnoDB: 15.1 at http://www.innodb.com/ibman.html\n",
				   ib_table->name);
				break;
			}

			for (part_no = 0;
			     part_no < table->key_info[key_no].key_parts;
			     part_no++) {

				if (part_no + 1 > index->n_uniq) {
				        ut_print_timestamp(stderr);
			                fprintf(stderr,
"  InnoDB: Error: index %s of %s has %lu columns unique inside InnoDB\n"
"InnoDB: but MySQL is asking statistics for %lu columns. Have you mixed up\n"
"InnoDB: .frm files from different installations? See section\n"
"InnoDB: 15.1 at http://www.innodb.com/ibman.html\n",
						index->name,
						ib_table->name,
						(unsigned long) index->n_uniq,
						part_no + 1);
				        break;
				}

				/* Rows per distinct value of the first
				part_no + 1 key columns; use the whole row
				count if no distinct values are recorded */

				if (index->stat_n_diff_key_vals[part_no + 1]
				    == 0) {

					rec_per_key = records;
				} else {
					rec_per_key = (ha_rows)(records /
				    index->stat_n_diff_key_vals[part_no + 1]);
				}

				/* Since MySQL seems to favor table scans
				too much over index searches, we pretend
				index selectivity is 2 times better than
				our estimate: */

				rec_per_key /= 2;

				if (rec_per_key == 0) {
					rec_per_key = 1;
				}

				/* Clamp the value to what fits in a ulong */

				if (rec_per_key >= ~(ulong) 0) {
					table->key_info[key_no]
						.rec_per_key[part_no]
						= ~(ulong) 0;
				} else {
					table->key_info[key_no]
						.rec_per_key[part_no]
						= rec_per_key;
				}
			}

			index = dict_table_get_next_index_noninline(index);
		}
	}

	if (flag & HA_STATUS_ERRKEY) {
		ut_a(prebuilt->trx && prebuilt->trx->magic_n == TRX_MAGIC_N);

		errkey = (unsigned int) row_get_mysql_key_number_for_index(
				       (dict_index_t*)
				       trx_get_error_info(prebuilt->trx));
	}

	prebuilt->trx->op_info = (char*)"";

	DBUG_VOID_RETURN;
}

unknown's avatar
unknown committed
5400
/**************************************************************************
unknown's avatar
unknown committed
5401 5402
Updates index cardinalities of the table, based on 8 random dives into
each index tree. This does NOT calculate exact statistics on the table. */
unknown's avatar
unknown committed
5403 5404 5405 5406 5407 5408 5409 5410 5411 5412 5413 5414 5415 5416

int
ha_innobase::analyze(
/*=================*/			 
					/* out: returns always 0 (success) */
	THD*		thd,		/* in: connection thread handle */
	HA_CHECK_OPT*	check_opt)	/* in: currently ignored */
{
	/* Simply call ::info() with all the flags */
	info(HA_STATUS_TIME | HA_STATUS_CONST | HA_STATUS_VARIABLE);

	return(0);
}

unknown's avatar
unknown committed
5417
/**************************************************************************
5418 5419
This is mapped to "ALTER TABLE tablename TYPE=InnoDB", which rebuilds
the table in MySQL. */
5420

unknown's avatar
unknown committed
5421 5422 5423 5424 5425
int
ha_innobase::optimize(
/*==================*/
	THD*		thd,		/* in: connection thread handle */
	HA_CHECK_OPT*	check_opt)	/* in: currently ignored */
5426
{
5427
        return(HA_ADMIN_TRY_ALTER);
5428 5429
}

unknown's avatar
unknown committed
5430 5431 5432 5433 5434 5435 5436 5437 5438 5439 5440 5441 5442 5443 5444 5445
/***********************************************************************
Tries to check that an InnoDB table is not corrupted. If corruption is
noticed, prints to stderr information about it. In case of corruption
may also assert a failure and crash the server. */

int
ha_innobase::check(
/*===============*/
					/* out: HA_ADMIN_CORRUPT or
					HA_ADMIN_OK */
	THD* 		thd,		/* in: user thread handle */
	HA_CHECK_OPT* 	check_opt)	/* in: check options, currently
					ignored */
{
	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;
	ulint		check_result;

	/* Sanity checks: the prebuilt struct must carry a trx with the
	right magic number, and it must be the same trx that is stored
	in the InnoDB slot of the current thd */

	ut_a(prebuilt->trx && prebuilt->trx->magic_n == TRX_MAGIC_N);
	ut_a(prebuilt->trx ==
		(trx_t*) current_thd->ha_data[innobase_hton.slot]);

	if (prebuilt->mysql_template == NULL) {
		/* Build the template; we will use a dummy template
		in index scans done in checking */

		build_template(prebuilt, NULL, table, ROW_MYSQL_WHOLE_ROW);
	}

	check_result = row_check_table_for_mysql(prebuilt);

	if (check_result != DB_SUCCESS) {
		return(HA_ADMIN_CORRUPT);
	}

	return(HA_ADMIN_OK);
}

5467
/*****************************************************************
unknown's avatar
Merge  
unknown committed
5468 5469 5470
Adds information about free space in the InnoDB tablespace to a table comment
which is printed out when a user calls SHOW TABLE STATUS. Adds also info on
foreign keys. */
5471 5472 5473 5474

char*
ha_innobase::update_table_comment(
/*==============================*/
unknown's avatar
Merge  
unknown committed
5475 5476 5477
				/* out: table comment + InnoDB free space +
				info on foreign keys */
        const char*	comment)/* in: table comment defined by user */
5478
{
unknown's avatar
unknown committed
5479
	uint	length			= (uint) strlen(comment);
5480 5481
	char*				str;
	row_prebuilt_t*	prebuilt	= (row_prebuilt_t*)innobase_prebuilt;
5482

unknown's avatar
unknown committed
5483 5484 5485 5486
	/* We do not know if MySQL can call this function before calling
	external_lock(). To be safe, update the thd of the current table
	handle. */

5487 5488 5489 5490
	if(length > 64000 - 3) {
		return((char*)comment); /* string too long */
	}

unknown's avatar
unknown committed
5491 5492
	update_thd(current_thd);

unknown's avatar
unknown committed
5493 5494
	prebuilt->trx->op_info = (char*)"returning table comment";

unknown's avatar
unknown committed
5495 5496 5497 5498
	/* In case MySQL calls this in the middle of a SELECT query, release
	possible adaptive hash latch to avoid deadlocks of threads */

	trx_search_latch_release_if_reserved(prebuilt->trx);
5499
	str = NULL;
unknown's avatar
unknown committed
5500

5501
	if (FILE* file = os_file_create_tmpfile()) {
5502
		long	flen;
unknown's avatar
Merge  
unknown committed
5503

5504 5505
		/* output the data to a temporary file */
		fprintf(file, "InnoDB free: %lu kB",
unknown's avatar
unknown committed
5506 5507
      		   (ulong) fsp_get_available_space_in_free_extents(
      					prebuilt->table->space));
5508

5509 5510
		dict_print_info_on_foreign_keys(FALSE, file,
				prebuilt->trx, prebuilt->table);
5511
		flen = ftell(file);
5512 5513 5514
		if (flen < 0) {
			flen = 0;
		} else if (length + flen + 3 > 64000) {
5515 5516
			flen = 64000 - 3 - length;
		}
5517

5518 5519
		/* allocate buffer for the full string, and
		read the contents of the temporary file */
5520

5521
		str = my_malloc(length + flen + 3, MYF(0));
5522

5523 5524 5525 5526 5527 5528 5529 5530
		if (str) {
			char* pos	= str + length;
			if(length) {
				memcpy(str, comment, length);
				*pos++ = ';';
				*pos++ = ' ';
			}
			rewind(file);
unknown's avatar
unknown committed
5531
			flen = (uint) fread(pos, 1, flen, file);
5532 5533 5534 5535
			pos[flen] = 0;
		}

		fclose(file);
unknown's avatar
unknown committed
5536
	}
unknown's avatar
unknown committed
5537

unknown's avatar
unknown committed
5538 5539
        prebuilt->trx->op_info = (char*)"";

5540
  	return(str ? str : (char*) comment);
5541 5542
}

unknown's avatar
unknown committed
5543 5544 5545 5546 5547 5548 5549 5550 5551 5552 5553
/***********************************************************************
Gets the foreign key create info for a table stored in InnoDB. */

char*
ha_innobase::get_foreign_key_create_info(void)
/*==========================================*/
			/* out, own: character string in the form which
			can be inserted to the CREATE TABLE statement,
			MUST be freed with ::free_foreign_key_create_info */
{
	row_prebuilt_t* prebuilt = (row_prebuilt_t*)innobase_prebuilt;
5554
	char*	str	= 0;
unknown's avatar
unknown committed
5555

unknown's avatar
unknown committed
5556
	ut_a(prebuilt != NULL);
5557

unknown's avatar
unknown committed
5558 5559 5560 5561 5562 5563
	/* We do not know if MySQL can call this function before calling
	external_lock(). To be safe, update the thd of the current table
	handle. */

	update_thd(current_thd);

5564
	if (FILE* file = os_file_create_tmpfile()) {
5565
		long	flen;
unknown's avatar
unknown committed
5566

5567
		prebuilt->trx->op_info = (char*)"getting info on foreign keys";
unknown's avatar
unknown committed
5568

5569 5570 5571
		/* In case MySQL calls this in the middle of a SELECT query,
		release possible adaptive hash latch to avoid
		deadlocks of threads */
5572

5573
		trx_search_latch_release_if_reserved(prebuilt->trx);
unknown's avatar
unknown committed
5574

5575
		/* output the data to a temporary file */
5576 5577
		dict_print_info_on_foreign_keys(TRUE, file,
				prebuilt->trx, prebuilt->table);
5578 5579 5580
		prebuilt->trx->op_info = (char*)"";

		flen = ftell(file);
5581 5582 5583
		if (flen < 0) {
			flen = 0;
		} else if(flen > 64000 - 1) {
5584 5585 5586 5587 5588 5589 5590 5591 5592 5593
			flen = 64000 - 1;
		}

		/* allocate buffer for the string, and
		read the contents of the temporary file */

		str = my_malloc(flen + 1, MYF(0));

		if (str) {
			rewind(file);
unknown's avatar
unknown committed
5594
			flen = (uint) fread(str, 1, flen, file);
5595 5596 5597 5598 5599 5600
			str[flen] = 0;
		}

		fclose(file);
	} else {
		/* unable to create temporary file */
unknown's avatar
unknown committed
5601
          	str = my_malloc(1, MYF(MY_ZEROFILL));
5602
	}
unknown's avatar
unknown committed
5603

unknown's avatar
Merge  
unknown committed
5604
  	return(str);
unknown's avatar
unknown committed
5605
}
unknown's avatar
unknown committed
5606

5607 5608 5609 5610 5611 5612 5613 5614 5615 5616 5617 5618

/***********************************************************************
Appends to f_key_list one FOREIGN_KEY_INFO for each constraint in the
foreign_list of the table of this handle. The list of constraints is
walked while holding dict_sys->mutex. The strings and the list elements
are obtained through make_lex_string() and thd->memdup(). */

int 
ha_innobase::get_foreign_key_list(
/*==============================*/
					/* out: always 0 */
  THD *thd,				/* in: user thread handle, passed to
					make_lex_string() and used for
					memdup() */
  List<FOREIGN_KEY_INFO> *f_key_list)	/* in/out: list to which the
					foreign key descriptions are added */
{
  dict_foreign_t* foreign;

  DBUG_ENTER("get_foreign_key_list");
  row_prebuilt_t* prebuilt = (row_prebuilt_t*)innobase_prebuilt;
  ut_a(prebuilt != NULL);
  update_thd(current_thd);
  prebuilt->trx->op_info = (char*)"getting list of foreign keys";
  trx_search_latch_release_if_reserved(prebuilt->trx);
  mutex_enter_noninline(&(dict_sys->mutex));
  foreign = UT_LIST_GET_FIRST(prebuilt->table->foreign_list);

  while (foreign != NULL) 
  {
    uint i;
    FOREIGN_KEY_INFO f_key_info;
    LEX_STRING *name= 0;
    const char *tmp_buff;
    const char *slash;

    /* The constraint id is reported without the part up to and including
    the first '/'. If there is no '/', use the whole id instead of
    scanning past the terminating NUL. */
    slash= strchr(foreign->id, '/');
    tmp_buff= slash ? slash + 1 : foreign->id;
    f_key_info.forein_id= make_lex_string(thd, 0, tmp_buff,
                                          (uint) strlen(tmp_buff), 1);

    /* Split the referenced table name at the first '/': the part before
    it is reported as the database, the part after it as the table. A name
    without '/' gives an empty database and the whole name as the table. */
    tmp_buff= foreign->referenced_table_name;
    slash= strchr(tmp_buff, '/');
    i= slash ? (uint) (slash - tmp_buff) : 0;
    f_key_info.referenced_db= make_lex_string(thd, 0,
                                              tmp_buff, i, 1);
    if (slash)
      tmp_buff= slash + 1;
    f_key_info.referenced_table= make_lex_string(thd, 0, tmp_buff, 
                                               (uint) strlen(tmp_buff), 1);

    /* Column names, pairwise: foreign column, referenced column.
    The first pair is read before n_fields is tested. */
    for (i= 0;;)
    {
      tmp_buff= foreign->foreign_col_names[i];
      name= make_lex_string(thd, name, tmp_buff, (uint) strlen(tmp_buff), 1);
      f_key_info.foreign_fields.push_back(name);
      tmp_buff= foreign->referenced_col_names[i];
      name= make_lex_string(thd, name, tmp_buff, (uint) strlen(tmp_buff), 1);
      f_key_info.referenced_fields.push_back(name);
      if (++i >= foreign->n_fields)
        break;
    }

    /* Text of the referential action. An unmatched type gives an empty
    string, stated explicitly here instead of reusing the pointer to the
    last column name with length 0.
    NOTE(review): foreign->type is matched against single values only; a
    value combining several actions also yields the empty string - confirm
    whether that is intended. */
    ulong length= 0;
    tmp_buff= "";
    switch (foreign->type)
    {
    case DICT_FOREIGN_ON_DELETE_CASCADE:
      length=17;
      tmp_buff= "ON DELETE CASCADE";
      break;
    case DICT_FOREIGN_ON_DELETE_SET_NULL:
      length=18;
      tmp_buff= "ON DELETE SET NULL";
      break;
    case DICT_FOREIGN_ON_DELETE_NO_ACTION:
      length=19;
      tmp_buff= "ON DELETE NO ACTION";
      break;
    case DICT_FOREIGN_ON_UPDATE_CASCADE:
      length=17;
      tmp_buff= "ON UPDATE CASCADE";
      break;
    case DICT_FOREIGN_ON_UPDATE_SET_NULL:
      length=18;
      tmp_buff= "ON UPDATE SET NULL";
      break;
    case DICT_FOREIGN_ON_UPDATE_NO_ACTION:
      length=19;
      tmp_buff= "ON UPDATE NO ACTION";
      break;
    default:
      break;
    }

    /* Pass 0 as the target, like the other calls above that request a
    newly allocated LEX_STRING; f_key_info.constraint_method has not been
    given a value yet and must not be read. */
    f_key_info.constraint_method= make_lex_string(thd, 0,
                                                  tmp_buff, length, 1);

    /* Store a copy of the local description in the result list */
    FOREIGN_KEY_INFO *pf_key_info= ((FOREIGN_KEY_INFO *) 
                                    thd->memdup((gptr) &f_key_info,
                                                sizeof(FOREIGN_KEY_INFO)));
    f_key_list->push_back(pf_key_info);
    foreign = UT_LIST_GET_NEXT(foreign_list, foreign);
  }
  mutex_exit_noninline(&(dict_sys->mutex));
  prebuilt->trx->op_info = (char*)"";
  DBUG_RETURN(0);
}

5704 5705 5706 5707 5708 5709 5710 5711 5712 5713 5714 5715 5716 5717 5718 5719 5720 5721 5722 5723 5724 5725 5726 5727 5728 5729
/*********************************************************************
Checks if ALTER TABLE may change the storage engine of the table.
Changing storage engines is not allowed for tables for which there
are foreign key constraints (parent or child tables). Both constraint
lists of the table are inspected while the data dictionary is locked. */

bool
ha_innobase::can_switch_engines(void)
/*=================================*/
			/* out: true if both the referenced_list and the
			foreign_list of the table are empty */
{
	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;
	bool		no_constraints;

 	DBUG_ENTER("ha_innobase::can_switch_engines");

	prebuilt->trx->op_info =
		(char*)"determining if there are foreign key constraints";

	row_mysql_lock_data_dictionary(prebuilt->trx);

	if (UT_LIST_GET_FIRST(prebuilt->table->referenced_list)) {
		/* Some other table refers to this one */
		no_constraints = false;
	} else if (UT_LIST_GET_FIRST(prebuilt->table->foreign_list)) {
		/* This table refers to some other table */
		no_constraints = false;
	} else {
		no_constraints = true;
	}

	row_mysql_unlock_data_dictionary(prebuilt->trx);

	prebuilt->trx->op_info = (char*)"";

	DBUG_RETURN(no_constraints);
}

unknown's avatar
unknown committed
5730 5731 5732 5733 5734 5735 5736 5737 5738 5739 5740 5741 5742 5743 5744 5745 5746 5747 5748 5749
/***********************************************************************
Checks if a table is referenced by a foreign key. The MySQL manual states that
a REPLACE is either equivalent to an INSERT, or DELETE(s) + INSERT. Only a
delete is then allowed internally to resolve a duplicate key conflict in
REPLACE, not an update. The answer comes from
dict_table_referenced_by_foreign_key() and is normalized to 0 or 1. */

uint
ha_innobase::referenced_by_foreign_key(void)
/*========================================*/
			/* out: > 0 if referenced by a FOREIGN KEY */
{
	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;

	return(dict_table_referenced_by_foreign_key(prebuilt->table)
	       ? 1 : 0);
}
unknown's avatar
unknown committed
5750 5751 5752 5753 5754 5755 5756 5757 5758 5759 5760

/***********************************************************************
Frees the foreign key create info for a table stored in InnoDB, if it is
non-NULL. */

void
ha_innobase::free_foreign_key_create_info(
/*======================================*/
	char*	str)	/* in, own: create info string to free  */
{
	if (str) {
5761
		my_free(str, MYF(0));
unknown's avatar
unknown committed
5762
	}
5763 5764
}

unknown's avatar
unknown committed
5765 5766 5767 5768 5769 5770 5771 5772
/***********************************************************************
Tells something additional to the handler about how to do things. */

int
ha_innobase::extra(
/*===============*/
			   /* out: 0 or error number */
	enum ha_extra_function operation)
unknown's avatar
unknown committed
5773 5774
                           /* in: HA_EXTRA_RETRIEVE_ALL_COLS or some
			   other flag */
unknown's avatar
unknown committed
5775 5776 5777 5778 5779 5780 5781 5782
{
	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;

	/* Warning: since it is not sure that MySQL calls external_lock
	before calling this function, the trx field in prebuilt can be
	obsolete! */

	switch (operation) {
unknown's avatar
unknown committed
5783 5784 5785 5786 5787 5788 5789 5790 5791
                case HA_EXTRA_FLUSH:
                        if (prebuilt->blob_heap) {
                                row_mysql_prebuilt_free_blob_heap(prebuilt);
                        }
                        break;
                case HA_EXTRA_RESET:
                        if (prebuilt->blob_heap) {
                                row_mysql_prebuilt_free_blob_heap(prebuilt);
                        }
5792
                        prebuilt->keep_other_fields_on_keyread = 0;
unknown's avatar
unknown committed
5793 5794 5795
                        prebuilt->read_just_key = 0;
                        break;
  		case HA_EXTRA_RESET_STATE:
5796
	        	prebuilt->keep_other_fields_on_keyread = 0;
unknown's avatar
unknown committed
5797
	        	prebuilt->read_just_key = 0;
unknown's avatar
unknown committed
5798
    	        	break;
unknown's avatar
unknown committed
5799 5800 5801
		case HA_EXTRA_NO_KEYREAD:
    			prebuilt->read_just_key = 0;
    			break;
unknown's avatar
unknown committed
5802
	        case HA_EXTRA_RETRIEVE_ALL_COLS:
unknown's avatar
unknown committed
5803 5804 5805 5806 5807 5808 5809 5810
			prebuilt->hint_need_to_fetch_extra_cols
					= ROW_RETRIEVE_ALL_COLS;
			break;
	        case HA_EXTRA_RETRIEVE_PRIMARY_KEY:
			if (prebuilt->hint_need_to_fetch_extra_cols == 0) {
				prebuilt->hint_need_to_fetch_extra_cols
					= ROW_RETRIEVE_PRIMARY_KEY;
			}
unknown's avatar
unknown committed
5811 5812 5813 5814
			break;
	        case HA_EXTRA_KEYREAD:
	        	prebuilt->read_just_key = 1;
	        	break;
5815 5816 5817
		case HA_EXTRA_KEYREAD_PRESERVE_FIELDS:
			prebuilt->keep_other_fields_on_keyread = 1;
			break;
unknown's avatar
unknown committed
5818 5819 5820 5821 5822 5823 5824
		default:/* Do nothing */
			;
	}

	return(0);
}

unknown's avatar
unknown committed
5825
/**********************************************************************
unknown's avatar
unknown committed
5826 5827 5828 5829
MySQL calls this function at the start of each SQL statement inside LOCK
TABLES. Inside LOCK TABLES the ::external_lock method does not work to
mark SQL statement borders. Note also a special case: if a temporary table
is created inside LOCK TABLES, MySQL has not called external_lock() at all
unknown's avatar
unknown committed
5830 5831 5832 5833 5834 5835
on that table.
MySQL-5.0 also calls this before each statement in an execution of a stored
procedure. To make the execution more deterministic for binlogging, MySQL-5.0
locks all tables involved in a stored procedure with full explicit table
locks (thd->in_lock_tables is true in ::store_lock()) before executing the
procedure. */
unknown's avatar
unknown committed
5836 5837

int
unknown's avatar
unknown committed
5838 5839
ha_innobase::start_stmt(
/*====================*/
unknown's avatar
unknown committed
5840 5841 5842 5843 5844 5845 5846 5847 5848 5849
	              /* out: 0 or error code */
	THD*    thd)  /* in: handle to the user thread */
{
	row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt;
	trx_t*		trx;

	update_thd(thd);

	trx = prebuilt->trx;

unknown's avatar
unknown committed
5850 5851 5852 5853 5854 5855 5856
	/* Here we release the search latch and the InnoDB thread FIFO ticket
	if they were reserved. They should have been released already at the
	end of the previous statement, but because inside LOCK TABLES the
	lock count method does not work to mark the end of a SELECT statement,
	that may not be the case. We MUST release the search latch before an
	INSERT, for example. */

unknown's avatar
unknown committed
5857 5858
	innobase_release_stat_resources(trx);

unknown's avatar
unknown committed
5859 5860 5861 5862 5863 5864 5865 5866
	if (trx->isolation_level <= TRX_ISO_READ_COMMITTED
	    						&& trx->read_view) {
	    	/* At low transaction isolation levels we let
		each consistent read set its own snapshot */

	    	read_view_close_for_mysql(trx);
	}

unknown's avatar
unknown committed
5867
	prebuilt->sql_stat_start = TRUE;
unknown's avatar
unknown committed
5868
	prebuilt->hint_need_to_fetch_extra_cols = 0;
unknown's avatar
unknown committed
5869
	prebuilt->read_just_key = 0;
5870
        prebuilt->keep_other_fields_on_keyread = FALSE;
unknown's avatar
unknown committed
5871

5872
	if (!prebuilt->mysql_has_locked) {
unknown's avatar
unknown committed
5873 5874 5875 5876 5877 5878
	        /* This handle is for a temporary table created inside
	        this same LOCK TABLES; since MySQL does NOT call external_lock
	        in this case, we must use x-row locks inside InnoDB to be
	        prepared for an update of a row */
	  
	        prebuilt->select_lock_type = LOCK_X;
unknown's avatar
unknown committed
5879 5880 5881 5882
	} else {
		if (trx->isolation_level != TRX_ISO_SERIALIZABLE
		    && thd->lex->sql_command == SQLCOM_SELECT
		    && thd->lex->lock_option == TL_READ) {
unknown's avatar
unknown committed
5883
	
unknown's avatar
unknown committed
5884 5885 5886 5887 5888 5889 5890 5891 5892
			/* For other than temporary tables, we obtain
			no lock for consistent read (plain SELECT). */

			prebuilt->select_lock_type = LOCK_NONE;
		} else {
			/* Not a consistent read: restore the
			select_lock_type value. The value of
			stored_select_lock_type was decided in:
			1) ::store_lock(),
5893 5894 5895
			2) ::external_lock(), 
			3) ::init_table_handle_for_HANDLER(), and 
			4) :.transactional_table_lock(). */
unknown's avatar
unknown committed
5896 5897 5898 5899 5900 5901 5902 5903 5904 5905 5906 5907 5908 5909 5910 5911 5912 5913 5914

			prebuilt->select_lock_type =
				prebuilt->stored_select_lock_type;
		}

		if (prebuilt->stored_select_lock_type != LOCK_S
		    && prebuilt->stored_select_lock_type != LOCK_X) {
			fprintf(stderr,
"InnoDB: Error: stored_select_lock_type is %lu inside ::start_stmt()!\n",
			prebuilt->stored_select_lock_type);

			/* Set the value to LOCK_X: this is just fault
			tolerance, we do not know what the correct value
			should be! */

			prebuilt->select_lock_type = LOCK_X;
		}
	}

unknown's avatar
unknown committed
5915
	/* Set the MySQL flag to mark that there is an active transaction */
5916 5917
        if (trx->active_trans == 0) {

unknown's avatar
unknown committed
5918
                innobase_register_trx_and_stmt(thd);
5919
                trx->active_trans = 1;
unknown's avatar
unknown committed
5920 5921 5922
        } else {
		innobase_register_stmt(thd);
	}
unknown's avatar
unknown committed
5923 5924

	return(0);
unknown's avatar
unknown committed
5925 5926
}

unknown's avatar
unknown committed
5927 5928 5929 5930 5931 5932 5933 5934 5935 5936 5937
/**********************************************************************
Maps a MySQL trx isolation level code to the InnoDB isolation level code.
Any other value of iso fails the ut_a() assertion. */
inline
ulint
innobase_map_isolation_level(
/*=========================*/
					/* out: InnoDB isolation level */
	enum_tx_isolation	iso)	/* in: MySQL isolation level code */
{
	ulint	innodb_level = 0;

	switch (iso) {
	case ISO_READ_UNCOMMITTED:
		innodb_level = TRX_ISO_READ_UNCOMMITTED;
		break;
	case ISO_READ_COMMITTED:
		innodb_level = TRX_ISO_READ_COMMITTED;
		break;
	case ISO_REPEATABLE_READ:
		innodb_level = TRX_ISO_REPEATABLE_READ;
		break;
	case ISO_SERIALIZABLE:
		innodb_level = TRX_ISO_SERIALIZABLE;
		break;
	default:
		/* Unknown MySQL isolation level code */
		ut_a(0);
		break;
	}

	return(innodb_level);
}
	
unknown's avatar
unknown committed
5945 5946
/**********************************************************************
As MySQL will execute an external lock for every new table it uses when it
unknown's avatar
unknown committed
5947 5948 5949
starts to process an SQL statement (an exception is when MySQL calls
start_stmt for the handle) we can use this function to store the pointer to
the THD in the handle. We will also use this function to communicate
unknown's avatar
unknown committed
5950 5951 5952 5953 5954 5955 5956
to InnoDB that a new SQL statement has started and that we must store a
savepoint to our transaction handle, so that we are able to roll back
the SQL statement in case of an error. */

int
ha_innobase::external_lock(
/*=======================*/
5957
			        /* out: 0 */
unknown's avatar
unknown committed
5958 5959 5960 5961 5962 5963 5964
	THD*	thd,		/* in: handle to the user thread */
	int 	lock_type)	/* in: lock type */
{
	row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt;
	trx_t*		trx;

  	DBUG_ENTER("ha_innobase::external_lock");
unknown's avatar
unknown committed
5965
	DBUG_PRINT("enter",("lock_type: %d", lock_type));
unknown's avatar
unknown committed
5966 5967 5968 5969 5970 5971

	update_thd(thd);

	trx = prebuilt->trx;

	prebuilt->sql_stat_start = TRUE;
unknown's avatar
unknown committed
5972
	prebuilt->hint_need_to_fetch_extra_cols = 0;
unknown's avatar
unknown committed
5973 5974

	prebuilt->read_just_key = 0;
5975
	prebuilt->keep_other_fields_on_keyread = FALSE;
unknown's avatar
unknown committed
5976 5977 5978 5979 5980 5981

	if (lock_type == F_WRLCK) {

		/* If this is a SELECT, then it is in UPDATE TABLE ...
		or SELECT ... FOR UPDATE */
		prebuilt->select_lock_type = LOCK_X;
unknown's avatar
unknown committed
5982
		prebuilt->stored_select_lock_type = LOCK_X;
unknown's avatar
unknown committed
5983 5984 5985
	}

	if (lock_type != F_UNLCK) {
unknown's avatar
unknown committed
5986
		/* MySQL is setting a new table lock */
unknown's avatar
unknown committed
5987

unknown's avatar
unknown committed
5988 5989
		/* Set the MySQL flag to mark that there is an active
		transaction */
5990 5991
                if (trx->active_trans == 0) {

unknown's avatar
unknown committed
5992
                        innobase_register_trx_and_stmt(thd);
5993
                        trx->active_trans = 1;
unknown's avatar
unknown committed
5994 5995 5996
                } else if (trx->n_mysql_tables_in_use == 0) {
			innobase_register_stmt(thd);
		}
unknown's avatar
unknown committed
5997

unknown's avatar
unknown committed
5998
		trx->n_mysql_tables_in_use++;
5999
		prebuilt->mysql_has_locked = TRUE;
unknown's avatar
unknown committed
6000

unknown's avatar
unknown committed
6001 6002
		if (trx->n_mysql_tables_in_use == 1) {
		        trx->isolation_level = innobase_map_isolation_level(
unknown's avatar
unknown committed
6003 6004
						(enum_tx_isolation)
						thd->variables.tx_isolation);
unknown's avatar
unknown committed
6005
		}
unknown's avatar
unknown committed
6006 6007

		if (trx->isolation_level == TRX_ISO_SERIALIZABLE
unknown's avatar
unknown committed
6008 6009
		    && prebuilt->select_lock_type == LOCK_NONE
		    && (thd->options
unknown's avatar
unknown committed
6010
				& (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) {
unknown's avatar
unknown committed
6011

unknown's avatar
unknown committed
6012 6013
			/* To get serializable execution, we let InnoDB
			conceptually add 'LOCK IN SHARE MODE' to all SELECTs
unknown's avatar
unknown committed
6014 6015 6016 6017 6018
			which otherwise would have been consistent reads. An
			exception is consistent reads in the AUTOCOMMIT=1 mode:
			we know that they are read-only transactions, and they
			can be serialized also if performed as consistent
			reads. */
unknown's avatar
unknown committed
6019 6020

			prebuilt->select_lock_type = LOCK_S;
unknown's avatar
unknown committed
6021
			prebuilt->stored_select_lock_type = LOCK_S;
unknown's avatar
unknown committed
6022 6023
		}

unknown's avatar
unknown committed
6024 6025 6026 6027
		/* Starting from 4.1.9, no InnoDB table lock is taken in LOCK
		TABLES if AUTOCOMMIT=1. It does not make much sense to acquire
		an InnoDB table lock if it is released immediately at the end
		of LOCK TABLES, and InnoDB's table locks in that case cause
unknown's avatar
unknown committed
6028 6029 6030
		VERY easily deadlocks. We do not set InnoDB table locks when
		MySQL sets them at the start of a stored procedure call
		(MySQL does have thd->in_lock_tables TRUE there). */
unknown's avatar
unknown committed
6031

unknown's avatar
unknown committed
6032
		if (prebuilt->select_lock_type != LOCK_NONE) {
unknown's avatar
unknown committed
6033

6034
			if (thd->in_lock_tables &&
unknown's avatar
unknown committed
6035
			    thd->lex->sql_command != SQLCOM_CALL &&
unknown's avatar
unknown committed
6036 6037
			    thd->variables.innodb_table_locks &&
			    (thd->options & OPTION_NOT_AUTOCOMMIT)) {
unknown's avatar
unknown committed
6038

6039
				ulint	error;
6040
				error = row_lock_table_for_mysql(prebuilt,
6041
							NULL, 0);
6042 6043 6044

				if (error != DB_SUCCESS) {
					error = convert_error_code_to_mysql(
unknown's avatar
unknown committed
6045 6046
						(int) error, user_thd);
					DBUG_RETURN((int) error);
6047 6048
				}
			}
unknown's avatar
unknown committed
6049 6050 6051 6052

		  	trx->mysql_n_tables_locked++;
		}

6053
		DBUG_RETURN(0);
unknown's avatar
unknown committed
6054
	}
unknown's avatar
unknown committed
6055

unknown's avatar
unknown committed
6056
	/* MySQL is releasing a table lock */
unknown's avatar
unknown committed
6057

unknown's avatar
unknown committed
6058 6059
	trx->n_mysql_tables_in_use--;
	prebuilt->mysql_has_locked = FALSE;
unknown's avatar
unknown committed
6060

unknown's avatar
unknown committed
6061 6062
	/* If the MySQL lock count drops to zero we know that the current SQL
	statement has ended */
unknown's avatar
unknown committed
6063

unknown's avatar
unknown committed
6064
	if (trx->n_mysql_tables_in_use == 0) {
unknown's avatar
unknown committed
6065

unknown's avatar
unknown committed
6066 6067 6068
	        trx->mysql_n_tables_locked = 0;
		prebuilt->used_in_HANDLER = FALSE;
			
unknown's avatar
unknown committed
6069 6070 6071 6072
		/* Release a possible FIFO ticket and search latch. Since we
		may reserve the kernel mutex, we have to release the search
		system latch first to obey the latching order. */

unknown's avatar
unknown committed
6073
		innobase_release_stat_resources(trx);
unknown's avatar
unknown committed
6074

unknown's avatar
unknown committed
6075
		if (!(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) {
6076 6077
                        if (trx->active_trans != 0) {
                                innobase_commit(thd, TRUE);
unknown's avatar
unknown committed
6078 6079
			}
		} else {
unknown's avatar
unknown committed
6080 6081 6082
			if (trx->isolation_level <= TRX_ISO_READ_COMMITTED
	    						&& trx->read_view) {

unknown's avatar
unknown committed
6083
				/* At low transaction isolation levels we let
unknown's avatar
unknown committed
6084 6085
				each consistent read set its own snapshot */

unknown's avatar
unknown committed
6086
				read_view_close_for_mysql(trx);
unknown's avatar
unknown committed
6087
			}
unknown's avatar
unknown committed
6088 6089 6090
		}
	}

6091
	DBUG_RETURN(0);
unknown's avatar
unknown committed
6092 6093
}

6094 6095 6096 6097 6098 6099 6100
/**********************************************************************
With this function MySQL request a transactional lock to a table when
user issued query LOCK TABLES..WHERE ENGINE = InnoDB. */

int
ha_innobase::transactional_table_lock(
/*==================================*/
6101
			        /* out: error code */
6102 6103 6104 6105 6106 6107 6108 6109 6110 6111 6112 6113 6114 6115 6116 6117 6118 6119 6120 6121 6122
	THD*	thd,		/* in: handle to the user thread */
	int 	lock_type)	/* in: lock type */
{
	row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt;
	trx_t*		trx;

  	DBUG_ENTER("ha_innobase::transactional_table_lock");
	DBUG_PRINT("enter",("lock_type: %d", lock_type));

	/* We do not know if MySQL can call this function before calling
	external_lock(). To be safe, update the thd of the current table
	handle. */

	update_thd(thd);

 	if (prebuilt->table->ibd_file_missing && !current_thd->tablespace_op) {
	        ut_print_timestamp(stderr);
	        fprintf(stderr, "  InnoDB error:\n"
"MySQL is trying to use a table handle but the .ibd file for\n"
"table %s does not exist.\n"
"Have you deleted the .ibd file from the database directory under\n"
unknown's avatar
unknown committed
6123
"the MySQL datadir?"
6124 6125 6126 6127 6128 6129 6130 6131 6132 6133 6134 6135 6136 6137 6138 6139 6140 6141
"Look from section 15.1 of http://www.innodb.com/ibman.html\n"
"how you can resolve the problem.\n",
				prebuilt->table->name);
		DBUG_RETURN(HA_ERR_CRASHED);
	}

	trx = prebuilt->trx;

	prebuilt->sql_stat_start = TRUE;
	prebuilt->hint_need_to_fetch_extra_cols = 0;

	prebuilt->read_just_key = 0;
	prebuilt->keep_other_fields_on_keyread = FALSE;

	if (lock_type == F_WRLCK) {
		prebuilt->select_lock_type = LOCK_X;
		prebuilt->stored_select_lock_type = LOCK_X;
	} else if (lock_type == F_RDLCK) {
unknown's avatar
unknown committed
6142 6143
		prebuilt->select_lock_type = LOCK_S;
		prebuilt->stored_select_lock_type = LOCK_S;
6144 6145 6146 6147 6148 6149 6150 6151 6152 6153 6154 6155
	} else {
	        ut_print_timestamp(stderr);
	        fprintf(stderr, "  InnoDB error:\n"
"MySQL is trying to set transactional table lock with corrupted lock type\n"
"to table %s, lock type %d does not exist.\n",
				prebuilt->table->name, lock_type);
		DBUG_RETURN(HA_ERR_CRASHED);
	}

	/* MySQL is setting a new transactional table lock */

	/* Set the MySQL flag to mark that there is an active transaction */
unknown's avatar
unknown committed
6156 6157
        if (trx->active_trans == 0) {

unknown's avatar
unknown committed
6158
                innobase_register_trx_and_stmt(thd);
unknown's avatar
unknown committed
6159 6160
                trx->active_trans = 1;
        }
6161 6162 6163 6164

	if (thd->in_lock_tables && thd->variables.innodb_table_locks) {
		ulint	error = DB_SUCCESS;

6165
		error = row_lock_table_for_mysql(prebuilt, NULL, 0);
6166 6167

		if (error != DB_SUCCESS) {
unknown's avatar
unknown committed
6168 6169
			error = convert_error_code_to_mysql((int) error, user_thd);
			DBUG_RETURN((int) error);
6170 6171 6172 6173 6174 6175 6176 6177 6178 6179 6180 6181 6182 6183 6184
		}

		if (thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {

			/* Store the current undo_no of the transaction 
			so that we know where to roll back if we have 
			to roll back the next SQL statement */

			trx_mark_sql_stat_end(trx);
		}
	}

	DBUG_RETURN(0);
}

6185 6186 6187 6188 6189 6190 6191 6192 6193 6194
/****************************************************************************
Here we export InnoDB status variables to MySQL. This is a thin wrapper:
all of the work is delegated to srv_export_innodb_status(). */

void
innodb_export_status(void)
/*======================*/
{
	srv_export_innodb_status();
}

unknown's avatar
unknown committed
6195
/****************************************************************************
unknown's avatar
unknown committed
6196
Implements the SHOW INNODB STATUS command. Sends the output of the InnoDB
unknown's avatar
unknown committed
6197 6198
Monitor to the client. */

unknown's avatar
unknown committed
6199
bool
unknown's avatar
unknown committed
6200 6201 6202 6203
innodb_show_status(
/*===============*/
	THD*	thd)	/* in: the MySQL query thread of the caller */
{
6204 6205 6206 6207 6208 6209
	Protocol*		protocol = thd->protocol;
	trx_t*			trx;
	static const char	truncated_msg[] = "... truncated...\n";
	const long		MAX_STATUS_SIZE = 64000;
	ulint			trx_list_start = ULINT_UNDEFINED;
	ulint			trx_list_end = ULINT_UNDEFINED;
unknown's avatar
unknown committed
6210

unknown's avatar
unknown committed
6211 6212
        DBUG_ENTER("innodb_show_status");

6213
        if (have_innodb != SHOW_OPTION_YES) {
unknown's avatar
unknown committed
6214 6215 6216
                my_message(ER_NOT_SUPPORTED_YET,
          "Cannot call SHOW INNODB STATUS because skip-innodb is defined",
                           MYF(0));
unknown's avatar
unknown committed
6217
                DBUG_RETURN(TRUE);
unknown's avatar
unknown committed
6218
        }
unknown's avatar
unknown committed
6219

unknown's avatar
unknown committed
6220 6221 6222 6223
	trx = check_trx_exists(thd);

	innobase_release_stat_resources(trx);

6224 6225
	/* We let the InnoDB Monitor to output at most MAX_STATUS_SIZE
	bytes of text. */
6226

6227
	long	flen, usable_len;
6228
	char*	str;
6229

6230
	mutex_enter_noninline(&srv_monitor_file_mutex);
6231
	rewind(srv_monitor_file);
6232 6233
	srv_printf_innodb_monitor(srv_monitor_file,
				&trx_list_start, &trx_list_end);
6234
	flen = ftell(srv_monitor_file);
6235
	os_file_set_eof(srv_monitor_file);
unknown's avatar
unknown committed
6236

6237 6238
	if (flen < 0) {
		flen = 0;
6239 6240 6241 6242 6243 6244
	}

	if (flen > MAX_STATUS_SIZE) {
		usable_len = MAX_STATUS_SIZE;
	} else {
		usable_len = flen;
6245
	}
unknown's avatar
unknown committed
6246

6247 6248
	/* allocate buffer for the string, and
	read the contents of the temporary file */
unknown's avatar
unknown committed
6249

6250
	if (!(str = my_malloc(usable_len + 1, MYF(0))))
unknown's avatar
unknown committed
6251
        {
unknown's avatar
unknown committed
6252
          mutex_exit_noninline(&srv_monitor_file_mutex);
unknown's avatar
Merge  
unknown committed
6253
          DBUG_RETURN(TRUE);
unknown's avatar
unknown committed
6254
        }
unknown's avatar
unknown committed
6255

unknown's avatar
unknown committed
6256
	rewind(srv_monitor_file);
6257 6258
	if (flen < MAX_STATUS_SIZE) {
		/* Display the entire output. */
unknown's avatar
unknown committed
6259
		flen = (long) fread(str, 1, flen, srv_monitor_file);
6260 6261 6262 6263 6264
	} else if (trx_list_end < (ulint) flen
			&& trx_list_start < trx_list_end
			&& trx_list_start + (flen - trx_list_end)
			< MAX_STATUS_SIZE - sizeof truncated_msg - 1) {
		/* Omit the beginning of the list of active transactions. */
unknown's avatar
unknown committed
6265
		long len = (long) fread(str, 1, trx_list_start, srv_monitor_file);
6266 6267 6268 6269
		memcpy(str + len, truncated_msg, sizeof truncated_msg - 1);
		len += sizeof truncated_msg - 1;
		usable_len = (MAX_STATUS_SIZE - 1) - len;
		fseek(srv_monitor_file, flen - usable_len, SEEK_SET);
unknown's avatar
unknown committed
6270
		len += (long) fread(str + len, 1, usable_len, srv_monitor_file);
6271 6272 6273
		flen = len;
	} else {
		/* Omit the end of the output. */
unknown's avatar
unknown committed
6274
		flen = (long) fread(str, 1, MAX_STATUS_SIZE - 1, srv_monitor_file);
6275
	}
unknown's avatar
unknown committed
6276

6277
	mutex_exit_noninline(&srv_monitor_file_mutex);
6278

unknown's avatar
unknown committed
6279 6280
	List<Item> field_list;

6281
	field_list.push_back(new Item_empty_string("Status", flen));
unknown's avatar
unknown committed
6282

6283 6284
	if (protocol->send_fields(&field_list, Protocol::SEND_NUM_ROWS |
                                               Protocol::SEND_EOF)) {
6285
		my_free(str, MYF(0));
unknown's avatar
unknown committed
6286

unknown's avatar
unknown committed
6287
		DBUG_RETURN(TRUE);
unknown's avatar
unknown committed
6288 6289
	}

unknown's avatar
unknown committed
6290 6291 6292
        protocol->prepare_for_resend();
        protocol->store(str, flen, system_charset_info);
        my_free(str, MYF(0));
unknown's avatar
unknown committed
6293

unknown's avatar
unknown committed
6294
        if (protocol->write()) {
unknown's avatar
unknown committed
6295

unknown's avatar
unknown committed
6296 6297
        	DBUG_RETURN(TRUE);
	}
unknown's avatar
unknown committed
6298
	send_eof(thd);
unknown's avatar
unknown committed
6299

unknown's avatar
unknown committed
6300
  	DBUG_RETURN(FALSE);
unknown's avatar
unknown committed
6301 6302
}

unknown's avatar
unknown committed
6303 6304 6305 6306 6307 6308 6309 6310 6311 6312 6313 6314 6315 6316 6317 6318 6319 6320 6321 6322 6323 6324 6325 6326 6327 6328 6329 6330 6331 6332 6333 6334
/****************************************************************************
Implements the SHOW MUTEX STATUS command. Walks mutex_list and sends one
result row for every mutex whose mutex_type is not 1 and whose count_using
is positive. The counters of the mutexes whose mutex_type is 1 are summed
up and sent as one final row named "rw_lock_mutexes". */

bool
innodb_mutex_show_status(
/*=====================*/
              /* out: TRUE if an error occurred, else FALSE */
  THD*  thd)  /* in: the MySQL query thread of the caller */
{
  Protocol        *protocol= thd->protocol;
  List<Item> field_list;
  mutex_t*  mutex;
  ulint   rw_lock_count= 0;
  ulint   rw_lock_count_spin_loop= 0;
  ulint   rw_lock_count_spin_rounds= 0;
  ulint   rw_lock_count_os_wait= 0;
  ulint   rw_lock_count_os_yield= 0;
  ulonglong rw_lock_wait_time= 0;
  DBUG_ENTER("innodb_mutex_show_status");

  field_list.push_back(new Item_empty_string("Mutex", FN_REFLEN));
  field_list.push_back(new Item_empty_string("Module", FN_REFLEN));
  field_list.push_back(new Item_uint("Count", 21));
  field_list.push_back(new Item_uint("Spin_waits", 21));
  field_list.push_back(new Item_uint("Spin_rounds", 21));
  field_list.push_back(new Item_uint("OS_waits", 21));
  field_list.push_back(new Item_uint("OS_yields", 21));
  field_list.push_back(new Item_uint("OS_waits_time", 21));

  if (protocol->send_fields(&field_list,
                            Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF))
  {
    DBUG_RETURN(TRUE);
  }

#ifdef MUTEX_PROTECT_TO_BE_ADDED_LATER
  mutex_enter(&mutex_list_mutex);
#endif

  for (mutex = UT_LIST_GET_FIRST(mutex_list);
       mutex != NULL;
       mutex = UT_LIST_GET_NEXT(list, mutex))
  {
    if (mutex->mutex_type == 1)
    {
      /* Not reported individually: only accumulated into the
      "rw_lock_mutexes" summary row sent after the loop */
      rw_lock_count += mutex->count_using;
      rw_lock_count_spin_loop += mutex->count_spin_loop;
      rw_lock_count_spin_rounds += mutex->count_spin_rounds;
      rw_lock_count_os_wait += mutex->count_os_wait;
      rw_lock_count_os_yield += mutex->count_os_yield;
      rw_lock_wait_time += mutex->lspent_time;
      continue;
    }

    if (mutex->count_using == 0)
    {
      /* Never used mutexes are not shown */
      continue;
    }

    protocol->prepare_for_resend();
    protocol->store(mutex->cmutex_name, system_charset_info);
    protocol->store(mutex->cfile_name, system_charset_info);
    protocol->store((ulonglong)mutex->count_using);
    protocol->store((ulonglong)mutex->count_spin_loop);
    protocol->store((ulonglong)mutex->count_spin_rounds);
    protocol->store((ulonglong)mutex->count_os_wait);
    protocol->store((ulonglong)mutex->count_os_yield);
    protocol->store((ulonglong)mutex->lspent_time/1000);

    if (protocol->write())
    {
#ifdef MUTEX_PROTECT_TO_BE_ADDED_LATER
      mutex_exit(&mutex_list_mutex);
#endif
      DBUG_RETURN(TRUE);
    }
  }

  /* The list is not touched any more: the summary row below only uses
  local variables. Release the list mutex here, so that it is released
  on every remaining exit path, including a failing write of the
  summary row. */
#ifdef MUTEX_PROTECT_TO_BE_ADDED_LATER
  mutex_exit(&mutex_list_mutex);
#endif

  protocol->prepare_for_resend();
  protocol->store("rw_lock_mutexes", system_charset_info);
  protocol->store("", system_charset_info);
  protocol->store((ulonglong)rw_lock_count);
  protocol->store((ulonglong)rw_lock_count_spin_loop);
  protocol->store((ulonglong)rw_lock_count_spin_rounds);
  protocol->store((ulonglong)rw_lock_count_os_wait);
  protocol->store((ulonglong)rw_lock_count_os_yield);
  protocol->store((ulonglong)rw_lock_wait_time/1000);

  if (protocol->write())
  {
    DBUG_RETURN(TRUE);
  }

  send_eof(thd);
  DBUG_RETURN(FALSE);
}

6401 6402 6403 6404 6405
/****************************************************************************
 Handling the shared INNOBASE_SHARE structure that is needed to provide table
 locking.
****************************************************************************/

6406
/* Returns the key of an INNOBASE_SHARE: a pointer to its table name, with
the length of the name stored to *length. The third argument is ignored. */
static mysql_byte* innobase_get_key(INNOBASE_SHARE *share,uint *length,
			      my_bool not_used __attribute__((unused)))
{
  *length= share->table_name_length;

  char *key= share->table_name;

  return (mysql_byte*) key;
}

/* Looks up the INNOBASE_SHARE of a table in innobase_open_tables, creating
and inserting a new one if the table has none yet, and increments its use
count. The whole operation runs under innobase_share_mutex.

Returns the share, or NULL if a new share could not be allocated or could
not be inserted into the hash. A new share and its copy of the table name
are allocated as one memory block: the name is stored right after the
struct. */
static INNOBASE_SHARE *get_share(const char *table_name)
{
  INNOBASE_SHARE *share;
  uint length=(uint) strlen(table_name);

  pthread_mutex_lock(&innobase_share_mutex);

  share=(INNOBASE_SHARE*) hash_search(&innobase_open_tables,
                                      (mysql_byte*) table_name,
                                      length);
  if (!share)
  {
    share=(INNOBASE_SHARE *) my_malloc(sizeof(*share)+length+1,
                                       MYF(MY_WME | MY_ZEROFILL));
    if (!share)
    {
      /* Out of memory: we must not fall through to the use count
      increment below, which would dereference a NULL pointer */
      pthread_mutex_unlock(&innobase_share_mutex);
      return 0;
    }

    share->table_name_length=length;
    share->table_name=(char*) (share+1);
    strmov(share->table_name,table_name);

    if (my_hash_insert(&innobase_open_tables, (mysql_byte*) share))
    {
      pthread_mutex_unlock(&innobase_share_mutex);
      my_free((gptr) share, MYF(0));
      return 0;
    }

    thr_lock_init(&share->lock);
    pthread_mutex_init(&share->mutex,MY_MUTEX_INIT_FAST);
  }

  share->use_count++;
  pthread_mutex_unlock(&innobase_share_mutex);

  return share;
}

/* Decrements the use count of a share under innobase_share_mutex. When the
count drops to zero, the share is removed from innobase_open_tables, its
lock and mutex are destroyed and its memory is freed. */
static void free_share(INNOBASE_SHARE *share)
{
  pthread_mutex_lock(&innobase_share_mutex);

  share->use_count--;

  if (share->use_count == 0)
  {
    /* That was the last user of the share */
    hash_delete(&innobase_open_tables, (mysql_byte*) share);
    thr_lock_delete(&share->lock);
    pthread_mutex_destroy(&share->mutex);
    my_free((gptr) share, MYF(0));
  }

  pthread_mutex_unlock(&innobase_share_mutex);
}
6455 6456

/*********************************************************************
unknown's avatar
unknown committed
6457
Converts a MySQL table lock stored in the 'lock' field of the handle to
unknown's avatar
unknown committed
6458 6459 6460 6461 6462 6463
a proper type before storing pointer to the lock into an array of pointers.
MySQL also calls this if it wants to reset some table locks to a not-locked
state during the processing of an SQL query. An example is that during a
SELECT the read lock is released early on the 'const' tables where we only
fetch one row. MySQL does not call this when it releases all locks at the
end of an SQL statement. */
6464 6465 6466 6467 6468 6469 6470 6471 6472 6473 6474 6475 6476

THR_LOCK_DATA**
ha_innobase::store_lock(
/*====================*/
						/* out: pointer to the next
						element in the 'to' array */
	THD*			thd,		/* in: user thread handle */
	THR_LOCK_DATA**		to,		/* in: pointer to an array
						of pointers to lock structs;
						pointer to the 'lock' field
						of current handle is stored
						next to this array */
	enum thr_lock_type 	lock_type)	/* in: lock type to store in
unknown's avatar
unknown committed
6477 6478
						'lock'; this may also be
						TL_IGNORE */
6479 6480 6481
{
	row_prebuilt_t* prebuilt	= (row_prebuilt_t*) innobase_prebuilt;

unknown's avatar
unknown committed
6482 6483 6484 6485
	/* NOTE: MySQL  can call this function with lock 'type' TL_IGNORE!
	Be careful to ignore TL_IGNORE if we are going to do something with
	only 'real' locks! */

6486
	if ((lock_type == TL_READ && thd->in_lock_tables) ||           
unknown's avatar
unknown committed
6487 6488
	    (lock_type == TL_READ_HIGH_PRIORITY && thd->in_lock_tables) ||
	    lock_type == TL_READ_WITH_SHARED_LOCKS ||
unknown's avatar
unknown committed
6489
	    lock_type == TL_READ_NO_INSERT ||
unknown's avatar
unknown committed
6490 6491
	    (thd->lex->sql_command != SQLCOM_SELECT
	     && lock_type != TL_IGNORE)) {
unknown's avatar
unknown committed
6492

unknown's avatar
unknown committed
6493 6494 6495 6496 6497
		/* The OR cases above are in this order:
		1) MySQL is doing LOCK TABLES ... READ LOCAL, or
		2) (we do not know when TL_READ_HIGH_PRIORITY is used), or
		3) this is a SELECT ... IN SHARE MODE, or
		4) we are doing a complex SQL statement like
unknown's avatar
unknown committed
6498
		INSERT INTO ... SELECT ... and the logical logging (MySQL
unknown's avatar
unknown committed
6499
		binlog) requires the use of a locking read, or
unknown's avatar
unknown committed
6500 6501 6502
		MySQL is doing LOCK TABLES ... READ.
		5) we let InnoDB do locking reads for all SQL statements that
		are not simple SELECTs; note that select_lock_type in this
unknown's avatar
unknown committed
6503 6504 6505 6506 6507 6508
		case may get strengthened in ::external_lock() to LOCK_X.
		Note that we MUST use a locking read in all data modifying
		SQL statements, because otherwise the execution would not be
		serializable, and also the results from the update could be
		unexpected if an obsolete consistent read view would be
		used. */
unknown's avatar
unknown committed
6509

6510 6511 6512
		if (srv_locks_unsafe_for_binlog &&
		    prebuilt->trx->isolation_level != TRX_ISO_SERIALIZABLE &&
		    (lock_type == TL_READ || lock_type == TL_READ_NO_INSERT) &&
unknown's avatar
unknown committed
6513 6514
		    (thd->lex->sql_command == SQLCOM_INSERT_SELECT ||
		     thd->lex->sql_command == SQLCOM_UPDATE)) {
6515 6516 6517 6518

			/* In case we have innobase_locks_unsafe_for_binlog
			option set and isolation level of the transaction
			is not set to serializable and MySQL is doing
6519 6520 6521
			INSERT INTO...SELECT or UPDATE ... = (SELECT ...)
			without FOR UPDATE or IN SHARE MODE in select, then
			we use consistent read for select. */
6522 6523 6524 6525 6526 6527 6528

			prebuilt->select_lock_type = LOCK_NONE;
			prebuilt->stored_select_lock_type = LOCK_NONE;
		} else {
			prebuilt->select_lock_type = LOCK_S;
			prebuilt->stored_select_lock_type = LOCK_S;
		}
unknown's avatar
unknown committed
6529

unknown's avatar
unknown committed
6530 6531 6532
	} else if (lock_type != TL_IGNORE) {

	        /* We set possible LOCK_X value in external_lock, not yet
6533
		here even if this would be SELECT ... FOR UPDATE */
unknown's avatar
unknown committed
6534

6535
		prebuilt->select_lock_type = LOCK_NONE;
unknown's avatar
unknown committed
6536
		prebuilt->stored_select_lock_type = LOCK_NONE;
6537 6538 6539 6540
	}

	if (lock_type != TL_IGNORE && lock.type == TL_UNLOCK) {

unknown's avatar
unknown committed
6541 6542 6543 6544 6545 6546 6547 6548
		/* Starting from 5.0.7, we weaken also the table locks
		set at the start of a MySQL stored procedure call, just like
		we weaken the locks set at the start of an SQL statement.
		MySQL does set thd->in_lock_tables TRUE there, but in reality
		we do not need table locks to make the execution of a
		single transaction stored procedure call deterministic
		(if it does not use a consistent read). */

unknown's avatar
unknown committed
6549
    		/* If we are not doing a LOCK TABLE or DISCARD/IMPORT
6550
		TABLESPACE or TRUNCATE TABLE, then allow multiple writers */
6551 6552

    		if ((lock_type >= TL_WRITE_CONCURRENT_INSERT &&
unknown's avatar
unknown committed
6553 6554 6555
	 	    lock_type <= TL_WRITE)
		    && (!thd->in_lock_tables
		        || thd->lex->sql_command == SQLCOM_CALL)
6556
		    && !thd->tablespace_op
unknown's avatar
unknown committed
6557
		    && thd->lex->sql_command != SQLCOM_TRUNCATE
6558
                    && thd->lex->sql_command != SQLCOM_CREATE_TABLE) {
6559 6560 6561 6562

      			lock_type = TL_WRITE_ALLOW_WRITE;
      		}

unknown's avatar
unknown committed
6563 6564 6565 6566 6567 6568
		/* In queries of type INSERT INTO t1 SELECT ... FROM t2 ...
		MySQL would use the lock TL_READ_NO_INSERT on t2, and that
		would conflict with TL_WRITE_ALLOW_WRITE, blocking all inserts
		to t2. Convert the lock to a normal read lock to allow
		concurrent inserts to t2. */
      		
unknown's avatar
unknown committed
6569 6570 6571 6572
		if (lock_type == TL_READ_NO_INSERT
		    && (!thd->in_lock_tables
			|| thd->lex->sql_command == SQLCOM_CALL)) {

unknown's avatar
unknown committed
6573 6574 6575
			lock_type = TL_READ;
		}
		
unknown's avatar
unknown committed
6576
 		lock.type = lock_type;
6577 6578 6579
  	}

  	*to++= &lock;
6580

6581 6582 6583
	return(to);
}

6584
/***********************************************************************
unknown's avatar
unknown committed
6585 6586
This function initializes the auto-inc counter if it has not been
initialized yet. This function does not change the value of the auto-inc
unknown's avatar
unknown committed
6587
counter if it already has been initialized. In parameter ret returns
unknown's avatar
unknown committed
6588
the value of the auto-inc counter. */
6589

unknown's avatar
unknown committed
6590 6591 6592
int
ha_innobase::innobase_read_and_init_auto_inc(
/*=========================================*/
unknown's avatar
unknown committed
6593 6594
				/* out: 0 or error code: deadlock or lock wait
				timeout */
unknown's avatar
unknown committed
6595
	longlong*	ret)	/* out: auto-inc value */
6596
{
unknown's avatar
unknown committed
6597
  	row_prebuilt_t* prebuilt	= (row_prebuilt_t*) innobase_prebuilt;
unknown's avatar
unknown committed
6598
    	longlong        auto_inc;
unknown's avatar
unknown committed
6599 6600
	ulint		old_select_lock_type;
	ibool		trx_was_not_started	= FALSE;
unknown's avatar
unknown committed
6601
  	int     	error;
6602

unknown's avatar
unknown committed
6603
  	ut_a(prebuilt);
unknown's avatar
unknown committed
6604
	ut_a(prebuilt->trx ==
6605
                (trx_t*) current_thd->ha_data[innobase_hton.slot]);
unknown's avatar
unknown committed
6606 6607
	ut_a(prebuilt->table);
	
unknown's avatar
unknown committed
6608 6609 6610 6611
	if (prebuilt->trx->conc_state == TRX_NOT_STARTED) {
		trx_was_not_started = TRUE;
	}

unknown's avatar
unknown committed
6612 6613 6614 6615 6616
	/* In case MySQL calls this in the middle of a SELECT query, release
	possible adaptive hash latch to avoid deadlocks of threads */

	trx_search_latch_release_if_reserved(prebuilt->trx);

unknown's avatar
unknown committed
6617
	auto_inc = dict_table_autoinc_read(prebuilt->table);
unknown's avatar
unknown committed
6618

unknown's avatar
unknown committed
6619 6620 6621 6622
	if (auto_inc != 0) {
		/* Already initialized */
		*ret = auto_inc;
	
unknown's avatar
unknown committed
6623 6624 6625
		error = 0;

		goto func_exit_early;
unknown's avatar
unknown committed
6626
	}
6627

unknown's avatar
unknown committed
6628
	error = row_lock_table_autoinc_for_mysql(prebuilt);
unknown's avatar
unknown committed
6629

unknown's avatar
unknown committed
6630 6631
	if (error != DB_SUCCESS) {
		error = convert_error_code_to_mysql(error, user_thd);
6632

unknown's avatar
unknown committed
6633
		goto func_exit_early;
unknown's avatar
unknown committed
6634
	}	
unknown's avatar
unknown committed
6635

unknown's avatar
unknown committed
6636 6637
	/* Check again if someone has initialized the counter meanwhile */
	auto_inc = dict_table_autoinc_read(prebuilt->table);
unknown's avatar
unknown committed
6638

unknown's avatar
unknown committed
6639 6640 6641
	if (auto_inc != 0) {
		*ret = auto_inc;
	
unknown's avatar
unknown committed
6642 6643 6644
		error = 0;

		goto func_exit_early;
unknown's avatar
unknown committed
6645
	}
6646

unknown's avatar
unknown committed
6647
  	(void) extra(HA_EXTRA_KEYREAD);
6648
  	index_init(table->s->next_number_index);
unknown's avatar
unknown committed
6649

unknown's avatar
unknown committed
6650 6651 6652 6653 6654 6655
	/* Starting from 5.0.9, we use a consistent read to read the auto-inc
	column maximum value. This eliminates the spurious deadlocks caused
	by the row X-lock that we previously used. Note the following flaw
	in our algorithm: if some other user meanwhile UPDATEs the auto-inc
	column, our consistent read will not return the largest value. We
	accept this flaw, since the deadlocks were a bigger trouble. */
6656

unknown's avatar
unknown committed
6657
  	/* Fetch all the columns in the key */
unknown's avatar
unknown committed
6658
  	
unknown's avatar
unknown committed
6659
	prebuilt->hint_need_to_fetch_extra_cols = ROW_RETRIEVE_ALL_COLS;
6660

unknown's avatar
unknown committed
6661 6662 6663 6664 6665 6666 6667
	old_select_lock_type = prebuilt->select_lock_type;
  	prebuilt->select_lock_type = LOCK_NONE;

	/* Eliminate an InnoDB error print that happens when we try to SELECT
	from a table when no table has been locked in ::external_lock(). */
	prebuilt->trx->n_mysql_tables_in_use++;

unknown's avatar
unknown committed
6668
	error = index_last(table->record[1]);
6669

unknown's avatar
unknown committed
6670 6671 6672
	prebuilt->trx->n_mysql_tables_in_use--;
  	prebuilt->select_lock_type = old_select_lock_type;

unknown's avatar
unknown committed
6673
  	if (error) {
unknown's avatar
unknown committed
6674 6675 6676 6677 6678 6679
		if (error == HA_ERR_END_OF_FILE) {
			/* The table was empty, initialize to 1 */
			auto_inc = 1;

			error = 0;
		} else {
unknown's avatar
unknown committed
6680 6681 6682 6683
			/* This should not happen in a consistent read */
			fprintf(stderr,
"InnoDB: Error: consistent read of auto-inc column returned %lu\n",
								(ulong)error);
unknown's avatar
unknown committed
6684 6685 6686 6687
  			auto_inc = -1;

  			goto func_exit;
  		}
unknown's avatar
unknown committed
6688
  	} else {
unknown's avatar
unknown committed
6689 6690
		/* Initialize to max(col) + 1 */
    		auto_inc = (longlong) table->next_number_field->
6691
                        	val_int_offset(table->s->rec_buff_length) + 1;
unknown's avatar
unknown committed
6692
  	}
6693

unknown's avatar
unknown committed
6694 6695 6696
	dict_table_autoinc_initialize(prebuilt->table, auto_inc);

func_exit:
unknown's avatar
unknown committed
6697
  	(void) extra(HA_EXTRA_NO_KEYREAD);
6698

unknown's avatar
unknown committed
6699 6700 6701 6702
	index_end();

	*ret = auto_inc;

unknown's avatar
unknown committed
6703 6704 6705 6706 6707 6708 6709 6710 6711 6712 6713 6714
func_exit_early:
	/* Since MySQL does not seem to call autocommit after SHOW TABLE
	STATUS (even if we would register the trx here), we must commit our
	transaction here if it was started here. This is to eliminate a
	dangling transaction. */

	if (trx_was_not_started) {

		innobase_commit_low(prebuilt->trx);
	}

 	return(error);
unknown's avatar
unknown committed
6715 6716 6717 6718 6719 6720 6721 6722
}

/***********************************************************************
This function initializes the auto-inc counter if it has not been
initialized yet. This function does not change the value of the auto-inc
counter if it already has been initialized. Returns the value of the
auto-inc counter. */

6723
ulonglong
unknown's avatar
unknown committed
6724 6725 6726 6727 6728 6729 6730 6731 6732 6733 6734
ha_innobase::get_auto_increment()
/*=============================*/
                         /* out: auto-increment column value, -1 if error
                         (deadlock or lock wait timeout) */
{
  	longlong        nr;
  	int     	error;
	
	error = innobase_read_and_init_auto_inc(&nr);

	if (error) {
unknown's avatar
unknown committed
6735 6736 6737 6738 6739 6740 6741 6742
		/* This should never happen in the current (5.0.6) code, since
		we call this function only after the counter has been
		initialized. */
	
		ut_print_timestamp(stderr);
		fprintf(stderr,
		"  InnoDB: Error: error %lu in ::get_auto_increment()\n",
						(ulong)error);
unknown's avatar
unknown committed
6743
          	return(~(ulonglong) 0);
unknown's avatar
unknown committed
6744
	}
6745

6746
	return((ulonglong) nr);
6747 6748
}

unknown's avatar
unknown committed
6749 6750 6751 6752
/***********************************************************************
Compares two 'refs'. A 'ref' is the (internal) primary key value of the row.
If there is no explicitly declared non-null unique key or a primary key, then
InnoDB internally uses the row id as the primary key. */
unknown's avatar
unknown committed
6753

6754 6755
int
ha_innobase::cmp_ref(
unknown's avatar
unknown committed
6756 6757 6758 6759 6760 6761 6762
/*=================*/
				/* out: < 0 if ref1 < ref2, 0 if equal, else
				> 0 */
	const mysql_byte* ref1,	/* in: an (internal) primary key value in the
				MySQL key value format */
	const mysql_byte* ref2)	/* in: an (internal) primary key value in the
				MySQL key value format */
6763
{
unknown's avatar
unknown committed
6764
	row_prebuilt_t*	prebuilt = (row_prebuilt_t*) innobase_prebuilt;
6765
	enum_field_types mysql_type;
unknown's avatar
unknown committed
6766 6767 6768 6769 6770 6771 6772 6773 6774 6775 6776 6777 6778 6779 6780 6781 6782 6783 6784 6785 6786
	Field*		field;
	KEY_PART_INFO*	key_part;
	KEY_PART_INFO*	key_part_end;
	uint		len1;
	uint		len2;
	int 		result;

	if (prebuilt->clust_index_was_generated) {
		/* The 'ref' is an InnoDB row id */

		return(memcmp(ref1, ref2, DATA_ROW_ID_LEN));
	}

	/* Do a type-aware comparison of primary key fields. PK fields
	are always NOT NULL, so no checks for NULL are performed. */

	key_part = table->key_info[table->s->primary_key].key_part;

	key_part_end = key_part
			+ table->key_info[table->s->primary_key].key_parts;

6787 6788 6789
	for (; key_part != key_part_end; ++key_part) {
		field = key_part->field;
		mysql_type = field->type();
unknown's avatar
unknown committed
6790

6791 6792 6793 6794 6795
		if (mysql_type == FIELD_TYPE_TINY_BLOB
		    || mysql_type == FIELD_TYPE_MEDIUM_BLOB
		    || mysql_type == FIELD_TYPE_BLOB
		    || mysql_type == FIELD_TYPE_LONG_BLOB) {
		    
unknown's avatar
unknown committed
6796 6797 6798 6799 6800 6801
			/* In the MySQL key value format, a column prefix of
			a BLOB is preceded by a 2-byte length field */

			len1 = innobase_read_from_2_little_endian(ref1);
			len2 = innobase_read_from_2_little_endian(ref2);

6802 6803
			ref1 += 2;
			ref2 += 2;
unknown's avatar
unknown committed
6804 6805
			result = ((Field_blob*)field)->cmp(
						    (const char*)ref1, len1,
6806 6807
			                            (const char*)ref2, len2);
		} else {
unknown's avatar
unknown committed
6808 6809 6810 6811 6812 6813 6814
			result = field->cmp((const char*)ref1,
					    (const char*)ref2);
		}

		if (result) {

			return(result);
6815 6816 6817 6818 6819
		}

		ref1 += key_part->length;
		ref2 += key_part->length;
	}
unknown's avatar
unknown committed
6820 6821

	return(0);
6822 6823
}

unknown's avatar
unknown committed
6824 6825
char*
ha_innobase::get_mysql_bin_log_name()
unknown's avatar
unknown committed
6826
{
unknown's avatar
unknown committed
6827
	return(trx_sys_mysql_bin_log_name);
unknown's avatar
unknown committed
6828 6829
}

unknown's avatar
unknown committed
6830 6831
/* Returns the binlog position kept in trx_sys_mysql_bin_log_pos. */
ulonglong
ha_innobase::get_mysql_bin_log_pos()
{
	/* trx_sys_mysql_bin_log_pos is ib_longlong, which is a typedef for
	a 64-bit integer (__int64 or longlong), so it is ok to convert it
	to ulonglong. */

	return((ulonglong) trx_sys_mysql_bin_log_pos);
}

6839
extern "C" {
6840
/**********************************************************************
unknown's avatar
unknown committed
6841 6842 6843 6844 6845 6846 6847
This function is used to find the storage length in bytes of the first n
characters for prefix indexes using a multibyte character set. The function
finds charset information and returns length of prefix_len characters in the
index field in bytes.

NOTE: the prototype of this function is copied to data0type.c! If you change
this function, you MUST change also data0type.c! */
6848

unknown's avatar
unknown committed
6849 6850 6851 6852 6853
ulint
innobase_get_at_most_n_mbchars(
/*===========================*/
				/* out: number of bytes occupied by the first
				n characters */
6854
	ulint charset_id,	/* in: character set id */
unknown's avatar
unknown committed
6855 6856 6857 6858 6859
	ulint prefix_len,	/* in: prefix length in bytes of the index
				(this has to be divided by mbmaxlen to get the
				number of CHARACTERS n in the prefix) */
	ulint data_len,         /* in: length of the string in bytes */
	const char* str)	/* in: character string */
6860
{
6861
	ulint char_length;	/* character length in bytes */
unknown's avatar
unknown committed
6862
	ulint n_chars;		/* number of characters in prefix */
6863
	CHARSET_INFO* charset;	/* charset used in the field */
6864

unknown's avatar
unknown committed
6865
	charset = get_charset((uint) charset_id, MYF(MY_WME));
6866

6867 6868
	ut_ad(charset);
	ut_ad(charset->mbmaxlen);
6869

unknown's avatar
unknown committed
6870
	/* Calculate how many characters at most the prefix index contains */
6871

unknown's avatar
unknown committed
6872
	n_chars = prefix_len / charset->mbmaxlen;
6873

unknown's avatar
unknown committed
6874 6875 6876
	/* If the charset is multi-byte, then we must find the length of the
	first at most n chars in the string. If the string contains less
	characters than n, then we return the length to the end of the last
unknown's avatar
unknown committed
6877
	character. */
6878

unknown's avatar
unknown committed
6879 6880
	if (charset->mbmaxlen > 1) {
		/* my_charpos() returns the byte length of the first n_chars
unknown's avatar
unknown committed
6881 6882 6883 6884 6885 6886 6887 6888 6889 6890 6891 6892 6893 6894 6895 6896
		characters, or a value bigger than the length of str, if
		there were not enough full characters in str.

		Why does the code below work:
		Suppose that we are looking for n UTF-8 characters.

		1) If the string is long enough, then the prefix contains at
		least n complete UTF-8 characters + maybe some extra
		characters + an incomplete UTF-8 character. No problem in
		this case. The function returns the pointer to the
		end of the nth character.

		2) If the string is not long enough, then the string contains
		the complete value of a column, that is, only complete UTF-8
		characters, and we can store in the column prefix index the
		whole string. */
unknown's avatar
unknown committed
6897

unknown's avatar
unknown committed
6898
		char_length = my_charpos(charset, str,
unknown's avatar
unknown committed
6899
						str + data_len, (int) n_chars);
unknown's avatar
unknown committed
6900 6901 6902
		if (char_length > data_len) {
			char_length = data_len;
		}		
unknown's avatar
unknown committed
6903
	} else {
unknown's avatar
unknown committed
6904 6905 6906 6907 6908
		if (data_len < prefix_len) {
			char_length = data_len;
		} else {
			char_length = prefix_len;
		}
6909
	}
6910

unknown's avatar
unknown committed
6911
	return(char_length);
6912 6913 6914
}
}

6915 6916
extern "C" {
/**********************************************************************
Tells whether the SQL query of the current thread is one of the following:

1) REPLACE, REPLACE ... SELECT, or LOAD DATA INFILE ... REPLACE;

2) INSERT ... ON DUPLICATE KEY UPDATE.

NOTE that /mysql/innobase/row/row0ins.c must contain the
prototype for this function ! */

ibool
innobase_query_is_update(void)
/*==========================*/
				/* out: 1 if the query is one of the
				above, else 0 */
{
	THD*	thd = (THD *)innobase_current_thd();

	switch (thd->lex->sql_command) {
	case SQLCOM_REPLACE:
	case SQLCOM_REPLACE_SELECT:
		return(1);

	case SQLCOM_LOAD:
		/* LOAD DATA INFILE counts only with REPLACE */
		return(thd->lex->duplicates == DUP_REPLACE ? 1 : 0);

	case SQLCOM_INSERT:
		/* INSERT counts only with ON DUPLICATE KEY UPDATE */
		return(thd->lex->duplicates == DUP_UPDATE ? 1 : 0);

	default:
		return(0);
	}
}
}

6954 6955 6956
/***********************************************************************
This function is used to prepare X/Open XA distributed transaction   */

6957 6958 6959
int 
innobase_xa_prepare(
/*================*/
6960 6961 6962 6963 6964 6965 6966
			/* out: 0 or error number */
	THD*	thd,	/* in: handle to the MySQL thread of the user
			whose XA transaction should be prepared */
	bool	all)	/* in: TRUE - commit transaction
			FALSE - the current SQL statement ended */
{
	int error = 0;
unknown's avatar
unknown committed
6967 6968 6969 6970
        trx_t* trx = check_trx_exists(thd);

        if (thd->lex->sql_command != SQLCOM_XA_PREPARE) {

unknown's avatar
unknown committed
6971 6972 6973 6974 6975 6976 6977 6978 6979 6980 6981 6982 6983 6984 6985 6986 6987 6988 6989
                /* For ibbackup to work the order of transactions in binlog
                and InnoDB must be the same. Consider the situation

                  thread1> prepare; write to binlog; ...
                          <context switch>
                  thread2> prepare; write to binlog; commit
                  thread1>                           ... commit

                To ensure this will not happen we're taking the mutex on
                prepare, and releasing it on commit.

                Note: only do it for normal commits, done via ha_commit_trans.
                If 2pc protocol is executed by external transaction
                coordinator, it will be just a regular MySQL client
                executing XA PREPARE and XA COMMIT commands.
                In this case we cannot know how many minutes or hours
                will be between XA PREPARE and XA COMMIT, and we don't want
                to block for undefined period of time.
                */
unknown's avatar
unknown committed
6990 6991 6992
                pthread_mutex_lock(&prepare_commit_mutex);
                trx->active_trans = 2;
        }
6993

6994 6995 6996 6997 6998
	if (!thd->variables.innodb_support_xa) {

		return(0);
	}

6999
        trx->xid=thd->transaction.xid;
7000 7001 7002 7003 7004 7005 7006 7007 7008 7009

	/* Release a possible FIFO ticket and search latch. Since we will
	reserve the kernel mutex, we have to release the search system latch
	first to obey the latching order. */

	innobase_release_stat_resources(trx);

	if (trx->active_trans == 0 && trx->conc_state != TRX_NOT_STARTED) {

		fprintf(stderr,
7010
"InnoDB: Error: trx->active_trans == 0\n"
7011 7012 7013
"InnoDB: but trx->conc_state != TRX_NOT_STARTED\n");
	}

unknown's avatar
unknown committed
7014 7015
	if (all
	    || (!(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)))) {
unknown's avatar
unknown committed
7016 7017 7018

                /* We were instructed to prepare the whole transaction, or
                this is an SQL statement end and autocommit is on */
7019

unknown's avatar
unknown committed
7020
                ut_ad(trx->active_trans);
unknown's avatar
unknown committed
7021

unknown's avatar
unknown committed
7022
		error = (int) trx_prepare_for_mysql(trx);
7023 7024 7025 7026 7027 7028 7029
	} else {
	        /* We just mark the SQL statement ended and do not do a
		transaction prepare */

		if (trx->auto_inc_lock) {
			/* If we had reserved the auto-inc lock for some
			table in this SQL statement we release it now */
unknown's avatar
unknown committed
7030

7031 7032 7033 7034 7035 7036 7037 7038 7039 7040 7041 7042 7043 7044 7045 7046 7047 7048 7049 7050
			row_unlock_table_autoinc_for_mysql(trx);
		}
		/* Store the current undo_no of the transaction so that we
		know where to roll back if we have to roll back the next
		SQL statement */

		trx_mark_sql_stat_end(trx);
	}

	/* Tell the InnoDB server that there might be work for utility
	threads: */

	srv_active_wake_master_thread();

        return error;
}

/***********************************************************************
This function is used to recover X/Open XA distributed transactions   */

7051 7052 7053
int 
innobase_xa_recover(
/*================*/
7054 7055 7056 7057 7058 7059
				/* out: number of prepared transactions 
				stored in xid_list */
	XID*    xid_list, 	/* in/out: prepared transactions */
	uint	len)		/* in: number of slots in xid_list */
{
	if (len == 0 || xid_list == NULL) {
unknown's avatar
unknown committed
7060 7061

		return(0);
7062 7063
	}

unknown's avatar
unknown committed
7064
	return(trx_recover_for_mysql(xid_list, len));
7065 7066 7067 7068 7069 7070
}

/***********************************************************************
This function is used to commit one X/Open XA distributed transaction
which is in the prepared state */

7071 7072 7073
int 
innobase_commit_by_xid(
/*===================*/
7074
			/* out: 0 or error number */
unknown's avatar
unknown committed
7075
	XID*	xid)	/* in: X/Open XA transaction identification */
7076 7077 7078 7079 7080 7081 7082 7083 7084 7085 7086 7087 7088 7089 7090 7091 7092 7093
{
	trx_t*	trx;

	trx = trx_get_trx_by_xid(xid);

	if (trx) {
		innobase_commit_low(trx);
		
		return(XA_OK);
	} else {
		return(XAER_NOTA);
	}
}

/***********************************************************************
This function is used to rollback one X/Open XA distributed transaction
which is in the prepared state */

7094 7095 7096
int 
innobase_rollback_by_xid(
/*=====================*/
7097
			/* out: 0 or error number */
unknown's avatar
unknown committed
7098
	XID	*xid)	/* in: X/Open XA transaction idenfification */
7099 7100 7101 7102 7103 7104 7105 7106 7107 7108 7109 7110
{
	trx_t*	trx;

	trx = trx_get_trx_by_xid(xid);

	if (trx) {
		return(innobase_rollback_trx(trx));
	} else {
		return(XAER_NOTA);
	}
}

7111
#endif /* HAVE_INNOBASE_DB */