ha_innodb.cc 227 KB
Newer Older
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1
/* Copyright (C) 2000-2005 MySQL AB & Innobase Oy
2

3 4
   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
5
   the Free Software Foundation; version 2 of the License.
6

7 8
   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
9
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
10
   GNU General Public License for more details.
11

12 13
   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
14
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307	 USA */
15

16
/* This file defines the InnoDB handler: the interface between MySQL and InnoDB
NOTE: You can only use noninlined InnoDB functions in this file, because we
have disabled the InnoDB inlining in this file. */
19

20 21
/* TODO list for the InnoDB handler in 5.0:
  - Remove the flag trx->active_trans and look at the InnoDB
22
    trx struct state field
serg@serg.mylan's avatar
serg@serg.mylan committed
23
  - fix savepoint functions to use savepoint storage area
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
24 25 26
  - Find out what kind of problems the OS X case-insensitivity causes to
    table and database names; should we 'normalize' the names like we do
    in Windows?
27
*/
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
28

29
#ifdef USE_PRAGMA_IMPLEMENTATION
30 31 32
#pragma implementation				// gcc: Class implementation
#endif

33
#include <mysql_priv.h>
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
34

35 36
#ifdef WITH_INNOBASE_STORAGE_ENGINE

37 38 39
#include <m_ctype.h>
#include <hash.h>
#include <myisampack.h>
marko@hundin.mysql.fi's avatar
marko@hundin.mysql.fi committed
40
#include <mysys_err.h>
41
#include <my_sys.h>
42
#include "ha_innodb.h"
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
43

44 45 46
pthread_mutex_t innobase_share_mutex,	/* to protect innobase_open_files */
		prepare_commit_mutex;	/* to force correct commit order in
					binlog */
47 48 49 50
ulong commit_threads= 0;
pthread_mutex_t commit_threads_m;
pthread_cond_t commit_cond;
pthread_mutex_t commit_cond_m;
51
bool innodb_inited= 0;
52

53 54 55 56
/*
  This needs to exist until the query cache callback is removed
  or learns to pass hton.
*/
57
static handlerton *innodb_hton_ptr;
58

59
/* Store MySQL definition of 'byte': in Linux it is char while InnoDB
60 61 62
uses unsigned char; the header univ.i which we include next defines
'byte' as a macro which expands to 'unsigned char' */

63
typedef uchar mysql_byte;
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
64

65 66
#define INSIDE_HA_INNOBASE_CC

67
/* Include necessary InnoDB headers */
68
extern "C" {
69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90
#include "../storage/innobase/include/univ.i"
#include "../storage/innobase/include/os0file.h"
#include "../storage/innobase/include/os0thread.h"
#include "../storage/innobase/include/srv0start.h"
#include "../storage/innobase/include/srv0srv.h"
#include "../storage/innobase/include/trx0roll.h"
#include "../storage/innobase/include/trx0trx.h"
#include "../storage/innobase/include/trx0sys.h"
#include "../storage/innobase/include/mtr0mtr.h"
#include "../storage/innobase/include/row0ins.h"
#include "../storage/innobase/include/row0mysql.h"
#include "../storage/innobase/include/row0sel.h"
#include "../storage/innobase/include/row0upd.h"
#include "../storage/innobase/include/log0log.h"
#include "../storage/innobase/include/lock0lock.h"
#include "../storage/innobase/include/dict0crea.h"
#include "../storage/innobase/include/btr0cur.h"
#include "../storage/innobase/include/btr0btr.h"
#include "../storage/innobase/include/fsp0fsp.h"
#include "../storage/innobase/include/sync0sync.h"
#include "../storage/innobase/include/fil0fil.h"
#include "../storage/innobase/include/trx0xa.h"
91
#include "../storage/innobase/include/thr0loc.h"
92
#include "../storage/innobase/include/ha_prototypes.h"
93 94 95
}


96 97
/* The default values for the following, type long or longlong, start-up
parameters are declared in mysqld.cc: */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
98

99
long innobase_mirrored_log_groups, innobase_log_files_in_group,
100 101 102 103
	innobase_log_buffer_size, innobase_buffer_pool_awe_mem_mb,
	innobase_additional_mem_pool_size, innobase_file_io_threads,
	innobase_lock_wait_timeout, innobase_force_recovery,
	innobase_open_files;
104

105
long long innobase_buffer_pool_size, innobase_log_file_size;
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
106

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
107 108
/* The default values for the following char* start-up parameters
are determined in innobase_init below: */
109

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
110
char*	innobase_data_home_dir			= NULL;
111
char*	innobase_data_file_path			= NULL;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
112
char*	innobase_log_group_home_dir		= NULL;
marko@hundin.mysql.fi's avatar
marko@hundin.mysql.fi committed
113
char*	innobase_log_arch_dir			= NULL;/* unused */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
114 115
/* The following has a misleading name: starting from 4.0.5, this also
affects Windows: */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
116 117 118 119 120
char*	innobase_unix_file_flush_method		= NULL;

/* Below we have boolean-valued start-up parameters, and their default
values */

antony@ppcg5.local's avatar
antony@ppcg5.local committed
121
static
122
ulong	innobase_fast_shutdown			= 1;
antony@ppcg5.local's avatar
antony@ppcg5.local committed
123 124
#ifdef UNIV_LOG_ARCHIVE
static
marko@hundin.mysql.fi's avatar
marko@hundin.mysql.fi committed
125
my_bool innobase_log_archive			= FALSE;/* unused */
antony@ppcg5.local's avatar
antony@ppcg5.local committed
126 127
#endif /* UNIV_LOG_ARCHIVE */
static
128
my_bool innobase_use_doublewrite		= TRUE;
antony@ppcg5.local's avatar
antony@ppcg5.local committed
129
static
130
my_bool innobase_use_checksums			= TRUE;
antony@ppcg5.local's avatar
antony@ppcg5.local committed
131
static
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
132
my_bool	innobase_file_per_table			= FALSE;
antony@ppcg5.local's avatar
antony@ppcg5.local committed
133
static
134
my_bool innobase_locks_unsafe_for_binlog	= FALSE;
antony@ppcg5.local's avatar
antony@ppcg5.local committed
135
static
136
my_bool innobase_rollback_on_timeout		= FALSE;
antony@ppcg5.local's avatar
antony@ppcg5.local committed
137
static
138
my_bool innobase_create_status_file		= FALSE;
139
static
140
my_bool innobase_stats_on_metadata		= TRUE;
141

antony@ppcg5.local's avatar
antony@ppcg5.local committed
142 143
static
char*	internal_innobase_data_file_path	= NULL;
144

145
/* The following counter is used to convey information to InnoDB
146 147 148 149 150
about server activity: in selects it is not sensible to call
srv_active_wake_master_thread after each fetch or search, we only do
it every INNOBASE_WAKE_INTERVAL'th step. */

#define INNOBASE_WAKE_INTERVAL	32
151
static ulong	innobase_active_counter	= 0;
152

153
static HASH	innobase_open_tables;
154

155
#ifdef __NETWARE__	/* some special cleanup for NetWare */
156 157 158
bool nw_panic = FALSE;
#endif

159
static mysql_byte* innobase_get_key(INNOBASE_SHARE *share, size_t *length,
160
	my_bool not_used __attribute__((unused)));
161 162
static INNOBASE_SHARE *get_share(const char *table_name);
static void free_share(INNOBASE_SHARE *share);
163 164 165 166 167 168 169 170
static int innobase_close_connection(handlerton *hton, THD* thd);
static int innobase_commit(handlerton *hton, THD* thd, bool all);
static int innobase_rollback(handlerton *hton, THD* thd, bool all);
static int innobase_rollback_to_savepoint(handlerton *hton, THD* thd, 
           void *savepoint);
static int innobase_savepoint(handlerton *hton, THD* thd, void *savepoint);
static int innobase_release_savepoint(handlerton *hton, THD* thd, 
           void *savepoint);
171 172
static handler *innobase_create_handler(handlerton *hton,
                                        TABLE_SHARE *table,
173
                                        MEM_ROOT *mem_root);
174

175
static const char innobase_hton_name[]= "InnoDB";
176

antony@ppcg5.local's avatar
antony@ppcg5.local committed
177 178 179 180 181 182 183 184 185 186 187

static MYSQL_THDVAR_BOOL(support_xa, PLUGIN_VAR_OPCMDARG,
  "Enable InnoDB support for the XA two-phase commit",
  /* check_func */ NULL, /* update_func */ NULL,
  /* default */ TRUE);

static MYSQL_THDVAR_BOOL(table_locks, PLUGIN_VAR_OPCMDARG,
  "Enable InnoDB locking in LOCK TABLES",
  /* check_func */ NULL, /* update_func */ NULL,
  /* default */ TRUE);

188 189 190
/***********************************************************************
Constructs a new ha_innobase handler object for a table, using placement
new on the supplied memory root. */
static handler *innobase_create_handler(handlerton *hton,
                                        TABLE_SHARE *table,
                                        MEM_ROOT *mem_root)
{
  /* The handler object is allocated from mem_root, not from the heap */
  return new (mem_root) ha_innobase(hton, table);
}

195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327
/***********************************************************************
This function is used to prepare X/Open XA distributed transaction   */
static
int
innobase_xa_prepare(
/*================*/
			/* out: 0 or error number */
	handlerton* hton,
	THD*	thd,	/* in: handle to the MySQL thread of the user
			whose XA transaction should be prepared */
	bool	all);	/* in: TRUE - commit transaction
			FALSE - the current SQL statement ended */
/***********************************************************************
This function is used to recover X/Open XA distributed transactions   */
static
int
innobase_xa_recover(
/*================*/
				/* out: number of prepared transactions
				stored in xid_list */
	handlerton* hton,
	XID*	xid_list,	/* in/out: prepared transactions */
	uint	len);		/* in: number of slots in xid_list */
/***********************************************************************
This function is used to commit one X/Open XA distributed transaction
which is in the prepared state */
static
int
innobase_commit_by_xid(
/*===================*/
			/* out: 0 or error number */
	handlerton* hton,
	XID*	xid);	/* in: X/Open XA transaction identification */
/***********************************************************************
This function is used to rollback one X/Open XA distributed transaction
which is in the prepared state */
static
int
innobase_rollback_by_xid(
/*=====================*/
			/* out: 0 or error number */
	handlerton* hton,
	XID	*xid);	/* in: X/Open XA transaction identification */
/***********************************************************************
Create a consistent view for a cursor based on current transaction
which is created if the corresponding MySQL thread still lacks one.
This consistent view is then used inside of MySQL when accessing records
using a cursor. */
static
void*
innobase_create_cursor_view(
/*========================*/
				/* out: pointer to cursor view or NULL */
	handlerton*	hton,	/* in: innobase hton */
	THD*		thd);	/* in: user thread handle */
/***********************************************************************
Set the given consistent cursor view to a transaction which is created
if the corresponding MySQL thread still lacks one. If the given
consistent cursor view is NULL global read view of a transaction is
restored to a transaction read view. */
static
void
innobase_set_cursor_view(
/*=====================*/
	handlerton* hton,
	THD*	thd,	/* in: user thread handle */
	void*	curview);/* in: Consistent cursor view to be set */
/***********************************************************************
Close the given consistent cursor view of a transaction and restore
global read view to a transaction read view. Transaction is created if the
corresponding MySQL thread still lacks one. */
static
void
innobase_close_cursor_view(
/*=======================*/
	handlerton* hton,
	THD*	thd,	/* in: user thread handle */
	void*	curview);/* in: Consistent read view to be closed */
/*********************************************************************
Removes all tables in the named database inside InnoDB. */
static
void
innobase_drop_database(
/*===================*/
			/* out: nothing; the function is declared void */
	handlerton* hton, /* in: handlerton of Innodb */
	char*	path);	/* in: database path; inside InnoDB the name
			of the last directory in the path is used as
			the database name: for example, in 'mysql/data/test'
			the database name is 'test' */
/***********************************************************************
Closes an InnoDB database. */
static
int
innobase_end(handlerton *hton, ha_panic_function type);

/*********************************************************************
Creates an InnoDB transaction struct for the thd if it does not yet have one.
Starts a new InnoDB transaction if a transaction is not yet started. And
assigns a new snapshot for a consistent read if the transaction does not yet
have one. */
static
int
innobase_start_trx_and_assign_read_view(
/*====================================*/
			/* out: 0 */
	handlerton* hton, /* in: Innodb handlerton */ 
	THD*	thd);	/* in: MySQL thread handle of the user for whom
			the transaction should be committed */
/********************************************************************
Flushes InnoDB logs to disk and makes a checkpoint. Really, a commit flushes
the logs, and the name of this function should be innobase_checkpoint. */
static
bool
innobase_flush_logs(
/*================*/
				/* out: TRUE if error */
	handlerton*	hton);	/* in: InnoDB handlerton */

/****************************************************************************
Implements the SHOW INNODB STATUS command. Sends the output of the InnoDB
Monitor to the client. */
static
bool
innodb_show_status(
/*===============*/
	handlerton*	hton,	/* in: the innodb handlerton */
	THD*	thd,	/* in: the MySQL query thread of the caller */
	stat_print_fn *stat_print);
static
bool innobase_show_status(handlerton *hton, THD* thd, 
                          stat_print_fn* stat_print,
                          enum ha_stat_type stat_type);
328

329 330
/*********************************************************************
Commits a transaction in an InnoDB database. */
331
static
332 333 334 335 336
void
innobase_commit_low(
/*================*/
	trx_t*	trx);	/* in: transaction handle */

337
/* Table of InnoDB status variables. Each entry gives the variable name, the
address of the corresponding counter in export_vars, and the SHOW_* type
used to display it. All counters are SHOW_LONG except row_lock_time, which
is SHOW_LONGLONG. The table is terminated by an entry whose name is NullS. */
static SHOW_VAR innodb_status_variables[]= {
  {"buffer_pool_pages_data",
  (char*) &export_vars.innodb_buffer_pool_pages_data,	  SHOW_LONG},
  {"buffer_pool_pages_dirty",
  (char*) &export_vars.innodb_buffer_pool_pages_dirty,	  SHOW_LONG},
  {"buffer_pool_pages_flushed",
  (char*) &export_vars.innodb_buffer_pool_pages_flushed,  SHOW_LONG},
  {"buffer_pool_pages_free",
  (char*) &export_vars.innodb_buffer_pool_pages_free,	  SHOW_LONG},
  {"buffer_pool_pages_latched",
  (char*) &export_vars.innodb_buffer_pool_pages_latched,  SHOW_LONG},
  {"buffer_pool_pages_misc",
  (char*) &export_vars.innodb_buffer_pool_pages_misc,	  SHOW_LONG},
  {"buffer_pool_pages_total",
  (char*) &export_vars.innodb_buffer_pool_pages_total,	  SHOW_LONG},
  {"buffer_pool_read_ahead_rnd",
  (char*) &export_vars.innodb_buffer_pool_read_ahead_rnd, SHOW_LONG},
  {"buffer_pool_read_ahead_seq",
  (char*) &export_vars.innodb_buffer_pool_read_ahead_seq, SHOW_LONG},
  {"buffer_pool_read_requests",
  (char*) &export_vars.innodb_buffer_pool_read_requests,  SHOW_LONG},
  {"buffer_pool_reads",
  (char*) &export_vars.innodb_buffer_pool_reads,	  SHOW_LONG},
  {"buffer_pool_wait_free",
  (char*) &export_vars.innodb_buffer_pool_wait_free,	  SHOW_LONG},
  {"buffer_pool_write_requests",
  (char*) &export_vars.innodb_buffer_pool_write_requests, SHOW_LONG},
  {"data_fsyncs",
  (char*) &export_vars.innodb_data_fsyncs,		  SHOW_LONG},
  {"data_pending_fsyncs",
  (char*) &export_vars.innodb_data_pending_fsyncs,	  SHOW_LONG},
  {"data_pending_reads",
  (char*) &export_vars.innodb_data_pending_reads,	  SHOW_LONG},
  {"data_pending_writes",
  (char*) &export_vars.innodb_data_pending_writes,	  SHOW_LONG},
  {"data_read",
  (char*) &export_vars.innodb_data_read,		  SHOW_LONG},
  {"data_reads",
  (char*) &export_vars.innodb_data_reads,		  SHOW_LONG},
  {"data_writes",
  (char*) &export_vars.innodb_data_writes,		  SHOW_LONG},
  {"data_written",
  (char*) &export_vars.innodb_data_written,		  SHOW_LONG},
  {"dblwr_pages_written",
  (char*) &export_vars.innodb_dblwr_pages_written,	  SHOW_LONG},
  {"dblwr_writes",
  (char*) &export_vars.innodb_dblwr_writes,		  SHOW_LONG},
  {"log_waits",
  (char*) &export_vars.innodb_log_waits,		  SHOW_LONG},
  {"log_write_requests",
  (char*) &export_vars.innodb_log_write_requests,	  SHOW_LONG},
  {"log_writes",
  (char*) &export_vars.innodb_log_writes,		  SHOW_LONG},
  {"os_log_fsyncs",
  (char*) &export_vars.innodb_os_log_fsyncs,		  SHOW_LONG},
  {"os_log_pending_fsyncs",
  (char*) &export_vars.innodb_os_log_pending_fsyncs,	  SHOW_LONG},
  {"os_log_pending_writes",
  (char*) &export_vars.innodb_os_log_pending_writes,	  SHOW_LONG},
  {"os_log_written",
  (char*) &export_vars.innodb_os_log_written,		  SHOW_LONG},
  {"page_size",
  (char*) &export_vars.innodb_page_size,		  SHOW_LONG},
  {"pages_created",
  (char*) &export_vars.innodb_pages_created,		  SHOW_LONG},
  {"pages_read",
  (char*) &export_vars.innodb_pages_read,		  SHOW_LONG},
  {"pages_written",
  (char*) &export_vars.innodb_pages_written,		  SHOW_LONG},
  {"row_lock_current_waits",
  (char*) &export_vars.innodb_row_lock_current_waits,	  SHOW_LONG},
  {"row_lock_time",
  (char*) &export_vars.innodb_row_lock_time,		  SHOW_LONGLONG},
  {"row_lock_time_avg",
  (char*) &export_vars.innodb_row_lock_time_avg,	  SHOW_LONG},
  {"row_lock_time_max",
  (char*) &export_vars.innodb_row_lock_time_max,	  SHOW_LONG},
  {"row_lock_waits",
  (char*) &export_vars.innodb_row_lock_waits,		  SHOW_LONG},
  {"rows_deleted",
  (char*) &export_vars.innodb_rows_deleted,		  SHOW_LONG},
  {"rows_inserted",
  (char*) &export_vars.innodb_rows_inserted,		  SHOW_LONG},
  {"rows_read",
  (char*) &export_vars.innodb_rows_read,		  SHOW_LONG},
  {"rows_updated",
  (char*) &export_vars.innodb_rows_updated,		  SHOW_LONG},
  /* End-of-table marker */
  {NullS, NullS, SHOW_LONG}
};
426

427 428
/* General functions */

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
429 430 431 432 433 434 435 436 437
/**********************************************************************
Calls srv_conc_enter_innodb() for the transaction, unless
srv_thread_concurrency is zero, in which case nothing is done. Testing the
setting here, in an inline function, saves the cost of the call. */
inline
void
innodb_srv_conc_enter_innodb(
/*=========================*/
	trx_t*	trx)	/* in: transaction handle */
{
	if (UNIV_LIKELY(!srv_thread_concurrency)) {
		/* srv_thread_concurrency is zero: skip the call */

		return;
	}

	srv_conc_enter_innodb(trx);
}

/**********************************************************************
Calls srv_conc_exit_innodb() for the transaction, unless
srv_thread_concurrency is zero, in which case nothing is done. Testing the
setting here, in an inline function, saves the cost of the call. */
inline
void
innodb_srv_conc_exit_innodb(
/*========================*/
	trx_t*	trx)	/* in: transaction handle */
{
	if (UNIV_LIKELY(!srv_thread_concurrency)) {
		/* srv_thread_concurrency is zero: skip the call */

		return;
	}

	srv_conc_exit_innodb(trx);
}

heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
463
/**********************************************************************
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
464
Releases possible search latch and InnoDB thread FIFO ticket. These should
465 466 467 468
be released at each SQL statement end, and also when mysqld passes the
control to the client. It does no harm to release these also in the middle
of an SQL statement. */
inline
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484
void
innobase_release_stat_resources(
/*============================*/
	trx_t*	trx)	/* in: transaction object */
{
	if (trx->has_search_latch) {
		trx_search_latch_release_if_reserved(trx);
	}

	if (trx->declared_to_be_inside_innodb) {
		/* Release our possible ticket in the FIFO */

		srv_conc_force_exit_innodb(trx);
	}
}

485 486 487 488 489 490 491
/************************************************************************
Obtain the InnoDB transaction of a MySQL thread. The result is a reference
to the pointer slot returned by thd_ha_data() for innodb_hton_ptr, so the
caller can both read and assign the transaction pointer through it. */
inline
trx_t*&
thd_to_trx(
/*=======*/
				/* out: reference to transaction pointer */
	THD*		thd)	/* in: MySQL thread */
{
	return(*(trx_t**) thd_ha_data(thd, innodb_hton_ptr));
}

497 498 499 500
/************************************************************************
Call this function when mysqld passes control to the client. That is to
avoid deadlocks on the adaptive hash S-latch possibly held by thd. For more
documentation, see handler.cc. */
501
static
502
int
503 504
innobase_release_temporary_latches(
/*===============================*/
505 506
         handlerton *hton,
	 THD *thd)
507
{
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
508 509
	trx_t*	trx;

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
510
	if (!innodb_inited) {
511

512
		return 0;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
513 514
	}

515
	trx = thd_to_trx(thd);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
516 517

	if (trx) {
518
		innobase_release_stat_resources(trx);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
519
	}
520
	return 0;
521 522
}

523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539
/************************************************************************
Increments innobase_active_counter and every INNOBASE_WAKE_INTERVALth
time calls srv_active_wake_master_thread. This function should be used
when a single database operation may introduce a small need for
server utility activity, like checkpointing. */
inline
void
innobase_active_small(void)
/*=======================*/
{
	innobase_active_counter++;

	if ((innobase_active_counter % INNOBASE_WAKE_INTERVAL) == 0) {
		srv_active_wake_master_thread();
	}
}

540
/************************************************************************
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
541 542 543
Converts an InnoDB error code to a MySQL error code and also tells to MySQL
about a possible transaction rollback inside InnoDB caused by a lock wait
timeout or a deadlock. */
544 545 546 547 548
static
int
convert_error_code_to_mysql(
/*========================*/
			/* out: MySQL error code */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
549 550
	int	error,	/* in: InnoDB error code */
	THD*	thd)	/* in: user thread handle or NULL */
551 552 553 554 555
{
	if (error == DB_SUCCESS) {

		return(0);

556
	} else if (error == (int) DB_DUPLICATE_KEY) {
557

558
		return(HA_ERR_FOUND_DUPP_KEY);
559

560
	} else if (error == (int) DB_FOREIGN_DUPLICATE_KEY) {
561 562 563

		return(HA_ERR_FOREIGN_DUPLICATE_KEY);

564
	} else if (error == (int) DB_RECORD_NOT_FOUND) {
565

566
		return(HA_ERR_NO_ACTIVE_RECORD);
567

568
	} else if (error == (int) DB_ERROR) {
569

570
		return(-1); /* unspecified error */
571

572 573 574 575
	} else if (error == (int) DB_DEADLOCK) {
		/* Since we rolled back the whole transaction, we must
		tell it also to MySQL so that MySQL knows to empty the
		cached binlog for this transaction */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
576

577 578 579
		if (thd) {
			ha_rollback(thd);
		}
580

581
		return(HA_ERR_LOCK_DEADLOCK);
582

583
	} else if (error == (int) DB_LOCK_WAIT_TIMEOUT) {
584

585 586 587
		/* Starting from 5.0.13, we let MySQL just roll back the
		latest SQL statement in a lock wait timeout. Previously, we
		rolled back the whole transaction. */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
588

589 590 591 592
		if (thd && row_rollback_on_timeout) {
			ha_rollback(thd);
		}

593
		return(HA_ERR_LOCK_WAIT_TIMEOUT);
594

595
	} else if (error == (int) DB_NO_REFERENCED_ROW) {
596

597
		return(HA_ERR_NO_REFERENCED_ROW);
598

599
	} else if (error == (int) DB_ROW_IS_REFERENCED) {
600

601
		return(HA_ERR_ROW_IS_REFERENCED);
602

603
	} else if (error == (int) DB_CANNOT_ADD_CONSTRAINT) {
604

605
		return(HA_ERR_CANNOT_ADD_FOREIGN);
606

607
	} else if (error == (int) DB_CANNOT_DROP_CONSTRAINT) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
608

609
		return(HA_ERR_ROW_IS_REFERENCED); /* TODO: This is a bit
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
610 611
						misleading, a new MySQL error
						code should be introduced */
612
	} else if (error == (int) DB_COL_APPEARS_TWICE_IN_INDEX) {
613

614
		return(HA_ERR_CRASHED);
615

616
	} else if (error == (int) DB_OUT_OF_FILE_SPACE) {
617

618
		return(HA_ERR_RECORD_FILE_FULL);
619

620
	} else if (error == (int) DB_TABLE_IS_BEING_USED) {
621

622
		return(HA_ERR_WRONG_COMMAND);
623

624
	} else if (error == (int) DB_TABLE_NOT_FOUND) {
625

626
		return(HA_ERR_KEY_NOT_FOUND);
627

628
	} else if (error == (int) DB_TOO_BIG_RECORD) {
629

630
		return(HA_ERR_TO_BIG_ROW);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
631

632
	} else if (error == (int) DB_CORRUPTION) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
633

634 635
		return(HA_ERR_CRASHED);
	} else if (error == (int) DB_NO_SAVEPOINT) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
636

637 638 639 640 641
		return(HA_ERR_NO_SAVEPOINT);
	} else if (error == (int) DB_LOCK_TABLE_FULL) {
 		/* Since we rolled back the whole transaction, we must
 		tell it also to MySQL so that MySQL knows to empty the
 		cached binlog for this transaction */
642

643 644 645
 		if (thd) {
 			ha_rollback(thd);
 		}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
646 647

    		return(HA_ERR_LOCK_TABLE_FULL);
648
    	} else {
649
    		return(-1);			// Unknown error
650 651 652
    	}
}

653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670
/*****************************************************************
Locks the LOCK_thread_count mutex.
If you want to print a thd that is not associated with the current thread,
you must call this function before reserving the InnoDB kernel_mutex, to
protect MySQL from setting thd->query NULL. If you print a thd of the current
thread, we know that MySQL cannot modify thd->query, and it is not necessary
to call this. Call innobase_mysql_end_print_arbitrary_thd() after you release
the kernel_mutex.
NOTE that /mysql/innobase/lock/lock0lock.c must contain the prototype for this
function! */
extern "C"
void
innobase_mysql_prepare_print_arbitrary_thd(void)
/*============================================*/
{
	/* The return value of the lock call is deliberately discarded */
	VOID(pthread_mutex_lock(&LOCK_thread_count));
}

/*****************************************************************
Releases the mutex reserved by innobase_mysql_prepare_print_arbitrary_thd(),
that is, unlocks LOCK_thread_count.
NOTE that /mysql/innobase/lock/lock0lock.c must contain the prototype for this
function! */
extern "C"
void
innobase_mysql_end_print_arbitrary_thd(void)
/*========================================*/
{
	/* The return value of the unlock call is deliberately discarded */
	VOID(pthread_mutex_unlock(&LOCK_thread_count));
}

682
/*****************************************************************
683 684 685
Prints info of a THD object (== user session thread) to the given file.
NOTE that /mysql/innobase/trx/trx0trx.c must contain the prototype for
this function! */
686
extern "C"
687 688 689
void
innobase_mysql_print_thd(
/*=====================*/
690 691
	FILE*	f,		/* in: output stream */
	void*	input_thd,	/* in: pointer to a MySQL THD object */
692 693
	uint	max_query_len)	/* in: max query length to print, or 0 to
				   use the default max length */
694
{
antony@ppcg5.local's avatar
antony@ppcg5.local committed
695
	THD*	thd;
696
        char	buffer[1024];
697

698 699
        thd = (THD*) input_thd;
        fputs(thd_security_context(thd, buffer, sizeof(buffer), 
antony@ppcg5.local's avatar
antony@ppcg5.local committed
700
				   max_query_len), f);
701
        putc('\n', f);
702 703
}

704
/**********************************************************************
705
Get the variable length bounds of the given character set.
706 707 708 709

NOTE that the exact prototype of this function has to be in
/innobase/data/data0type.ic! */
extern "C"
710
void
711 712
innobase_get_cset_width(
/*====================*/
713 714 715
	ulint	cset,		/* in: MySQL charset-collation code */
	ulint*	mbminlen,	/* out: minimum length of a char (in bytes) */
	ulint*	mbmaxlen)	/* out: maximum length of a char (in bytes) */
716 717 718
{
	CHARSET_INFO*	cs;
	ut_ad(cset < 256);
719 720
	ut_ad(mbminlen);
	ut_ad(mbmaxlen);
721 722

	cs = all_charsets[cset];
723 724 725 726 727 728 729
	if (cs) {
		*mbminlen = cs->mbminlen;
		*mbmaxlen = cs->mbmaxlen;
	} else {
		ut_a(cset == 0);
		*mbminlen = *mbmaxlen = 0;
	}
730 731
}

732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747
/**********************************************************************
Converts an identifier to a table name: the identifier is converted from
the character set of the current thread (current_thd->charset()) to
my_charset_filename. The error count reported by strconvert() is discarded.

NOTE that the exact prototype of this function has to be in
/innobase/dict/dict0dict.c! */
extern "C"
void
innobase_convert_from_table_id(
/*===========================*/
	char*		to,	/* out: converted identifier */
	const char*	from,	/* in: identifier to convert */
	ulint		len)	/* in: length of 'to', in bytes */
{
	uint	errors;

	strconvert(current_thd->charset(), from,
		   &my_charset_filename, to, (uint) len, &errors);
}

/**********************************************************************
Converts an identifier to UTF-8.

NOTE that the exact prototype of this function has to be in
/innobase/dict/dict0dict.c! */
extern "C"
void
innobase_convert_from_id(
/*=====================*/
	char*		to,	/* out: converted identifier */
	const char*	from,	/* in: identifier to convert */
	ulint		len)	/* in: length of 'to', in bytes */
{
	uint	errors;

	strconvert(current_thd->charset(), from,
767
		   system_charset_info, to, (uint) len, &errors);
768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786
}

/**********************************************************************
Removes the filename encoding of a table or database name. The string is
converted in place from my_charset_filename to system_charset_info; the
error count reported by strconvert() is discarded.

NOTE that the exact prototype of this function has to be in
/innobase/dict/dict0dict.c! */
extern "C"
void
innobase_convert_from_filename(
/*===========================*/
	char*		s)	/* in: identifier; out: decoded identifier */
{
	uint	errors;

	/* Source and destination are the same buffer.
	NOTE(review): the destination length passed is strlen(s), which does
	not count the terminating NUL; confirm that strconvert() cannot
	truncate the last character of the result in this case. */
	strconvert(&my_charset_filename, s,
		   system_charset_info, s, strlen(s), &errors);
}

787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816
/**********************************************************************
Compares NUL-terminated UTF-8 strings case insensitively, by calling
my_strcasecmp() with system_charset_info.

NOTE that the exact prototype of this function has to be in
/innobase/dict/dict0dict.c! */
extern "C"
int
innobase_strcasecmp(
/*================*/
				/* out: 0 if a=b, <0 if a<b, >0 if a>b */
	const char*	a,	/* in: first string to compare */
	const char*	b)	/* in: second string to compare */
{
	return(my_strcasecmp(system_charset_info, a, b));
}

/**********************************************************************
Makes all characters in a NUL-terminated UTF-8 string lower case, by calling
my_casedn_str() with system_charset_info. The string is modified in place.

NOTE that the exact prototype of this function has to be in
/innobase/dict/dict0dict.c! */
extern "C"
void
innobase_casedn_str(
/*================*/
	char*	a)	/* in/out: string to put in lower case */
{
	my_casedn_str(system_charset_info, a);
}

817 818 819 820 821 822 823 824 825 826 827 828 829 830 831
/**************************************************************************
Determines the connection character set: casts the opaque handle to THD*
and returns the result of its charset() method.

NOTE that the exact prototype of this function has to be in
/innobase/dict/dict0dict.c! */
extern "C"
struct charset_info_st*
innobase_get_charset(
/*=================*/
				/* out: connection character set */
	void*	mysql_thd)	/* in: MySQL thread handle */
{
	/* mysql_thd is passed as void*; it is treated here as a THD* */
	return(((THD*) mysql_thd)->charset());
}

832 833 834 835 836 837 838 839 840
/*************************************************************************
Creates a temporary file. */
extern "C"
int
innobase_mysql_tmpfile(void)
/*========================*/
			/* out: temporary file descriptor, or < 0 on error */
{
	char	filename[FN_REFLEN];
marko@hundin.mysql.fi's avatar
marko@hundin.mysql.fi committed
841
	int	fd2 = -1;
842
	File	fd = create_temp_file(filename, mysql_tmpdir, "ib",
843 844 845 846 847 848 849
#ifdef __WIN__
				O_BINARY | O_TRUNC | O_SEQUENTIAL |
				O_TEMPORARY | O_SHORT_LIVED |
#endif /* __WIN__ */
				O_CREAT | O_EXCL | O_RDWR,
				MYF(MY_WME));
	if (fd >= 0) {
marko@hundin.mysql.fi's avatar
marko@hundin.mysql.fi committed
850 851 852 853
#ifndef __WIN__
		/* On Windows, open files cannot be removed, but files can be
		created with the O_TEMPORARY flag to the same effect
		("delete on close"). */
854 855
		unlink(filename);
#endif /* !__WIN__ */
marko@hundin.mysql.fi's avatar
marko@hundin.mysql.fi committed
856 857 858 859 860 861 862 863 864 865 866 867
		/* Copy the file descriptor, so that the additional resources
		allocated by create_temp_file() can be freed by invoking
		my_close().

		Because the file descriptor returned by this function
		will be passed to fdopen(), it will be closed by invoking
		fclose(), which in turn will invoke close() instead of
		my_close(). */
		fd2 = dup(fd);
		if (fd2 < 0) {
			DBUG_PRINT("error",("Got error %d on dup",fd2));
			my_errno=errno;
868 869 870 871
			my_error(EE_OUT_OF_FILERESOURCES,
				 MYF(ME_BELL+ME_WAITTANG),
				 filename, my_errno);
		}
marko@hundin.mysql.fi's avatar
marko@hundin.mysql.fi committed
872 873 874
		my_close(fd, MYF(MY_WME));
	}
	return(fd2);
875 876
}

877 878 879 880 881 882 883 884 885 886 887 888 889 890 891
/*************************************************************************
Thin wrapper that forwards its arguments to MySQL's copy_and_convert();
see that function for the meaning of the parameters and of the return
value. */
extern "C"
ulint
innobase_convert_string(
/*====================*/
	void*		to,
	ulint		to_length,
	CHARSET_INFO*	to_cs,
	const void*	from,
	ulint		from_length,
	CHARSET_INFO*	from_cs,
	uint*		errors)
{
	/* copy_and_convert() takes char pointers and 32-bit lengths */
	char*		dst = (char*) to;
	const char*	src = (const char*) from;

	return(copy_and_convert(dst, (uint32) to_length, to_cs,
				src, (uint32) from_length, from_cs,
				errors));
}

897
/*************************************************************************
898 899
Gets the InnoDB transaction handle for a MySQL handler object, creates
an InnoDB transaction struct if the corresponding MySQL thread struct still
900
lacks one. */
901
static
902 903 904
trx_t*
check_trx_exists(
/*=============*/
905
			/* out: InnoDB transaction handle */
906
	handlerton*	hton,	/* in: handlerton for innodb */
907 908
	THD*	thd)	/* in: user thread handle */
{
909
	trx_t*&	trx = thd_to_trx(thd);
910

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
911
	ut_ad(thd == current_thd);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
912

913
	if (trx == NULL) {
914
		DBUG_ASSERT(thd != NULL);
915
		trx = trx_allocate_for_mysql();
916

917
		trx->mysql_thd = thd;
918
		trx->mysql_query_str = &(thd->query);
919
		trx->active_trans = 0;
920

921 922
		/* Update the info whether we should skip XA steps that eat
		CPU time */
antony@ppcg5.local's avatar
antony@ppcg5.local committed
923
		trx->support_xa = THDVAR(thd, support_xa);
924

925
		thd_to_trx(thd) = trx;
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
926
	} else {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
927
		if (trx->magic_n != TRX_MAGIC_N) {
928
			mem_analyze_corruption(trx);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
929 930 931 932 933

			ut_a(0);
		}
	}

antony@ppcg5.local's avatar
antony@ppcg5.local committed
934
	if (thd_test_options(thd, OPTION_NO_FOREIGN_KEY_CHECKS)) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
935 936 937 938 939
		trx->check_foreigns = FALSE;
	} else {
		trx->check_foreigns = TRUE;
	}

antony@ppcg5.local's avatar
antony@ppcg5.local committed
940
	if (thd_test_options(thd, OPTION_RELAXED_UNIQUE_CHECKS)) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
941 942 943
		trx->check_unique_secondary = FALSE;
	} else {
		trx->check_unique_secondary = TRUE;
944 945 946 947 948
	}

	return(trx);
}

949 950 951 952

/*************************************************************************
Construct ha_innobase handler. */

953
ha_innobase::ha_innobase(handlerton *hton, TABLE_SHARE *table_arg)
954
  :handler(hton, table_arg),
955
  int_table_flags(HA_REC_NOT_IN_SEQ |
956 957 958
		  HA_NULL_IN_KEY |
		  HA_CAN_INDEX_BLOBS |
		  HA_CAN_SQL_HANDLER |
959
		  HA_PRIMARY_KEY_REQUIRED_FOR_POSITION |
960
		  HA_PRIMARY_KEY_IN_READ_INDEX |
961
                  HA_BINLOG_ROW_CAPABLE |
962
		  HA_CAN_GEOMETRY | HA_PARTIAL_COLUMN_READ |
963
		  HA_TABLE_SCAN_ON_INDEX),
964 965 966 967
  start_of_scan(0),
  num_write_row(0)
{}

968
/*************************************************************************
969
Updates the user_thd field in a handle and also allocates a new InnoDB
970 971
transaction handle if needed, and updates the transaction fields in the
prebuilt struct. */
972
inline
973 974 975 976 977 978
int
ha_innobase::update_thd(
/*====================*/
			/* out: 0 or error code */
	THD*	thd)	/* in: thd to use the handle */
{
979
	trx_t*		trx;
980

981
	trx = check_trx_exists(ht, thd);
982

983
	if (prebuilt->trx != trx) {
984

985
		row_update_prebuilt_trx(prebuilt, trx);
986 987 988
	}

	user_thd = thd;
989

990 991 992
	return(0);
}

993
/*************************************************************************
994 995 996 997 998
Registers that InnoDB takes part in an SQL statement, so that MySQL knows to
roll back the statement if the statement results in an error. This MUST be
called for every SQL statement that may be rolled back by MySQL. Calling this
several times to register the same statement is allowed, too. */
inline
999
void
1000 1001
innobase_register_stmt(
/*===================*/
1002
        handlerton*	hton,	/* in: Innobase hton */
1003
	THD*	thd)	/* in: MySQL thd (connection) object */
1004
{
1005
	/* Register the statement */
1006
	trans_register_ha(thd, FALSE, hton);
1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019
}

/*************************************************************************
Registers an InnoDB transaction in MySQL, so that the MySQL XA code knows
to call the InnoDB prepare and commit, or rollback for the transaction. This
MUST be called for every transaction for which the user may call commit or
rollback. Calling this several times to register the same transaction is
allowed, too.
This function also registers the current SQL statement. */
inline
void
innobase_register_trx_and_stmt(
/*===========================*/
1020
        handlerton *hton, /* in: Innobase handlerton */
1021 1022 1023 1024 1025
	THD*	thd)	/* in: MySQL thd (connection) object */
{
	/* NOTE that actually innobase_register_stmt() registers also
	the transaction in the AUTOCOMMIT=1 mode. */

1026
	innobase_register_stmt(hton, thd);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1027

antony@ppcg5.local's avatar
antony@ppcg5.local committed
1028
	if (thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {
1029

1030
		/* No autocommit mode, register for a transaction */
1031
		trans_register_ha(thd, TRUE, hton);
1032
	}
1033
}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1034 1035 1036 1037 1038 1039 1040 1041 1042 1043

/*   BACKGROUND INFO: HOW THE MYSQL QUERY CACHE WORKS WITH INNODB
     ------------------------------------------------------------

1) The use of the query cache for TBL is disabled when there is an
uncommitted change to TBL.

2) When a change to TBL commits, InnoDB stores the current value of
its global trx id counter, let us denote it by INV_TRX_ID, to the table object
in the InnoDB data dictionary, and does only allow such transactions whose
id <= INV_TRX_ID to use the query cache.

3) When InnoDB does an INSERT/DELETE/UPDATE to a table TBL, or an implicit
modification because of an ON DELETE CASCADE, we invalidate the MySQL query
cache of TBL immediately.

How this is implemented inside InnoDB:

1) Since every modification always sets an IX type table lock on the InnoDB
table, it is easy to check if there can be uncommitted modifications for a
table: just check if there are locks in the lock list of the table.

2) When a transaction inside InnoDB commits, it reads the global trx id
counter and stores the value INV_TRX_ID to the tables on which it had a lock.

3) If there is an implicit table change from ON DELETE CASCADE or SET NULL,
InnoDB calls an invalidate method for the MySQL query cache for that table.

How this is implemented inside sql_cache.cc:

1) The query cache for an InnoDB table TBL is invalidated immediately at an
INSERT/UPDATE/DELETE, just like in the case of MyISAM. No need to delay
invalidation to the transaction commit.

2) To store or retrieve a value from the query cache of an InnoDB table TBL,
any query must first ask InnoDB's permission. We must pass the thd as a
parameter because InnoDB will look at the trx id, if any, associated with
that thd.

3) Use of the query cache for InnoDB tables is now allowed also when
AUTOCOMMIT==0 or we are inside BEGIN ... COMMIT. Thus transactions no longer
put restrictions on the use of the query cache.
*/

/**********************************************************************
The MySQL query cache uses this to check from InnoDB if the query cache at
the moment is allowed to operate on an InnoDB table. The SQL query must
be a non-locking SELECT.

The query cache is allowed to operate on certain query only if this function
returns TRUE for all tables in the query.

If thd is not in the autocommit state, this function also starts a new
transaction for thd if there is no active trx yet, and assigns a consistent
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1088 1089 1090 1091 1092 1093 1094 1095
read view to it if there is no read view yet.

Why a deadlock of threads is not possible: the query cache calls this function
at the start of a SELECT processing. Then the calling thread cannot be
holding any InnoDB semaphores. The calling thread is holding the
query cache mutex, and this function will reserver the InnoDB kernel mutex.
Thus, the 'rank' in sync0sync.h of the MySQL query cache mutex is above
the InnoDB kernel mutex. */
1096
static
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1097
my_bool
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109
innobase_query_caching_of_table_permitted(
/*======================================*/
				/* out: TRUE if permitted, FALSE if not;
				note that the value FALSE does not mean
				we should invalidate the query cache:
				invalidation is called explicitly */
	THD*	thd,		/* in: thd of the user who is trying to
				store a result to the query cache or
				retrieve it */
	char*	full_name,	/* in: concatenation of database name,
				the null character '\0', and the table
				name */
1110
	uint	full_name_len,	/* in: length of the full name, i.e.
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1111
				len(dbname) + len(tablename) + 1 */
1112
	ulonglong *unused)	/* unused for this engine */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1113 1114 1115 1116 1117 1118 1119
{
	ibool	is_autocommit;
	trx_t*	trx;
	char	norm_name[1000];

	ut_a(full_name_len < 999);

1120
	trx = check_trx_exists(innodb_hton_ptr, thd);
antony@ppcg5.local's avatar
antony@ppcg5.local committed
1121 1122

	if (trx->isolation_level == TRX_ISO_SERIALIZABLE) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1123
		/* In the SERIALIZABLE mode we add LOCK IN SHARE MODE to every
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1124
		plain SELECT if AUTOCOMMIT is not on. */
1125

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1126 1127 1128
		return((my_bool)FALSE);
	}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1129 1130
	if (trx->has_search_latch) {
		ut_print_timestamp(stderr);
1131 1132 1133
		sql_print_error("The calling thread is holding the adaptive "
				"search, latch though calling "
				"innobase_query_caching_of_table_permitted.");
1134 1135 1136 1137

		mutex_enter_noninline(&kernel_mutex);
		trx_print(stderr, trx, 1024);
		mutex_exit_noninline(&kernel_mutex);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1138 1139
	}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1140 1141
	innobase_release_stat_resources(trx);

antony@ppcg5.local's avatar
antony@ppcg5.local committed
1142
	if (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1143 1144 1145 1146 1147 1148 1149

		is_autocommit = TRUE;
	} else {
		is_autocommit = FALSE;

	}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167
	if (is_autocommit && trx->n_mysql_tables_in_use == 0) {
		/* We are going to retrieve the query result from the query
		cache. This cannot be a store operation to the query cache
		because then MySQL would have locks on tables already.

		TODO: if the user has used LOCK TABLES to lock the table,
		then we open a transaction in the call of row_.. below.
		That trx can stay open until UNLOCK TABLES. The same problem
		exists even if we do not use the query cache. MySQL should be
		modified so that it ALWAYS calls some cleanup function when
		the processing of a query ends!

		We can imagine we instantaneously serialize this consistent
		read trx to the current trx id counter. If trx2 would have
		changed the tables of a query result stored in the cache, and
		trx2 would have already committed, making the result obsolete,
		then trx2 would have already invalidated the cache. Thus we
		can trust the result in the cache is ok for this query. */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1168 1169 1170

		return((my_bool)TRUE);
	}
1171

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1172 1173 1174 1175 1176 1177 1178 1179
	/* Normalize the table name to InnoDB format */

	memcpy(norm_name, full_name, full_name_len);

	norm_name[strlen(norm_name)] = '/'; /* InnoDB uses '/' as the
					    separator between db and table */
	norm_name[full_name_len] = '\0';
#ifdef __WIN__
1180
	innobase_casedn_str(norm_name);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1181
#endif
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1182 1183 1184
	/* The call of row_search_.. will start a new transaction if it is
	not yet started */

1185
	if (trx->active_trans == 0) {
1186

1187
		innobase_register_trx_and_stmt(innodb_hton_ptr, thd);
1188 1189
		trx->active_trans = 1;
	}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1190

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1191 1192
	if (row_search_check_if_query_cache_permitted(trx, norm_name)) {

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1193
		/* printf("Query cache for %s permitted\n", norm_name); */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1194 1195 1196 1197

		return((my_bool)TRUE);
	}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1198
	/* printf("Query cache for %s NOT permitted\n", norm_name); */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1199 1200 1201 1202 1203 1204 1205 1206

	return((my_bool)FALSE);
}

/*********************************************************************
Invalidates the MySQL query cache for the table.
NOTE that the exact prototype of this function has to be in
/innobase/row/row0ins.c! */
1207
extern "C"
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1208 1209 1210 1211 1212
void
innobase_invalidate_query_cache(
/*============================*/
	trx_t*	trx,		/* in: transaction which modifies the table */
	char*	full_name,	/* in: concatenation of database name, null
1213 1214 1215 1216 1217
				char '\0', table name, null char'\0';
				NOTE that in Windows this is always
				in LOWER CASE! */
	ulint	full_name_len)	/* in: full name length where also the null
				chars count */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1218
{
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1219 1220 1221 1222
	/* Note that the sync0sync.h rank of the query cache mutex is just
	above the InnoDB kernel mutex. The caller of this function must not
	have latches of a lower rank. */

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1223
	/* Argument TRUE below means we are using transactions */
1224
#ifdef HAVE_QUERY_CACHE
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1225 1226 1227 1228
	query_cache.invalidate((THD*)(trx->mysql_thd),
					(const char*)full_name,
					(uint32)full_name_len,
					TRUE);
1229
#endif
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1230
}
1231 1232

/*********************************************************************
1233
Display an SQL identifier.
1234
This definition must match the one in innobase/ut/ut0ut.c! */
1235
extern "C"
1236 1237 1238 1239
void
innobase_print_identifier(
/*======================*/
	FILE*		f,	/* in: output stream */
1240
	trx_t*		trx,	/* in: transaction */
1241 1242
	ibool		table_id,/* in: TRUE=print a table name,
				FALSE=print other identifier */
1243 1244 1245
	const char*	name,	/* in: name to print */
	ulint		namelen)/* in: length of name */
{
1246 1247 1248 1249 1250 1251 1252 1253 1254 1255
	const char*	s	= name;
	char*		qname	= NULL;
	int		q;

	if (table_id) {
		/* Decode the table name.  The filename_to_tablename()
		function expects a NUL-terminated string.  The input and
		output strings buffers must not be shared.  The function
		only produces more output when the name contains other
		characters than [0-9A-Z_a-z]. */
1256
          char*	temp_name = (char*) my_malloc((uint) namelen + 1, MYF(MY_WME));
1257 1258
          uint	qnamelen = (uint) (namelen
                                   + (1 + sizeof srv_mysql50_table_name_prefix));
1259 1260

		if (temp_name) {
1261
                  qname = (char*) my_malloc(qnamelen, MYF(MY_WME));
1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272
			if (qname) {
				memcpy(temp_name, name, namelen);
				temp_name[namelen] = 0;
				s = qname;
				namelen = filename_to_tablename(temp_name,
						qname, qnamelen);
			}
			my_free(temp_name, MYF(0));
		}
	}

1273
	if (!trx || !trx->mysql_thd) {
1274 1275 1276 1277 1278

		q = '"';
	} else {
		q = get_quote_char_for_identifier((THD*) trx->mysql_thd,
						s, (int) namelen);
1279
	}
1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296

	if (q == EOF) {
		fwrite(s, 1, namelen, f);
	} else {
		const char*	e = s + namelen;
		putc(q, f);
		while (s < e) {
			int	c = *s++;
			if (c == q) {
				putc(c, f);
			}
			putc(c, f);
		}
		putc(q, f);
	}

	my_free(qname, MYF(MY_ALLOW_ZERO_PTR));
1297 1298
}

1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310
/**************************************************************************
Reports whether the MySQL thread that owns a transaction has its killed
flag set. A NULL transaction, or one without a MySQL thread, counts as
not interrupted. */
extern "C"
ibool
trx_is_interrupted(
/*===============*/
			/* out: TRUE if interrupted */
	trx_t*	trx)	/* in: transaction */
{
	if (!trx || !trx->mysql_thd) {

		return(FALSE);
	}

	return(((THD*) trx->mysql_thd)->killed ? TRUE : FALSE);
}

1311 1312 1313 1314
/*********************************************************************
Call this when you have opened a new table handle in HANDLER, before you
call index_read_idx() etc. Actually, we can let the cursor stay open even
over a transaction commit! Then you should call this before every operation,
1315
fetch next etc. This function inits the necessary things even after a
1316 1317 1318 1319 1320 1321
transaction commit. */

void
ha_innobase::init_table_handle_for_HANDLER(void)
/*============================================*/
{
1322 1323 1324 1325
	/* If current thd does not yet have a trx struct, create one.
	If the current handle does not yet have a prebuilt struct, create
	one. Update the trx pointers in the prebuilt struct. Normally
	this operation is done in external_lock. */
1326

1327
	update_thd(ha_thd());
1328

1329 1330
	/* Initialize the prebuilt struct much like it would be inited in
	external_lock */
1331

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1332 1333
	innobase_release_stat_resources(prebuilt->trx);

1334
	/* If the transaction is not started yet, start it */
1335

1336
	trx_start_if_not_started_noninline(prebuilt->trx);
1337

1338
	/* Assign a read view if the transaction does not have it yet */
1339

1340
	trx_assign_read_view(prebuilt->trx);
1341

1342 1343
	/* Set the MySQL flag to mark that there is an active transaction */

1344
	if (prebuilt->trx->active_trans == 0) {
1345

1346
		innobase_register_trx_and_stmt(ht, ha_thd());
1347

1348 1349
		prebuilt->trx->active_trans = 1;
	}
1350

1351 1352
	/* We did the necessary inits in this function, no need to repeat them
	in row_search_for_mysql */
1353

1354
	prebuilt->sql_stat_start = FALSE;
1355

1356 1357
	/* We let HANDLER always to do the reads as consistent reads, even
	if the trx isolation level would have been specified as SERIALIZABLE */
1358

1359 1360
	prebuilt->select_lock_type = LOCK_NONE;
	prebuilt->stored_select_lock_type = LOCK_NONE;
1361

1362
	/* Always fetch all columns in the index record */
1363

1364
	prebuilt->hint_need_to_fetch_extra_cols = ROW_RETRIEVE_ALL_COLS;
1365

1366
	/* We want always to fetch all columns in the whole row? Or do
1367 1368
	we???? */

1369
	prebuilt->read_just_key = FALSE;
1370 1371

	prebuilt->used_in_HANDLER = TRUE;
1372 1373

	prebuilt->keep_other_fields_on_keyread = FALSE;
1374 1375
}

1376
/*************************************************************************
1377
Opens an InnoDB database. */
1378
static
1379
int
1380
innobase_init(void *p)
1381 1382
/*===============*/
{
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1383
	static char	current_dir[3];		/* Set if using current lib */
1384 1385
	int		err;
	bool		ret;
1386
	char		*default_path;
monty@hundin.mysql.fi's avatar
merge  
monty@hundin.mysql.fi committed
1387

1388
	DBUG_ENTER("innobase_init");
1389
        handlerton *innobase_hton= (handlerton *)p;
1390
        innodb_hton_ptr= innobase_hton;
1391

antony@ppcg5.local's avatar
antony@ppcg5.local committed
1392
        innobase_hton->state= SHOW_OPTION_YES;
1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415
        innobase_hton->db_type= DB_TYPE_INNODB;
        innobase_hton->savepoint_offset=sizeof(trx_named_savept_t);
        innobase_hton->close_connection=innobase_close_connection;
        innobase_hton->savepoint_set=innobase_savepoint;
        innobase_hton->savepoint_rollback=innobase_rollback_to_savepoint;
        innobase_hton->savepoint_release=innobase_release_savepoint;
        innobase_hton->commit=innobase_commit;
        innobase_hton->rollback=innobase_rollback;
        innobase_hton->prepare=innobase_xa_prepare;
        innobase_hton->recover=innobase_xa_recover;
        innobase_hton->commit_by_xid=innobase_commit_by_xid;
        innobase_hton->rollback_by_xid=innobase_rollback_by_xid;
        innobase_hton->create_cursor_read_view=innobase_create_cursor_view;
        innobase_hton->set_cursor_read_view=innobase_set_cursor_view;
        innobase_hton->close_cursor_read_view=innobase_close_cursor_view;
        innobase_hton->create=innobase_create_handler;
        innobase_hton->drop_database=innobase_drop_database;
        innobase_hton->panic=innobase_end;
        innobase_hton->start_consistent_snapshot=innobase_start_trx_and_assign_read_view;
        innobase_hton->flush_logs=innobase_flush_logs;
        innobase_hton->show_status=innobase_show_status;
        innobase_hton->flags=HTON_NO_FLAGS;
        innobase_hton->release_temporary_latches=innobase_release_temporary_latches;
1416

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1417 1418
	ut_a(DATA_MYSQL_TRUE_VARCHAR == (ulint)MYSQL_TYPE_VARCHAR);

1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436
#ifdef UNIV_DEBUG
	static const char	test_filename[] = "-@";
	char			test_tablename[sizeof test_filename
				+ sizeof srv_mysql50_table_name_prefix];
	if ((sizeof test_tablename) - 1
			!= filename_to_tablename(test_filename, test_tablename,
			sizeof test_tablename)
			|| strncmp(test_tablename,
			srv_mysql50_table_name_prefix,
			sizeof srv_mysql50_table_name_prefix)
			|| strcmp(test_tablename
			+ sizeof srv_mysql50_table_name_prefix,
			test_filename)) {
		sql_print_error("tablename encoding has been changed");
		goto error;
	}
#endif /* UNIV_DEBUG */

1437 1438 1439 1440 1441 1442 1443
	/* Check that values don't overflow on 32-bit systems. */
	if (sizeof(ulint) == 4) {
		if (innobase_buffer_pool_size > UINT_MAX32) {
			sql_print_error(
				"innobase_buffer_pool_size can't be over 4GB"
				" on 32-bit systems");

1444
			goto error;
1445 1446 1447 1448 1449 1450 1451
		}

		if (innobase_log_file_size > UINT_MAX32) {
			sql_print_error(
				"innobase_log_file_size can't be over 4GB"
				" on 32-bit systems");

1452
			goto error;
1453 1454 1455
		}
	}

1456
	os_innodb_umask = (ulint)my_umask;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1457

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1458 1459 1460 1461 1462 1463
	/* First calculate the default path for innodb_data_home_dir etc.,
	in case the user has not given any value.

	Note that when using the embedded server, the datadirectory is not
	necessarily the current directory of this program. */

1464
	if (mysqld_embedded) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1465
		default_path = mysql_real_data_home;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1466
		fil_path_to_mysql_datadir = mysql_real_data_home;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1467
	} else {
1468 1469 1470 1471 1472
		/* It's better to use current lib, to keep paths short */
		current_dir[0] = FN_CURLIB;
		current_dir[1] = FN_LIBCHAR;
		current_dir[2] = 0;
		default_path = current_dir;
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
1473 1474
	}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1475 1476
	ut_a(default_path);

1477
	if (specialflag & SPECIAL_NO_PRIOR) {
1478
		srv_set_thread_priorities = FALSE;
1479
	} else {
1480 1481
		srv_set_thread_priorities = TRUE;
		srv_query_thread_priority = QUERY_PRIOR;
1482
	}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1483

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1484 1485
	/* Set InnoDB initialization parameters according to the values
	read from MySQL .cnf file */
1486

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1487
	/*--------------- Data files -------------------------*/
1488

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1489
	/* The default dir for data files is the datadir of MySQL */
1490 1491

	srv_data_home = (innobase_data_home_dir ? innobase_data_home_dir :
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
1492
			 default_path);
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
1493

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1494
	/* Set default InnoDB data file size to 10 MB and let it be
1495
	auto-extending. Thus users can use InnoDB in >= 4.0 without having
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1496 1497 1498
	to specify any startup options. */

	if (!innobase_data_file_path) {
1499
		innobase_data_file_path = (char*) "ibdata1:10M:autoextend";
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1500 1501 1502 1503 1504 1505
	}

	/* Since InnoDB edits the argument in the next call, we make another
	copy of it: */

	internal_innobase_data_file_path = my_strdup(innobase_data_file_path,
osku@127.(none)'s avatar
osku@127.(none) committed
1506
						   MYF(MY_FAE));
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1507 1508 1509

	ret = (bool) srv_parse_data_file_paths_and_sizes(
				internal_innobase_data_file_path,
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1510 1511 1512 1513 1514 1515
				&srv_data_file_names,
				&srv_data_file_sizes,
				&srv_data_file_is_raw_partition,
				&srv_n_data_files,
				&srv_auto_extend_last_data_file,
				&srv_last_file_size_max);
1516
	if (ret == FALSE) {
1517
		sql_print_error(
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1518
			"InnoDB: syntax error in innodb_data_file_path");
1519
		my_free(internal_innobase_data_file_path,
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1520
						MYF(MY_ALLOW_ZERO_PTR));
1521
		goto error;
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
1522
	}
1523

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1524 1525 1526
	/* -------------- Log files ---------------------------*/

	/* The default dir for log files is the datadir of MySQL */
1527

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1528
	if (!innobase_log_group_home_dir) {
1529
		innobase_log_group_home_dir = default_path;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1530
	}
marko@hundin.mysql.fi's avatar
marko@hundin.mysql.fi committed
1531

1532
#ifdef UNIV_LOG_ARCHIVE
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1533 1534 1535 1536 1537 1538 1539
	/* Since innodb_log_arch_dir has no relevance under MySQL,
	starting from 4.0.6 we always set it the same as
	innodb_log_group_home_dir: */

	innobase_log_arch_dir = innobase_log_group_home_dir;

	srv_arch_dir = innobase_log_arch_dir;
marko@hundin.mysql.fi's avatar
marko@hundin.mysql.fi committed
1540
#endif /* UNIG_LOG_ARCHIVE */
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
1541

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1542 1543 1544
	ret = (bool)
		srv_parse_log_group_home_dirs(innobase_log_group_home_dir,
						&srv_log_group_home_dirs);
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
1545

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1546
	if (ret == FALSE || innobase_mirrored_log_groups != 1) {
1547 1548
	  sql_print_error("syntax error in innodb_log_group_home_dir, or a "
			  "wrong number of mirrored log groups");
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
1549

1550
		my_free(internal_innobase_data_file_path,
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1551
						MYF(MY_ALLOW_ZERO_PTR));
1552
		goto error;
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
1553
	}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1554

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1555 1556 1557
	/* --------------------------------------------------*/

	srv_file_flush_method_str = innobase_unix_file_flush_method;
1558

monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
1559
	srv_n_log_groups = (ulint) innobase_mirrored_log_groups;
1560
	srv_n_log_files = (ulint) innobase_log_files_in_group;
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
1561 1562
	srv_log_file_size = (ulint) innobase_log_file_size;

marko@hundin.mysql.fi's avatar
marko@hundin.mysql.fi committed
1563
#ifdef UNIV_LOG_ARCHIVE
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
1564
	srv_log_archive_on = (ulint) innobase_log_archive;
marko@hundin.mysql.fi's avatar
marko@hundin.mysql.fi committed
1565
#endif /* UNIV_LOG_ARCHIVE */
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
1566
	srv_log_buffer_size = (ulint) innobase_log_buffer_size;
1567

1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586
	/* We set srv_pool_size here in units of 1 kB. InnoDB internally
	changes the value so that it becomes the number of database pages. */

	if (innobase_buffer_pool_awe_mem_mb == 0) {
		/* Careful here: we first convert the signed long int to ulint
		and only after that divide */

		srv_pool_size = ((ulint) innobase_buffer_pool_size) / 1024;
	} else {
		srv_use_awe = TRUE;
		srv_pool_size = (ulint)
				(1024 * innobase_buffer_pool_awe_mem_mb);
		srv_awe_window_size = (ulint) innobase_buffer_pool_size;

		/* Note that what the user specified as
		innodb_buffer_pool_size is actually the AWE memory window
		size in this case, and the real buffer pool size is
		determined by .._awe_mem_mb. */
	}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1587

monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
1588 1589 1590
	srv_mem_pool_size = (ulint) innobase_additional_mem_pool_size;

	srv_n_file_io_threads = (ulint) innobase_file_io_threads;
1591

1592
	srv_lock_wait_timeout = (ulint) innobase_lock_wait_timeout;
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
1593 1594
	srv_force_recovery = (ulint) innobase_force_recovery;

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1595 1596
	srv_use_doublewrite_buf = (ibool) innobase_use_doublewrite;
	srv_use_checksums = (ibool) innobase_use_checksums;
1597

antony@ppcg5.local's avatar
antony@ppcg5.local committed
1598 1599 1600 1601
#ifdef HAVE_LARGE_PAGES
        if ((os_use_large_pages = (ibool) my_use_large_pages))
		os_large_page_size = (ulint) opt_large_page_size;
#endif
1602

1603 1604
	row_rollback_on_timeout = (ibool) innobase_rollback_on_timeout;

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1605
	srv_file_per_table = (ibool) innobase_file_per_table;
1606
	srv_locks_unsafe_for_binlog = (ibool) innobase_locks_unsafe_for_binlog;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1607 1608

	srv_max_n_open_files = (ulint) innobase_open_files;
1609
	srv_innodb_status = (ibool) innobase_create_status_file;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1610

1611 1612
	srv_stats_on_metadata = (ibool) innobase_stats_on_metadata;

1613
	srv_print_verbose_log = mysqld_embedded ? 0 : 1;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1614

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1615
	/* Store the default charset-collation number of this MySQL
1616
	installation */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1617

1618
	data_mysql_default_charset_coll = (ulint)default_charset_info->number;
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
1619

1620 1621
	ut_a(DATA_MYSQL_LATIN1_SWEDISH_CHARSET_COLL ==
					my_charset_latin1.number);
1622
	ut_a(DATA_MYSQL_BINARY_CHARSET_COLL == my_charset_bin.number);
1623

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1624 1625 1626 1627
	/* Store the latin1_swedish_ci character ordering table to InnoDB. For
	non-latin1_swedish_ci charsets we use the MySQL comparison functions,
	and consequently we do not need to know the ordering internally in
	InnoDB. */
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
1628

1629
	ut_a(0 == strcmp((char*)my_charset_latin1.name,
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1630 1631
						(char*)"latin1_swedish_ci"));
	memcpy(srv_latin1_ordering, my_charset_latin1.sort_order, 256);
1632

1633
	/* Since we in this module access directly the fields of a trx
1634
	struct, and due to different headers and flags it might happen that
1635 1636 1637 1638 1639 1640
	mutex_t has a different size in this module and in InnoDB
	modules, we check at run time that the size is the same in
	these compilation modules. */

	srv_sizeof_trx_t_in_ha_innodb_cc = sizeof(trx_t);

monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
1641
	err = innobase_start_or_create_for_mysql();
1642 1643

	if (err != DB_SUCCESS) {
1644
		my_free(internal_innobase_data_file_path,
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1645
						MYF(MY_ALLOW_ZERO_PTR));
1646
		goto error;
1647
	}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1648 1649

	(void) hash_init(&innobase_open_tables,system_charset_info, 32, 0, 0,
1650 1651 1652 1653 1654 1655
					(hash_get_key) innobase_get_key, 0, 0);
	pthread_mutex_init(&innobase_share_mutex, MY_MUTEX_INIT_FAST);
	pthread_mutex_init(&prepare_commit_mutex, MY_MUTEX_INIT_FAST);
	pthread_mutex_init(&commit_threads_m, MY_MUTEX_INIT_FAST);
	pthread_mutex_init(&commit_cond_m, MY_MUTEX_INIT_FAST);
	pthread_cond_init(&commit_cond, NULL);
1656
	innodb_inited= 1;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1657

1658 1659
	DBUG_RETURN(FALSE);
error:
1660
	DBUG_RETURN(TRUE);
1661 1662 1663
}

/***********************************************************************
1664
Closes an InnoDB database. */
1665
static
1666
int
1667
innobase_end(handlerton *hton, ha_panic_function type)
1668
/*==============*/
1669
				/* out: TRUE if error */
1670
{
1671
	int	err= 0;
1672 1673 1674

	DBUG_ENTER("innobase_end");

1675
#ifdef __NETWARE__	/* some special cleanup for NetWare */
1676 1677 1678 1679
	if (nw_panic) {
		set_panic_flag_for_netware();
	}
#endif
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1680
	if (innodb_inited) {
1681

1682 1683 1684 1685
		srv_fast_shutdown = (ulint) innobase_fast_shutdown;
		innodb_inited = 0;
		if (innobase_shutdown_for_mysql() != DB_SUCCESS) {
			err = 1;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1686
		}
1687 1688
		hash_free(&innobase_open_tables);
		my_free(internal_innobase_data_file_path,
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1689
						MYF(MY_ALLOW_ZERO_PTR));
1690 1691 1692 1693 1694
		pthread_mutex_destroy(&innobase_share_mutex);
		pthread_mutex_destroy(&prepare_commit_mutex);
		pthread_mutex_destroy(&commit_threads_m);
		pthread_mutex_destroy(&commit_cond_m);
		pthread_cond_destroy(&commit_cond);
1695
	}
1696

1697
	DBUG_RETURN(err);
1698 1699 1700
}

/********************************************************************
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1701 1702
Flushes InnoDB logs to disk and makes a checkpoint. Really, a commit flushes
the logs, and the name of this function should be innobase_checkpoint. */
1703
static
1704
bool
1705
innobase_flush_logs(handlerton *hton)
1706
/*=====================*/
1707
				/* out: TRUE if error */
1708
{
1709
	bool	result = 0;
1710

1711
	DBUG_ENTER("innobase_flush_logs");
1712

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1713
	log_buffer_flush_to_disk();
1714

1715
	DBUG_RETURN(result);
1716 1717 1718
}

/*********************************************************************
1719
Commits a transaction in an InnoDB database. */
1720
static
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1721 1722 1723 1724 1725
void
innobase_commit_low(
/*================*/
	trx_t*	trx)	/* in: transaction handle */
{
1726
	if (trx->conc_state == TRX_NOT_STARTED) {
1727

1728 1729
		return;
	}
1730

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1731
	trx_commit_for_mysql(trx);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1732 1733
}

1734 1735 1736 1737 1738
/*********************************************************************
Creates an InnoDB transaction struct for the thd if it does not yet have one.
Starts a new InnoDB transaction if a transaction is not yet started. And
assigns a new snapshot for a consistent read if the transaction does not yet
have one. */
1739
static
1740 1741 1742 1743
int
innobase_start_trx_and_assign_read_view(
/*====================================*/
			/* out: 0 */
1744
        handlerton *hton, /* in: Innodb handlerton */ 
1745 1746 1747 1748 1749
	THD*	thd)	/* in: MySQL thread handle of the user for whom
			the transaction should be committed */
{
	trx_t*	trx;

1750
	DBUG_ENTER("innobase_start_trx_and_assign_read_view");
1751 1752 1753

	/* Create a new trx struct for thd, if it does not yet have one */

1754
	trx = check_trx_exists(hton, thd);
1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771

	/* This is just to play safe: release a possible FIFO ticket and
	search latch. Since we will reserve the kernel mutex, we have to
	release the search system latch first to obey the latching order. */

	innobase_release_stat_resources(trx);

	/* If the transaction is not started yet, start it */

	trx_start_if_not_started_noninline(trx);

	/* Assign a read view if the transaction does not have it yet */

	trx_assign_read_view(trx);

	/* Set the MySQL flag to mark that there is an active transaction */

1772
	if (trx->active_trans == 0) {
1773
		innobase_register_trx_and_stmt(hton, current_thd);
1774 1775
		trx->active_trans = 1;
	}
1776 1777 1778 1779

	DBUG_RETURN(0);
}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1780
/*********************************************************************
1781 1782
Commits a transaction in an InnoDB database or marks an SQL statement
ended. */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1783 1784
static
int
1785 1786
innobase_commit(
/*============*/
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1787
			/* out: 0 */
1788 1789
        handlerton *hton, /* in: Innodb handlerton */ 
	THD* 	thd,	/* in: MySQL thread handle of the user for whom
1790
			the transaction should be committed */
1791 1792
	bool	all)	/* in:	TRUE - commit transaction
				FALSE - the current SQL statement ended */
1793
{
1794
	trx_t*		trx;
1795

1796 1797
	DBUG_ENTER("innobase_commit");
	DBUG_PRINT("trans", ("ending transaction"));
1798

1799
	trx = check_trx_exists(hton, thd);
1800

1801
	/* Update the info whether we should skip XA steps that eat CPU time */
antony@ppcg5.local's avatar
antony@ppcg5.local committed
1802
	trx->support_xa = THDVAR(thd, support_xa);
1803

1804 1805
	/* Since we will reserve the kernel mutex, we have to release
	the search system latch first to obey the latching order. */
1806

1807
	if (trx->has_search_latch) {
1808
		trx_search_latch_release_if_reserved(trx);
1809 1810 1811
	}

	/* The flag trx->active_trans is set to 1 in
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1812 1813 1814

	1. ::external_lock(),
	2. ::start_stmt(),
1815
	3. innobase_query_caching_of_table_permitted(),
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1816
	4. innobase_savepoint(),
1817
	5. ::init_table_handle_for_HANDLER(),
1818 1819
	6. innobase_start_trx_and_assign_read_view(),
	7. ::transactional_table_lock()
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1820 1821 1822 1823 1824

	and it is only set to 0 in a commit or a rollback. If it is 0 we know
	there cannot be resources to be freed and we could return immediately.
	For the time being, we play safe and do the cleanup though there should
	be nothing to clean up. */
1825

1826 1827 1828 1829 1830
	if (trx->active_trans == 0
		&& trx->conc_state != TRX_NOT_STARTED) {

		sql_print_error("trx->active_trans == 0, but"
			" trx->conc_state != TRX_NOT_STARTED");
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1831
	}
1832
	if (all
antony@ppcg5.local's avatar
antony@ppcg5.local committed
1833
		|| (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) {
1834 1835

		/* We were instructed to commit the whole transaction, or
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1836 1837
		this is an SQL statement end and autocommit is on */

1838 1839 1840
		/* We need current binlog position for ibbackup to work.
		Note, the position is current because of
		prepare_commit_mutex */
1841
retry:
1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860
		if (srv_commit_concurrency > 0) {
			pthread_mutex_lock(&commit_cond_m);
			commit_threads++;

			if (commit_threads > srv_commit_concurrency) {
				commit_threads--;
				pthread_cond_wait(&commit_cond,
					&commit_cond_m);
				pthread_mutex_unlock(&commit_cond_m);
				goto retry;
			}
			else {
				pthread_mutex_unlock(&commit_cond_m);
			}
		}

		trx->mysql_log_file_name = mysql_bin_log.get_log_fname();
		trx->mysql_log_offset =
			(ib_longlong)mysql_bin_log.get_log_file()->pos_in_file;
serg@serg.mylan's avatar
serg@serg.mylan committed
1861

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1862
		innobase_commit_low(trx);
1863

1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877
		if (srv_commit_concurrency > 0) {
			pthread_mutex_lock(&commit_cond_m);
			commit_threads--;
			pthread_cond_signal(&commit_cond);
			pthread_mutex_unlock(&commit_cond_m);
		}

		if (trx->active_trans == 2) {

			pthread_mutex_unlock(&prepare_commit_mutex);
		}

		trx->active_trans = 0;

1878
	} else {
1879
		/* We just mark the SQL statement ended and do not do a
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1880 1881
		transaction commit */

1882 1883 1884
		if (trx->auto_inc_lock) {
			/* If we had reserved the auto-inc lock for some
			table in this SQL statement we release it now */
1885

1886 1887 1888 1889 1890 1891 1892
			row_unlock_table_autoinc_for_mysql(trx);
		}
		/* Store the current undo_no of the transaction so that we
		know where to roll back if we have to roll back the next
		SQL statement */

		trx_mark_sql_stat_end(trx);
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
1893
	}
1894

1895
	if (trx->declared_to_be_inside_innodb) {
1896
		/* Release our possible ticket in the FIFO */
1897

1898
		srv_conc_force_exit_innodb(trx);
1899
	}
1900 1901 1902

	/* Tell the InnoDB server that there might be work for utility
	threads: */
1903 1904
	srv_active_wake_master_thread();

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
1905
	DBUG_RETURN(0);
1906 1907
}

1908
#if 0
/* TODO: put the
MySQL-4.1 functionality back to 5.0. This is needed to get InnoDB Hot Backup
to work. */

/*********************************************************************
This is called when MySQL writes the binlog entry for the current
transaction. Writes to the InnoDB tablespace info which tells where the
MySQL binlog entry for the current transaction ended. Also commits the
transaction inside InnoDB but does NOT flush InnoDB log files to disk.
To flush you have to call innobase_commit_complete(). We have separated
flushing to eliminate the bottleneck of LOCK_log in log.cc which disabled
InnoDB's group commit capability. */
static
int
innobase_report_binlog_offset_and_commit(
/*=====================================*/
				/* out: 0 */
	handlerton *hton, /* in: Innodb handlerton */
	THD*	thd,		/* in: user thread */
	void*	trx_handle,	/* in: InnoDB trx handle */
	char*	log_file_name,	/* in: latest binlog file name */
	my_off_t end_offset)	/* in: the offset in the binlog file
				   up to which we wrote */
{
	trx_t*	trx = (trx_t*) trx_handle;

	ut_a(trx != NULL);

	trx->mysql_log_file_name = log_file_name;
	trx->mysql_log_offset = (ib_longlong) end_offset;

	/* flush_log_later is raised only for the duration of the commit
	call and lowered again right after it */
	trx->flush_log_later = TRUE;
	innobase_commit(hton, thd, TRUE);
	trx->flush_log_later = FALSE;

	return(0);
}

/***********************************************************************
This function stores the binlog offset and flushes logs. */
static
void
innobase_store_binlog_offset_and_flush_log(
/*=======================================*/
	char*		binlog_name,	/* in: binlog name */
	longlong	offset)		/* in: binlog offset */
{
	mtr_t	mini_trx;

	assert(binlog_name != NULL);

	/* Open a mini-transaction, record the latest MySQL binlog name
	and offset in the trx sys header, and commit it */
	mtr_start_noninline(&mini_trx);

	trx_sys_update_mysql_binlog_offset(
		binlog_name, offset, TRX_SYS_MYSQL_LOG_INFO, &mini_trx);

	mtr_commit(&mini_trx);

	/* Synchronous flush of the log buffer to disk */
	log_buffer_flush_to_disk();
}

/*********************************************************************
This is called after MySQL has written the binlog entry for the current
transaction. Flushes the InnoDB log files to disk if required. */
static
int
innobase_commit_complete(
/*=====================*/
				/* out: 0 */
	handlerton *hton, /* in: Innodb handlerton */
	THD*	thd)		/* in: user thread */
{
	trx_t*	trx = thd_to_trx(thd);

	if (trx == NULL || !trx->active_trans) {

		return(0);
	}

	trx->active_trans = 0;

	/* With srv_flush_log_at_trx_commit == 0 the completion call is
	skipped */
	if (!UNIV_UNLIKELY(srv_flush_log_at_trx_commit == 0)) {

		trx_commit_complete_for_mysql(trx);
	}

	return(0);
}
#endif
2012

2013
/*********************************************************************
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2014
Rolls back a transaction or the latest SQL statement. */
2015

2016
static int
2017 2018 2019
innobase_rollback(
/*==============*/
			/* out: 0 or error number */
2020
        handlerton *hton, /* in: Innodb handlerton */ 
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
2021
	THD*	thd,	/* in: handle to the MySQL thread of the user
2022
			whose transaction should be rolled back */
2023 2024
	bool	all)	/* in:	TRUE - commit transaction
				FALSE - the current SQL statement ended */
2025 2026
{
	int	error = 0;
2027
	trx_t*	trx;
2028

2029 2030 2031
	DBUG_ENTER("innobase_rollback");
	DBUG_PRINT("trans", ("aborting transaction"));

2032
	trx = check_trx_exists(hton, thd);
2033

2034
	/* Update the info whether we should skip XA steps that eat CPU time */
antony@ppcg5.local's avatar
antony@ppcg5.local committed
2035
	trx->support_xa = THDVAR(thd, support_xa);
2036

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2037 2038 2039 2040 2041 2042
	/* Release a possible FIFO ticket and search latch. Since we will
	reserve the kernel mutex, we have to release the search system latch
	first to obey the latching order. */

	innobase_release_stat_resources(trx);

2043
	if (trx->auto_inc_lock) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2044 2045 2046
		/* If we had reserved the auto-inc lock for some table (if
		we come here to roll back the latest SQL statement) we
		release it now before a possibly lengthy rollback */
2047

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2048 2049 2050
		row_unlock_table_autoinc_for_mysql(trx);
	}

2051
	if (all
antony@ppcg5.local's avatar
antony@ppcg5.local committed
2052
		|| !thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
2053

2054
		error = trx_rollback_for_mysql(trx);
2055
		trx->active_trans = 0;
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
2056
	} else {
2057
		error = trx_rollback_last_sql_stat_for_mysql(trx);
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
2058
	}
2059

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2060 2061 2062
	DBUG_RETURN(convert_error_code_to_mysql(error, NULL));
}

2063 2064
/*********************************************************************
Rolls back a transaction */
2065
static
2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082
int
innobase_rollback_trx(
/*==================*/
			/* out: 0 or error number */
	trx_t*	trx)	/*  in: transaction */
{
	int	error = 0;

	DBUG_ENTER("innobase_rollback_trx");
	DBUG_PRINT("trans", ("aborting transaction"));

	/* Release a possible FIFO ticket and search latch. Since we will
	reserve the kernel mutex, we have to release the search system latch
	first to obey the latching order. */

	innobase_release_stat_resources(trx);

2083
	if (trx->auto_inc_lock) {
2084 2085 2086
		/* If we had reserved the auto-inc lock for some table (if
		we come here to roll back the latest SQL statement) we
		release it now before a possibly lengthy rollback */
2087

2088 2089 2090 2091 2092 2093 2094 2095
		row_unlock_table_autoinc_for_mysql(trx);
	}

	error = trx_rollback_for_mysql(trx);

	DBUG_RETURN(convert_error_code_to_mysql(error, NULL));
}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2096 2097 2098
/*********************************************************************
Rolls back a transaction to a savepoint. */

2099
static int
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2100 2101 2102 2103
innobase_rollback_to_savepoint(
/*===========================*/
				/* out: 0 if success, HA_ERR_NO_SAVEPOINT if
				no savepoint with the given name */
2104
        handlerton *hton,       /* in: Innodb handlerton */ 
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2105 2106
	THD*	thd,		/* in: handle to the MySQL thread of the user
				whose transaction should be rolled back */
2107
	void*	savepoint)	/* in: savepoint data */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2108
{
2109 2110 2111 2112
	ib_longlong	mysql_binlog_cache_pos;
	int		error = 0;
	trx_t*		trx;
	char		name[64];
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2113 2114 2115

	DBUG_ENTER("innobase_rollback_to_savepoint");

2116
	trx = check_trx_exists(hton, thd);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2117

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2118 2119 2120
	/* Release a possible FIFO ticket and search latch. Since we will
	reserve the kernel mutex, we have to release the search system latch
	first to obey the latching order. */
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
2121 2122

	innobase_release_stat_resources(trx);
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
2123

2124
	/* TODO: use provided savepoint data area to store savepoint data */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2125

2126
	longlong2str((ulint)savepoint, name, 36);
2127

2128
	error = (int) trx_rollback_to_savepoint_for_mysql(trx, name,
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2129
						&mysql_binlog_cache_pos);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2130
	DBUG_RETURN(convert_error_code_to_mysql(error, NULL));
2131 2132
}

2133 2134
/*********************************************************************
Release transaction savepoint name. */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2135 2136
static
int
serg@serg.mylan's avatar
serg@serg.mylan committed
2137
innobase_release_savepoint(
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2138
/*=======================*/
2139 2140
				/* out: 0 if success, HA_ERR_NO_SAVEPOINT if
				no savepoint with the given name */
2141
        handlerton*	hton,	/* in: handlerton for Innodb */
2142 2143
	THD*	thd,		/* in: handle to the MySQL thread of the user
				whose transaction should be rolled back */
2144
	void*	savepoint)	/* in: savepoint data */
2145
{
2146 2147 2148
	int		error = 0;
	trx_t*		trx;
	char		name[64];
2149

serg@serg.mylan's avatar
serg@serg.mylan committed
2150
	DBUG_ENTER("innobase_release_savepoint");
2151

2152
	trx = check_trx_exists(hton, thd);
2153

2154
	/* TODO: use provided savepoint data area to store savepoint data */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2155

2156
	longlong2str((ulint)savepoint, name, 36);
2157

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2158
	error = (int) trx_release_savepoint_for_mysql(trx, name);
2159 2160 2161 2162

	DBUG_RETURN(convert_error_code_to_mysql(error, NULL));
}

2163
/*********************************************************************
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2164
Sets a transaction savepoint. */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2165 2166
static
int
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2167 2168 2169
innobase_savepoint(
/*===============*/
				/* out: always 0, that is, always succeeds */
2170
	handlerton*	hton,   /* in: handle to the Innodb handlerton */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2171
	THD*	thd,		/* in: handle to the MySQL thread */
2172
	void*	savepoint)	/* in: savepoint data */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2173 2174 2175 2176 2177 2178
{
	int	error = 0;
	trx_t*	trx;

	DBUG_ENTER("innobase_savepoint");

2179 2180 2181 2182 2183
	/*
	  In the autocommit mode there is no sense to set a savepoint
	  (unless we are in sub-statement), so SQL layer ensures that
	  this method is never called in such situation.
	*/
antony@ppcg5.local's avatar
antony@ppcg5.local committed
2184
	DBUG_ASSERT(thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN) ||
2185
		thd->in_sub_stmt);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2186

2187
	trx = check_trx_exists(hton, thd);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2188

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2189 2190 2191 2192 2193 2194
	/* Release a possible FIFO ticket and search latch. Since we will
	reserve the kernel mutex, we have to release the search system latch
	first to obey the latching order. */

	innobase_release_stat_resources(trx);

2195 2196
	/* cannot happen outside of transaction */
	DBUG_ASSERT(trx->active_trans);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2197

2198 2199 2200
	/* TODO: use provided savepoint data area to store savepoint data */
	char name[64];
	longlong2str((ulint)savepoint,name,36);
2201

2202
	error = (int) trx_savepoint_for_mysql(trx, name, (ib_longlong)0);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2203 2204 2205 2206

	DBUG_RETURN(convert_error_code_to_mysql(error, NULL));
}

2207
/*********************************************************************
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2208
Frees a possible InnoDB trx object associated with the current THD. */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2209 2210
static
int
2211 2212
innobase_close_connection(
/*======================*/
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2213
			/* out: 0 or error number */
2214
        handlerton*	hton,	/* in:  innobase handlerton */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2215
	THD*	thd)	/* in: handle to the MySQL thread of the user
2216
			whose resources should be free'd */
2217
{
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2218 2219
	trx_t*	trx;

2220
	trx = thd_to_trx(thd);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2221 2222 2223

	ut_a(trx);

2224 2225 2226 2227 2228
	if (trx->active_trans == 0
		&& trx->conc_state != TRX_NOT_STARTED) {

		sql_print_error("trx->active_trans == 0, but"
			" trx->conc_state != TRX_NOT_STARTED");
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2229 2230
	}

2231

2232
	if (trx->conc_state != TRX_NOT_STARTED &&
2233 2234 2235 2236 2237 2238 2239
		global_system_variables.log_warnings) {
		sql_print_warning(
			"MySQL is closing a connection that has an active "
			"InnoDB transaction.  %lu row modifications will "
			"roll back.",
			(ulong) trx->undo_no.low);
	}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2240 2241 2242

	innobase_rollback_trx(trx);

2243
	thr_local_free(trx->mysql_thread_id);
2244
	trx_free_for_mysql(trx);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2245

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2246
	return(0);
2247
}
2248 2249 2250


/*****************************************************************************
2251
** InnoDB database tables
2252 2253
*****************************************************************************/

2254 2255 2256 2257 2258 2259 2260 2261
/********************************************************************
Get the record format from the data dictionary. */
enum row_type
ha_innobase::get_row_type() const
/*=============================*/
			/* out: ROW_TYPE_REDUNDANT or ROW_TYPE_COMPACT */
{
	if (prebuilt && prebuilt->table) {
2262
		if (dict_table_is_comp_noninline(prebuilt->table)) {
2263 2264 2265 2266 2267 2268 2269 2270 2271
			return(ROW_TYPE_COMPACT);
		} else {
			return(ROW_TYPE_REDUNDANT);
		}
	}
	ut_ad(0);
	return(ROW_TYPE_NOT_USED);
}

2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287


/********************************************************************
Get the table flags to use for the statement. HA_BINLOG_STMT_CAPABLE is
added to the handler's flags only when the isolation level of the current
thread is above READ COMMITTED. At READ COMMITTED or below, an error is
additionally raised if the binlog format is statement based. */
handler::Table_flags
ha_innobase::table_flags() const
{
	THD *const thd= current_thd;

	/* thd's isolation level is read here rather than
	trx->isolation_level, because store_lock() has not been called yet.

	trx->isolation_level is only set inside store_lock(), which is
	called from mysql_lock_tables(); lock_tables() calls this function
	before it calls mysql_lock_tables(). */
	ulong const tx_isolation= thd_tx_isolation(thd);

	if (tx_isolation > ISO_READ_COMMITTED)
	{
		return int_table_flags | HA_BINLOG_STMT_CAPABLE;
	}

	ulong const binlog_format= thd->variables.binlog_format;

	/* Statement based binlogging does not work in these isolation
	levels since the necessary locks cannot be taken */
	if (binlog_format == BINLOG_FORMAT_STMT)
	{
		char msg[256];

		my_snprintf(msg, sizeof(msg),
			    "Transaction level '%s' in InnoDB is"
			    " not safe for binlog mode '%s'",
			    tx_isolation_names[tx_isolation],
			    binlog_format_names[binlog_format]);
		my_error(ER_BINLOG_LOGGING_IMPOSSIBLE, MYF(0), msg);
	}

	return int_table_flags;
}

2311
/********************************************************************
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2312
Gives the file extension of an InnoDB single-table tablespace. */
2313 2314 2315 2316
static const char* ha_innobase_exts[] = {
  ".ibd",
  NullS
};
2317 2318 2319 2320

const char**
ha_innobase::bas_ext() const
/*========================*/
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2321
				/* out: file extension string */
2322
{
2323
  return ha_innobase_exts;
2324 2325
}

2326

2327 2328 2329
/*********************************************************************
Normalizes a table name string. A normalized name consists of the
database name catenated to '/' and table name. An example:
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
2330 2331
test/mytable. On Windows normalization puts both the database name and the
table name always to lower case. */
2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345
static
void
normalize_table_name(
/*=================*/
	char*		norm_name,	/* out: normalized name as a
					null-terminated string */
	const char*	name)		/* in: table name string */
{
	char*	name_ptr;
	char*	db_ptr;
	char*	ptr;

	/* Scan name from the end */

2346
	ptr = strend(name)-1;
2347 2348 2349 2350 2351 2352 2353

	while (ptr >= name && *ptr != '\\' && *ptr != '/') {
		ptr--;
	}

	name_ptr = ptr + 1;

monty@bitch.mysql.fi's avatar
monty@bitch.mysql.fi committed
2354
	DBUG_ASSERT(ptr > name);
2355 2356

	ptr--;
2357

2358 2359 2360 2361 2362 2363 2364 2365 2366
	while (ptr >= name && *ptr != '\\' && *ptr != '/') {
		ptr--;
	}

	db_ptr = ptr + 1;

	memcpy(norm_name, db_ptr, strlen(name) + 1 - (db_ptr - name));

	norm_name[name_ptr - db_ptr - 1] = '/';
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
2367 2368

#ifdef __WIN__
marko@hundin.mysql.fi's avatar
marko@hundin.mysql.fi committed
2369
	innobase_casedn_str(norm_name);
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
2370
#endif
2371
}
2372

2373
/*********************************************************************
2374
Creates and opens a handle to a table which already exists in an InnoDB
2375 2376 2377 2378 2379 2380 2381
database. */

int
ha_innobase::open(
/*==============*/
					/* out: 1 if error, 0 if success */
	const char*	name,		/* in: table name */
2382 2383
	int		mode,		/* in: not used */
	uint		test_if_locked)	/* in: not used */
2384
{
2385
	dict_table_t*	ib_table;
2386
	char		norm_name[1000];
2387
	THD*		thd;
2388 2389 2390 2391 2392 2393

	DBUG_ENTER("ha_innobase::open");

	UT_NOT_USED(mode);
	UT_NOT_USED(test_if_locked);

2394
	thd = ha_thd();
2395 2396
	normalize_table_name(norm_name, name);

2397 2398
	user_thd = NULL;

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2399 2400 2401 2402
	if (!(share=get_share(name))) {

		DBUG_RETURN(1);
	}
2403

2404 2405 2406 2407
	/* Create buffers for packing the fields of a record. Why
	table->reclength did not work here? Obviously, because char
	fields when packed actually became 1 byte longer, when we also
	stored the string length as the first byte. */
2408

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2409 2410
	upd_and_key_val_buff_len =
				table->s->reclength + table->s->max_key_length
2411
							+ MAX_REF_PARTS * 3;
2412
	if (!(mysql_byte*) my_multi_malloc(MYF(MY_WME),
2413 2414 2415 2416
			&upd_buff, upd_and_key_val_buff_len,
			&key_val_buff, upd_and_key_val_buff_len,
			NullS)) {
		free_share(share);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2417

2418 2419
		DBUG_RETURN(1);
	}
2420

2421
	/* Get pointer to a table object in InnoDB dictionary cache */
2422

2423
	ib_table = dict_table_get(norm_name, TRUE);
2424 2425 2426

	if (NULL == ib_table) {
		ut_print_timestamp(stderr);
2427 2428 2429 2430 2431 2432 2433
		sql_print_error("Cannot find table %s from the internal data "
				"dictionary\nof InnoDB though the .frm file "
				"for the table exists. Maybe you\nhave "
				"deleted and recreated InnoDB data files but "
				"have forgotten\nto delete the corresponding "
				".frm files of InnoDB tables, or you\n"
				"have moved .frm files to another database?\n"
2434
				"See http://dev.mysql.com/doc/refman/5.1/en/innodb-troubleshooting.html\n"
2435 2436
				"how you can resolve the problem.\n",
				norm_name);
2437
		free_share(share);
2438
		my_free(upd_buff, MYF(0));
2439
		my_errno = ENOENT;
2440

2441 2442
		DBUG_RETURN(HA_ERR_NO_SUCH_TABLE);
	}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2443

2444
	if (ib_table->ibd_file_missing && !thd_tablespace_op(thd)) {
2445
		ut_print_timestamp(stderr);
2446 2447 2448 2449 2450
		sql_print_error("MySQL is trying to open a table handle but "
				"the .ibd file for\ntable %s does not exist.\n"
				"Have you deleted the .ibd file from the "
				"database directory under\nthe MySQL datadir, "
				"or have you used DISCARD TABLESPACE?\n"
2451
				"See http://dev.mysql.com/doc/refman/5.1/en/innodb-troubleshooting.html\n"
2452 2453
				"how you can resolve the problem.\n",
				norm_name);
2454
		free_share(share);
2455
		my_free(upd_buff, MYF(0));
2456
		my_errno = ENOENT;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2457

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2458
		dict_table_decrement_handle_count(ib_table);
2459 2460
		DBUG_RETURN(HA_ERR_NO_SUCH_TABLE);
	}
2461

2462
	prebuilt = row_create_prebuilt(ib_table);
2463

2464
	prebuilt->mysql_row_len = table->s->reclength;
2465

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2466 2467
	/* Looks like MySQL-3.23 sometimes has primary key number != 0 */

2468
	primary_key = table->s->primary_key;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2469
	key_used_on_scan = primary_key;
2470

2471 2472
	/* Allocate a buffer for a 'row reference'. A row reference is
	a string of bytes of length ref_length which uniquely specifies
2473 2474 2475
	a row in our table. Note that MySQL may also compare two row
	references for equality by doing a simple memcmp on the strings
	of length ref_length! */
2476

2477 2478
	if (!row_table_got_default_clust_index(ib_table)) {
		if (primary_key >= MAX_KEY) {
2479 2480
		  sql_print_error("Table %s has a primary key in InnoDB data "
				  "dictionary, but not in MySQL!", name);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2481
		}
2482

2483 2484
		prebuilt->clust_index_was_generated = FALSE;

2485
		/* MySQL allocates the buffer for ref. key_info->key_length
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2486 2487 2488 2489
		includes space for all key columns + one byte for each column
		that may be NULL. ref_length must be as exact as possible to
		save space, because all row reference buffers are allocated
		based on ref_length. */
2490 2491

		ref_length = table->key_info[primary_key].key_length;
2492
	} else {
2493
		if (primary_key != MAX_KEY) {
2494 2495 2496 2497 2498 2499 2500 2501 2502
		  sql_print_error("Table %s has no primary key in InnoDB data "
				  "dictionary, but has one in MySQL! If you "
				  "created the table with a MySQL version < "
				  "3.23.54 and did not define a primary key, "
				  "but defined a unique key with all non-NULL "
				  "columns, then MySQL internally treats that "
				  "key as the primary key. You can fix this "
				  "error by dump + DROP + CREATE + reimport "
				  "of the table.", name);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2503 2504
		}

2505
		prebuilt->clust_index_was_generated = TRUE;
2506

2507
		ref_length = DATA_ROW_ID_LEN;
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
2508

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2509 2510 2511 2512 2513 2514 2515
		/* If we automatically created the clustered index, then
		MySQL does not know about it, and MySQL must NOT be aware
		of the index used on scan, to make it avoid checking if we
		update the column of the index. That is why we assert below
		that key_used_on_scan is the undefined value MAX_KEY.
		The column is the row id in the automatical generation case,
		and it will never be updated anyway. */
2516

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2517
		if (key_used_on_scan != MAX_KEY) {
2518 2519 2520 2521
			sql_print_warning(
				"Table %s key_used_on_scan is %lu even "
				"though there is no primary key inside "
				"InnoDB.", name, (ulong) key_used_on_scan);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2522
		}
2523
	}
2524

2525
	stats.block_size = 16 * 1024;	/* Index block size in InnoDB: used by MySQL
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2526 2527
				in query optimization */

monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
2528
	/* Init table lock structure */
2529
	thr_lock_data_init(&share->lock,&lock,(void*) 0);
2530

2531
	info(HA_STATUS_NO_LOCK | HA_STATUS_VARIABLE | HA_STATUS_CONST);
2532

2533
	DBUG_RETURN(0);
2534 2535
}

2536 2537 2538 2539 2540 2541
/* Returns the longest key part length this handler reports as supported:
one less than DICT_MAX_INDEX_COL_LEN. */
uint
ha_innobase::max_supported_key_part_length() const
{
	const uint	max_len = DICT_MAX_INDEX_COL_LEN - 1;

	return(max_len);
}

2542
/**********************************************************************
2543
Closes a handle to an InnoDB table. */
2544 2545 2546 2547

int
ha_innobase::close(void)
/*====================*/
2548
				/* out: 0 */
2549
{
2550
	DBUG_ENTER("ha_innobase::close");
2551

2552
	row_prebuilt_free(prebuilt);
2553

2554
	my_free(upd_buff, MYF(0));
2555
	free_share(share);
2556

2557
	/* Tell InnoDB server that there might be work for
2558 2559 2560 2561
	utility threads: */

	srv_active_wake_master_thread();

2562
	DBUG_RETURN(0);
2563 2564
}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576
/* The following accessor functions should really be inside MySQL code! */

/******************************************************************
Gets field offset for a field in a table: the distance in bytes from the
start of table->record[0] to field->ptr. */
inline
uint
get_field_offset(
/*=============*/
			/* out: offset */
	TABLE*	table,	/* in: MySQL table object */
	Field*	field)	/* in: MySQL field object */
{
	const uint	offset = (uint) (field->ptr - table->record[0]);

	return(offset);
}

/******************************************************************
Checks if a field in a record is SQL NULL. Uses the record format
information in table to track the null bit in record. */
2583
static inline
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628
uint
field_in_record_is_null(
/*====================*/
			/* out: 1 if NULL, 0 otherwise */
	TABLE*	table,	/* in: MySQL table object */
	Field*	field,	/* in: MySQL field object */
	char*	record)	/* in: a row in MySQL format */
{
	int	null_offset;

	if (!field->null_ptr) {

		return(0);
	}

	null_offset = (uint) ((char*) field->null_ptr
					- (char*) table->record[0]);

	if (record[null_offset] & field->null_bit) {

		return(1);
	}

	return(0);
}

/******************************************************************
Sets a field in a record to SQL NULL. Uses the record format
information in table to track the null bit in record. The field's
null_ptr is used without a check. */
inline
void
set_field_in_record_to_null(
/*========================*/
	TABLE*	table,	/* in: MySQL table object */
	Field*	field,	/* in: MySQL field object */
	char*	record)	/* in: a row in MySQL format */
{
	/* Offset of the field's null byte, measured in table->record[0]
	and applied to 'record'. */

	const int	null_byte_offset = (uint) ((char*) field->null_ptr
					- (char*) table->record[0]);

	record[null_byte_offset] |= field->null_bit;
}

2629 2630
extern "C" {
/*****************************************************************
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2631 2632 2633 2634
InnoDB uses this function to compare two data fields for which the data type
is such that we must use MySQL code to compare them. NOTE that the prototype
of this function is in rem0cmp.c in InnoDB source code! If you change this
function, remember to update the prototype there! */
2635 2636 2637

int
innobase_mysql_cmp(
2638
/*===============*/
2639 2640
					/* out: 1, 0, -1, if a is greater,
					equal, less than b, respectively */
2641
	int		mysql_type,	/* in: MySQL type */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2642
	uint		charset_number,	/* in: number of the charset */
2643 2644 2645 2646 2647 2648 2649
	unsigned char*	a,		/* in: data field */
	unsigned int	a_length,	/* in: data field length,
					not UNIV_SQL_NULL */
	unsigned char*	b,		/* in: data field */
	unsigned int	b_length)	/* in: data field length,
					not UNIV_SQL_NULL */
{
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2650
	CHARSET_INFO*		charset;
2651
	enum_field_types	mysql_tp;
2652
	int			ret;
2653

monty@bitch.mysql.fi's avatar
monty@bitch.mysql.fi committed
2654 2655
	DBUG_ASSERT(a_length != UNIV_SQL_NULL);
	DBUG_ASSERT(b_length != UNIV_SQL_NULL);
2656 2657 2658 2659 2660

	mysql_tp = (enum_field_types) mysql_type;

	switch (mysql_tp) {

2661
	case MYSQL_TYPE_BIT:
2662
	case MYSQL_TYPE_STRING:
2663
	case MYSQL_TYPE_VAR_STRING:
2664 2665 2666 2667
	case MYSQL_TYPE_TINY_BLOB:
	case MYSQL_TYPE_MEDIUM_BLOB:
	case MYSQL_TYPE_BLOB:
	case MYSQL_TYPE_LONG_BLOB:
2668
	case MYSQL_TYPE_VARCHAR:
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681
		/* Use the charset number to pick the right charset struct for
		the comparison. Since the MySQL function get_charset may be
		slow before Bar removes the mutex operation there, we first
		look at 2 common charsets directly. */

		if (charset_number == default_charset_info->number) {
			charset = default_charset_info;
		} else if (charset_number == my_charset_latin1.number) {
			charset = &my_charset_latin1;
		} else {
			charset = get_charset(charset_number, MYF(MY_WME));

			if (charset == NULL) {
2682 2683 2684 2685
			  sql_print_error("InnoDB needs charset %lu for doing "
					  "a comparison, but MySQL cannot "
					  "find that charset.",
					  (ulong) charset_number);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2686 2687 2688 2689
				ut_a(0);
			}
		}

2690 2691 2692 2693
		/* Starting from 4.1.3, we use strnncollsp() in comparisons of
		non-latin1_swedish_ci strings. NOTE that the collation order
		changes then: 'b\0\0...' is ordered BEFORE 'b  ...'. Users
		having indexes on such data need to rebuild their tables! */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2694

2695 2696 2697
		ret = charset->coll->strnncollsp(charset,
				  a, a_length,
						 b, b_length, 0);
2698
		if (ret < 0) {
2699
			return(-1);
2700
		} else if (ret > 0) {
2701
			return(1);
2702
		} else {
2703 2704
			return(0);
		}
2705 2706 2707 2708 2709 2710 2711 2712 2713
	default:
		assert(0);
	}

	return(0);
}
}

/******************************************************************
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2714 2715 2716
Converts a MySQL type to an InnoDB type. Note that this function returns
the 'mtype' of InnoDB. InnoDB differentiates between MySQL's old <= 4.1
VARCHAR and the new true VARCHAR in >= 5.0.3 by the 'prtype'. */
2717 2718
inline
ulint
2719 2720
get_innobase_type_from_mysql_type(
/*==============================*/
2721 2722 2723 2724 2725
				/* out: DATA_BINARY, DATA_VARCHAR, ... */
	ulint*	unsigned_flag,	/* out: DATA_UNSIGNED if an 'unsigned type';
				at least ENUM and SET, and unsigned integer
				types are 'unsigned types' */
	Field*	field)		/* in: MySQL field */
2726
{
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2727 2728 2729
	/* The following asserts try to check that the MySQL type code fits in
	8 bits: this is used in ibuf and also when DATA_NOT_NULL is ORed to
	the type */
2730

2731 2732 2733 2734 2735
	DBUG_ASSERT((ulint)MYSQL_TYPE_STRING < 256);
	DBUG_ASSERT((ulint)MYSQL_TYPE_VAR_STRING < 256);
	DBUG_ASSERT((ulint)MYSQL_TYPE_DOUBLE < 256);
	DBUG_ASSERT((ulint)MYSQL_TYPE_FLOAT < 256);
	DBUG_ASSERT((ulint)MYSQL_TYPE_DECIMAL < 256);
2736 2737 2738 2739 2740 2741 2742 2743

	if (field->flags & UNSIGNED_FLAG) {

		*unsigned_flag = DATA_UNSIGNED;
	} else {
		*unsigned_flag = 0;
	}

2744 2745
	if (field->real_type() == MYSQL_TYPE_ENUM
		|| field->real_type() == MYSQL_TYPE_SET) {
2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757

		/* MySQL has field->type() a string type for these, but the
		data is actually internally stored as an unsigned integer
		code! */

		*unsigned_flag = DATA_UNSIGNED; /* MySQL has its own unsigned
						flag set to zero, even though
						internally this is an unsigned
						integer type */
		return(DATA_INT);
	}

2758
	switch (field->type()) {
2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782
		/* NOTE that we only allow string types in DATA_MYSQL and
		DATA_VARMYSQL */
	case MYSQL_TYPE_VAR_STRING: /* old <= 4.1 VARCHAR */
	case MYSQL_TYPE_VARCHAR:    /* new >= 5.0.3 true VARCHAR */
		if (field->binary()) {
			return(DATA_BINARY);
		} else if (strcmp(
				   field->charset()->name,
				   "latin1_swedish_ci") == 0) {
			return(DATA_VARCHAR);
		} else {
			return(DATA_VARMYSQL);
		}
	case MYSQL_TYPE_BIT:
	case MYSQL_TYPE_STRING: if (field->binary()) {

			return(DATA_FIXBINARY);
		} else if (strcmp(
				   field->charset()->name,
				   "latin1_swedish_ci") == 0) {
			return(DATA_CHAR);
		} else {
			return(DATA_MYSQL);
		}
2783
	case MYSQL_TYPE_NEWDECIMAL:
2784
		return(DATA_FIXBINARY);
2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795
	case MYSQL_TYPE_LONG:
	case MYSQL_TYPE_LONGLONG:
	case MYSQL_TYPE_TINY:
	case MYSQL_TYPE_SHORT:
	case MYSQL_TYPE_INT24:
	case MYSQL_TYPE_DATE:
	case MYSQL_TYPE_DATETIME:
	case MYSQL_TYPE_YEAR:
	case MYSQL_TYPE_NEWDATE:
	case MYSQL_TYPE_TIME:
	case MYSQL_TYPE_TIMESTAMP:
2796
		return(DATA_INT);
2797
	case MYSQL_TYPE_FLOAT:
2798
		return(DATA_FLOAT);
2799
	case MYSQL_TYPE_DOUBLE:
2800
		return(DATA_DOUBLE);
2801
	case MYSQL_TYPE_DECIMAL:
2802
		return(DATA_DECIMAL);
2803 2804 2805 2806 2807
	case MYSQL_TYPE_GEOMETRY:
	case MYSQL_TYPE_TINY_BLOB:
	case MYSQL_TYPE_MEDIUM_BLOB:
	case MYSQL_TYPE_BLOB:
	case MYSQL_TYPE_LONG_BLOB:
2808 2809 2810
		return(DATA_BLOB);
	default:
		assert(0);
2811 2812 2813 2814
	}

	return(0);
}
2815

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841
/***********************************************************************
Writes an unsigned integer value < 64k to 2 bytes, in the little-endian
storage format: low byte first, high byte second. */
inline
void
innobase_write_to_2_little_endian(
/*==============================*/
	byte*	buf,	/* in: where to store */
	ulint	val)	/* in: value to write, must be < 64k */
{
	ut_a(val < 256 * 256);

	const byte	low	= (byte)(val & 0xFF);
	const byte	high	= (byte)(val >> 8);

	buf[0] = low;
	buf[1] = high;
}

/***********************************************************************
Reads an unsigned integer value < 64k from 2 bytes, in the little-endian
storage format: buf[0] is the low byte, buf[1] the high byte. */
inline
uint
innobase_read_from_2_little_endian(
/*===============================*/
			/* out: value */
	const mysql_byte*	buf)	/* in: from where to read */
{
	const ulint	low	= (ulint)(buf[0]);
	const ulint	high	= (ulint)(buf[1]);

	return (uint) (low + 256 * high);
}

2845
/***********************************************************************
2846
Stores a key value for a row to a buffer. */
2847 2848 2849 2850 2851

uint
ha_innobase::store_key_val_for_row(
/*===============================*/
				/* out: key value length as stored in buff */
2852
	uint		keynr,	/* in: key number */
2853
	char*		buff,	/* in/out: buffer for the key value (in MySQL
2854 2855
				format) */
	uint		buff_len,/* in: buffer length */
2856
	const mysql_byte* record)/* in: row in MySQL format */
2857
{
2858 2859 2860
	KEY*		key_info	= table->key_info + keynr;
	KEY_PART_INFO*	key_part	= key_info->key_part;
	KEY_PART_INFO*	end		= key_part + key_info->key_parts;
2861
	char*		buff_start	= buff;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2862 2863 2864
	enum_field_types mysql_type;
	Field*		field;
	ibool		is_null;
2865

2866
	DBUG_ENTER("store_key_val_for_row");
2867

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881
	/* The format for storing a key field in MySQL is the following:

	1. If the column can be NULL, then in the first byte we put 1 if the
	field value is NULL, 0 otherwise.

	2. If the column is of a BLOB type (it must be a column prefix field
	in this case), then we put the length of the data in the field to the
	next 2 bytes, in the little-endian format. If the field is SQL NULL,
	then these 2 bytes are set to 0. Note that the length of data in the
	field is <= column prefix length.

	3. In a column prefix field, prefix_len next bytes are reserved for
	data. In a normal field the max field length next bytes are reserved
	for data. For a VARCHAR(n) the max field length is n. If the stored
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2882
	value is the SQL NULL then these data bytes are set to 0.
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2883

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2884 2885 2886 2887 2888 2889
	4. We always use a 2 byte length for a true >= 5.0.3 VARCHAR. Note that
	in the MySQL row format, the length is stored in 1 or 2 bytes,
	depending on the maximum allowed length. But in the MySQL key value
	format, the length always takes 2 bytes.

	We have to zero-fill the buffer so that MySQL is able to use a
2890 2891
	simple memcmp to compare two key values to determine if they are
	equal. MySQL does this to compare contents of two 'ref' values. */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2892

2893
	bzero(buff, buff_len);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2894

2895 2896
	for (; key_part != end; key_part++) {
		is_null = FALSE;
2897

2898 2899
		if (key_part->null_bit) {
			if (record[key_part->null_offset]
2900
						& key_part->null_bit) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2901 2902
				*buff = 1;
				is_null = TRUE;
2903
			} else {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2904 2905 2906
				*buff = 0;
			}
			buff++;
2907
		}
2908

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2909 2910 2911
		field = key_part->field;
		mysql_type = field->type();

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2912 2913 2914 2915 2916
		if (mysql_type == MYSQL_TYPE_VARCHAR) {
						/* >= 5.0.3 true VARCHAR */
			ulint	lenlen;
			ulint	len;
			byte*	data;
2917
			ulint	key_len;
2918
			ulint	true_len;
2919 2920
			CHARSET_INFO*	cs;
			int	error=0;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2921

2922 2923
			key_len = key_part->length;

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2924
			if (is_null) {
2925 2926
				buff += key_len + 2;

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2927 2928
				continue;
			}
2929
			cs = field->charset();
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2930 2931 2932 2933

			lenlen = (ulint)
				(((Field_varstring*)field)->length_bytes);

2934
			data = row_mysql_read_true_varchar(&len,
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2935 2936 2937
				(byte*) (record
				+ (ulint)get_field_offset(table, field)),
				lenlen);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2938

2939 2940 2941 2942 2943 2944 2945 2946 2947
			true_len = len;

			/* For multi byte character sets we need to calculate
			the true length of the key */

			if (len > 0 && cs->mbmaxlen > 1) {
				true_len = (ulint) cs->cset->well_formed_len(cs,
						(const char *) data,
						(const char *) data + len,
2948 2949
                                                (uint) (key_len /
                                                        cs->mbmaxlen),
2950 2951 2952
						&error);
			}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2953 2954
			/* In a column prefix index, we may need to truncate
			the stored value: */
2955

2956 2957
			if (true_len > key_len) {
				true_len = key_len;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2958 2959
			}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2960 2961 2962
			/* The length in a key value is always stored in 2
			bytes */

2963
			row_mysql_store_true_var_len((byte*)buff, true_len, 2);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2964 2965
			buff += 2;

2966
			memcpy(buff, data, true_len);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2967 2968 2969 2970 2971 2972 2973

			/* Note that we always reserve the maximum possible
			length of the true VARCHAR in the key value, though
			only len first bytes after the 2 length bytes contain
			actual data. The rest of the space was reset to zero
			in the bzero() call above. */

2974
			buff += key_len;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2975

2976 2977 2978 2979
		} else if (mysql_type == MYSQL_TYPE_TINY_BLOB
			|| mysql_type == MYSQL_TYPE_MEDIUM_BLOB
			|| mysql_type == MYSQL_TYPE_BLOB
			|| mysql_type == MYSQL_TYPE_LONG_BLOB) {
2980

2981 2982
			CHARSET_INFO*	cs;
			ulint		key_len;
2983
			ulint		true_len;
2984
			int		error=0;
2985 2986
			ulint		blob_len;
			byte*		blob_data;
2987

2988
			ut_a(key_part->key_part_flag & HA_PART_KEY_SEG);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2989

2990 2991 2992 2993 2994
			key_len = key_part->length;

			if (is_null) {
				buff += key_len + 2;

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2995
				continue;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
2996
			}
2997 2998 2999 3000

			cs = field->charset();

			blob_data = row_mysql_read_blob_ref(&blob_len,
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3001
				(byte*) (record
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3002
				+ (ulint)get_field_offset(table, field)),
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3003 3004
					(ulint) field->pack_length());

3005 3006
			true_len = blob_len;

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3007
			ut_a(get_field_offset(table, field)
3008 3009 3010 3011 3012 3013 3014 3015 3016 3017
				== key_part->offset);

			/* For multi byte character sets we need to calculate
			the true length of the key */

			if (blob_len > 0 && cs->mbmaxlen > 1) {
				true_len = (ulint) cs->cset->well_formed_len(cs,
						(const char *) blob_data,
						(const char *) blob_data
							+ blob_len,
3018 3019
                                                (uint) (key_len /
                                                        cs->mbmaxlen),
3020 3021
						&error);
			}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3022 3023 3024

			/* All indexes on BLOB and TEXT are column prefix
			indexes, and we may need to truncate the data to be
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3025
			stored in the key value: */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3026

3027 3028
			if (true_len > key_len) {
				true_len = key_len;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3029 3030 3031 3032 3033
			}

			/* MySQL reserves 2 bytes for the length and the
			storage of the number is little-endian */

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3034
			innobase_write_to_2_little_endian(
3035
					(byte*)buff, true_len);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3036 3037
			buff += 2;

3038
			memcpy(buff, blob_data, true_len);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3039

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3040 3041 3042
			/* Note that we always reserve the maximum possible
			length of the BLOB prefix in the key value. */

3043
			buff += key_len;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3044
		} else {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3045 3046 3047 3048 3049
			/* Here we handle all other data types except the
			true VARCHAR, BLOB and TEXT. Note that the column
			value we store may be also in a column prefix
			index. */

3050
			CHARSET_INFO*		cs;
3051 3052
			ulint			true_len;
			ulint			key_len;
3053 3054
			const mysql_byte*	src_start;
			int			error=0;
3055 3056 3057 3058 3059 3060
			enum_field_types	real_type;

			key_len = key_part->length;

			if (is_null) {
				 buff += key_len;
3061

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3062 3063
				 continue;
			}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3064

3065
			src_start = record + key_part->offset;
3066 3067
			real_type = field->real_type();
			true_len = key_len;
3068

3069 3070 3071 3072 3073
			/* Character set for the field is defined only
			to fields whose type is string and real field
			type is not enum or set. For these fields check
			if character set is multi byte. */

3074 3075
			if (real_type != MYSQL_TYPE_ENUM
				&& real_type != MYSQL_TYPE_SET
3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090
				&& ( mysql_type == MYSQL_TYPE_VAR_STRING
					|| mysql_type == MYSQL_TYPE_STRING)) {

				cs = field->charset();

				/* For multi byte character sets we need to
				calculate the true length of the key */

				if (key_len > 0 && cs->mbmaxlen > 1) {

					true_len = (ulint)
						cs->cset->well_formed_len(cs,
							(const char *)src_start,
							(const char *)src_start
								+ key_len,
3091 3092
                                                        (uint) (key_len /
                                                                cs->mbmaxlen),
3093 3094
							&error);
				}
3095 3096
			}

3097 3098
			memcpy(buff, src_start, true_len);
			buff += true_len;
3099

3100 3101 3102
			/* Pad the unused space with spaces. Note that no
			padding is ever needed for UCS-2 because in MySQL,
			all UCS2 characters are 2 bytes, as MySQL does not
3103 3104
			support surrogate pairs, which are needed to represent
			characters in the range U+10000 to U+10FFFF. */
3105

3106 3107 3108 3109
			if (true_len < key_len) {
				ulint pad_len = key_len - true_len;
				memset(buff, ' ', pad_len);
				buff += pad_len;
3110
			}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3111
		}
3112
	}
3113

3114
	ut_a(buff <= buff_start + buff_len);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3115 3116

	DBUG_RETURN((uint)(buff - buff_start));
3117 3118 3119
}

/******************************************************************
Builds a 'template' to the prebuilt struct. The template is used in fast
retrieval of just those column values MySQL needs in its processing. */
3122
static
3123
void
3124
build_template(
3125 3126 3127 3128 3129 3130
/*===========*/
	row_prebuilt_t*	prebuilt,	/* in: prebuilt struct */
	THD*		thd,		/* in: current user thread, used
					only if templ_type is
					ROW_MYSQL_REC_FIELDS */
	TABLE*		table,		/* in: MySQL table */
3131
	uint		templ_type)	/* in: ROW_MYSQL_WHOLE_ROW or
3132
					ROW_MYSQL_REC_FIELDS */
3133
{
3134 3135
	dict_index_t*	index;
	dict_index_t*	clust_index;
3136
	mysql_row_templ_t* templ;
3137
	Field*		field;
3138 3139
	ulint		n_fields;
	ulint		n_requested_fields	= 0;
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
3140
	ibool		fetch_all_in_key	= FALSE;
3141
	ibool		fetch_primary_key_cols	= FALSE;
3142
	ulint		i;
3143 3144
	/* byte offset of the end of last requested column */
	ulint		mysql_prefix_len	= 0;
3145

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3146 3147 3148 3149
	if (prebuilt->select_lock_type == LOCK_X) {
		/* We always retrieve the whole clustered index record if we
		use exclusive row level locks, for example, if the read is
		done in an UPDATE statement. */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3150

3151
		templ_type = ROW_MYSQL_WHOLE_ROW;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3152 3153
	}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3154
	if (templ_type == ROW_MYSQL_REC_FIELDS) {
3155 3156
		if (prebuilt->hint_need_to_fetch_extra_cols
			== ROW_RETRIEVE_ALL_COLS) {
3157

3158 3159
			/* We know we must at least fetch all columns in the
			key, or all columns in the table */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3160

3161 3162 3163 3164 3165 3166 3167
			if (prebuilt->read_just_key) {
				/* MySQL has instructed us that it is enough
				to fetch the columns in the key; looks like
				MySQL can set this flag also when there is
				only a prefix of the column in the key: in
				that case we retrieve the whole column from
				the clustered index */
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
3168

3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182
				fetch_all_in_key = TRUE;
			} else {
				templ_type = ROW_MYSQL_WHOLE_ROW;
			}
		} else if (prebuilt->hint_need_to_fetch_extra_cols
			== ROW_RETRIEVE_PRIMARY_KEY) {
			/* We must at least fetch all primary key cols. Note
			   that if the clustered index was internally generated
			   by InnoDB on the row id (no primary key was
			   defined), then row_search_for_mysql() will always
			   retrieve the row id to a special buffer in the
			   prebuilt struct. */

			fetch_primary_key_cols = TRUE;
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
3183
		}
3184 3185
	}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3186
	clust_index = dict_table_get_first_index_noninline(prebuilt->table);
3187

3188
	if (templ_type == ROW_MYSQL_REC_FIELDS) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3189
		index = prebuilt->index;
3190 3191
	} else {
		index = clust_index;
3192
	}
3193

3194 3195 3196 3197 3198 3199 3200
	if (index == clust_index) {
		prebuilt->need_to_access_clustered = TRUE;
	} else {
		prebuilt->need_to_access_clustered = FALSE;
		/* Below we check column by column if we need to access
		the clustered index */
	}
3201

3202
	n_fields = (ulint)table->s->fields; /* number of columns */
3203 3204 3205 3206 3207 3208

	if (!prebuilt->mysql_template) {
		prebuilt->mysql_template = (mysql_row_templ_t*)
						mem_alloc_noninline(
					n_fields * sizeof(mysql_row_templ_t));
	}
3209

3210
	prebuilt->template_type = templ_type;
3211
	prebuilt->null_bitmap_len = table->s->null_bytes;
3212

3213 3214
	prebuilt->templ_contains_blob = FALSE;

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3215 3216
	/* Note that in InnoDB, i is the column number. MySQL calls columns
	'fields'. */
3217
	for (i = 0; i < n_fields; i++) {
3218
		templ = prebuilt->mysql_template + n_requested_fields;
3219 3220
		field = table->field[i];

3221 3222 3223 3224 3225
		if (UNIV_LIKELY(templ_type == ROW_MYSQL_REC_FIELDS)) {
			/* Decide which columns we should fetch
			and which we can skip. */
			register const ibool	index_contains_field =
				dict_index_contains_col_or_prefix(index, i);
3226

3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239
			if (!index_contains_field && prebuilt->read_just_key) {
				/* If this is a 'key read', we do not need
				columns that are not in the key */

				goto skip_field;
			}

			if (index_contains_field && fetch_all_in_key) {
				/* This field is needed in the query */

				goto include_field;
			}

3240 3241
                        if (bitmap_is_set(table->read_set, i) ||
                            bitmap_is_set(table->write_set, i)) {
3242 3243 3244 3245
				/* This field is needed in the query */

				goto include_field;
			}
3246

3247
			if (fetch_primary_key_cols
3248 3249
				&& dict_table_col_in_clustered_key(
					index->table, i)) {
3250 3251 3252 3253
				/* This field is needed in the query */

				goto include_field;
			}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3254 3255

			/* This field is not needed in the query, skip it */
3256 3257 3258

			goto skip_field;
		}
3259
include_field:
3260
		n_requested_fields++;
3261

3262
		templ->col_no = i;
3263

3264
		if (index == clust_index) {
3265 3266
			templ->rec_field_no = dict_col_get_clust_pos_noninline(
				&index->table->cols[i], index);
3267
		} else {
3268 3269
			templ->rec_field_no = dict_index_get_nth_col_pos(
								index, i);
3270 3271
		}

3272 3273 3274 3275 3276 3277 3278 3279
		if (templ->rec_field_no == ULINT_UNDEFINED) {
			prebuilt->need_to_access_clustered = TRUE;
		}

		if (field->null_ptr) {
			templ->mysql_null_byte_offset =
				(ulint) ((char*) field->null_ptr
					- (char*) table->record[0]);
3280

3281 3282 3283 3284
			templ->mysql_null_bit_mask = (ulint) field->null_bit;
		} else {
			templ->mysql_null_bit_mask = 0;
		}
3285

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3286 3287 3288
		templ->mysql_col_offset = (ulint)
					get_field_offset(table, field);

3289
		templ->mysql_col_len = (ulint) field->pack_length();
3290 3291 3292 3293 3294
		if (mysql_prefix_len < templ->mysql_col_offset
				+ templ->mysql_col_len) {
			mysql_prefix_len = templ->mysql_col_offset
				+ templ->mysql_col_len;
		}
3295
		templ->type = index->table->cols[i].mtype;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3296 3297 3298 3299
		templ->mysql_type = (ulint)field->type();

		if (templ->mysql_type == DATA_MYSQL_TRUE_VARCHAR) {
			templ->mysql_length_bytes = (ulint)
3300
				(((Field_varstring*)field)->length_bytes);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3301
		}
3302

3303
		templ->charset = dtype_get_charset_coll_noninline(
3304 3305 3306 3307
				index->table->cols[i].prtype);
		templ->mbminlen = index->table->cols[i].mbminlen;
		templ->mbmaxlen = index->table->cols[i].mbmaxlen;
		templ->is_unsigned = index->table->cols[i].prtype
3308
							& DATA_UNSIGNED;
3309 3310
		if (templ->type == DATA_BLOB) {
			prebuilt->templ_contains_blob = TRUE;
3311
		}
3312 3313 3314
skip_field:
		;
	}
3315

3316
	prebuilt->n_template = n_requested_fields;
3317
	prebuilt->mysql_prefix_len = mysql_prefix_len;
3318

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3319
	if (index != clust_index && prebuilt->need_to_access_clustered) {
3320 3321 3322 3323
		/* Change rec_field_no's to correspond to the clustered index
		record */
		for (i = 0; i < n_requested_fields; i++) {
			templ = prebuilt->mysql_template + i;
3324

3325 3326 3327
			templ->rec_field_no = dict_col_get_clust_pos_noninline(
				&index->table->cols[templ->col_no],
				clust_index);
3328
		}
3329
	}
3330 3331 3332
}

/************************************************************************
3333
Stores a row in an InnoDB database, to the table specified in this
3334 3335 3336 3337 3338
handle. */

int
ha_innobase::write_row(
/*===================*/
3339
				/* out: error code */
3340
	mysql_byte*	record)	/* in: a row in MySQL format */
3341
{
3342
	int		error;
3343
	longlong	auto_inc;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3344
	longlong	dummy;
3345
	ibool		auto_inc_used= FALSE;
3346
        THD *thd=       ha_thd();
3347
	trx_t*		trx = thd_to_trx(thd);
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
3348

3349
	DBUG_ENTER("ha_innobase::write_row");
3350

3351
	if (prebuilt->trx != trx) {
3352 3353
	  sql_print_error("The transaction object for the table handle is at "
			  "%p, but for the current thread it is at %p",
3354
			  prebuilt->trx, trx);
3355

3356 3357 3358
		fputs("InnoDB: Dump of 200 bytes around prebuilt: ", stderr);
		ut_print_buf(stderr, ((const byte*)prebuilt) - 100, 200);
		fputs("\n"
3359
			"InnoDB: Dump of 200 bytes around ha_data: ",
3360
			stderr);
3361
		ut_print_buf(stderr, ((const byte*) trx) - 100, 200);
3362 3363
		putc('\n', stderr);
		ut_error;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3364
	}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3365

3366
	ha_statistic_increment(&SSV::ha_write_count);
3367

3368 3369
	if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_INSERT)
		table->timestamp_field->set_time();
3370

3371 3372 3373 3374
	if ((thd_sql_command(thd) == SQLCOM_ALTER_TABLE
			|| thd_sql_command(thd) == SQLCOM_OPTIMIZE
			|| thd_sql_command(thd) == SQLCOM_CREATE_INDEX
			|| thd_sql_command(thd) == SQLCOM_DROP_INDEX)
3375
		&& num_write_row >= 10000) {
3376 3377 3378 3379 3380 3381 3382 3383
		/* ALTER TABLE is COMMITted at every 10000 copied rows.
		The IX table lock for the original table has to be re-issued.
		As this method will be called on a temporary table where the
		contents of the original table is being copied to, it is
		a bit tricky to determine the source table.  The cursor
		position in the source table need not be adjusted after the
		intermediate COMMIT, since writes by other transactions are
		being blocked by a MySQL table lock TL_WRITE_ALLOW_READ. */
3384

3385
		dict_table_t*	src_table;
marko@hundin.mysql.fi's avatar
marko@hundin.mysql.fi committed
3386
		ulint		mode;
3387

3388
		num_write_row = 0;
3389

marko@hundin.mysql.fi's avatar
marko@hundin.mysql.fi committed
3390 3391
		/* Commit the transaction.  This will release the table
		locks, so they have to be acquired again. */
3392 3393 3394 3395 3396 3397

		/* Altering an InnoDB table */
		/* Get the source table. */
		src_table = lock_get_src_table(
				prebuilt->trx, prebuilt->table, &mode);
		if (!src_table) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3398
no_commit:
3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411
			/* Unknown situation: do not commit */
			/*
			ut_print_timestamp(stderr);
			fprintf(stderr,
				"  InnoDB error: ALTER TABLE is holding lock"
				" on %lu tables!\n",
				prebuilt->trx->mysql_n_tables_locked);
			*/
			;
		} else if (src_table == prebuilt->table) {
			/* Source table is not in InnoDB format:
			no need to re-acquire locks on it. */

3412
			/* Altering to InnoDB format */
3413
			innobase_commit(ht, user_thd, 1);
3414
			/* Note that this transaction is still active. */
3415
			prebuilt->trx->active_trans = 1;
3416
			/* We will need an IX lock on the destination table. */
3417
			prebuilt->sql_stat_start = TRUE;
3418 3419 3420
		} else {
			/* Ensure that there are no other table locks than
			LOCK_IX and LOCK_AUTO_INC on the destination table. */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3421

3422 3423
			if (!lock_is_table_exclusive(prebuilt->table,
							prebuilt->trx)) {
3424 3425 3426 3427 3428
				goto no_commit;
			}

			/* Commit the transaction.  This will release the table
			locks, so they have to be acquired again. */
3429
			innobase_commit(ht, user_thd, 1);
3430
			/* Note that this transaction is still active. */
3431
			prebuilt->trx->active_trans = 1;
3432
			/* Re-acquire the table lock on the source table. */
3433
			row_lock_table_for_mysql(prebuilt, src_table, mode);
3434
			/* We will need an IX lock on the destination table. */
3435
			prebuilt->sql_stat_start = TRUE;
3436
		}
3437 3438
	}

marko@hundin.mysql.fi's avatar
marko@hundin.mysql.fi committed
3439 3440
	num_write_row++;

3441
	if (table->next_number_field && record == table->record[0]) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3442 3443
		/* This is the case where the table has an
		auto-increment column */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3444 3445 3446 3447 3448 3449 3450

		/* Initialize the auto-inc counter if it has not been
		initialized yet */

		if (0 == dict_table_autoinc_peek(prebuilt->table)) {

			/* This call initializes the counter */
3451
			error = innobase_read_and_init_auto_inc(&dummy);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468

			if (error) {
				/* Deadlock or lock wait timeout */

				goto func_exit;
			}

			/* We have to set sql_stat_start to TRUE because
			the above call probably has called a select, and
			has reset that flag; row_insert_for_mysql has to
			know to set the IX intention lock on the table,
			something it only does at the start of each
			statement */

			prebuilt->sql_stat_start = TRUE;
		}

3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486
		/* We have to use the transactional lock mechanism on the
		auto-inc counter of the table to ensure that replication and
		roll-forward of the binlog exactly imitates also the given
		auto-inc values. The lock is released at each SQL statement's
		end. This lock also prevents a race where two threads would
		call ::get_auto_increment() simultaneously. */

		error = row_lock_table_autoinc_for_mysql(prebuilt);

		if (error != DB_SUCCESS) {
			/* Deadlock or lock wait timeout */

			error = convert_error_code_to_mysql(error, user_thd);

			goto func_exit;
		}

		/* We must use the handler code to update the auto-increment
3487
		value to be sure that we increment it correctly. */
3488

3489 3490
    		if ((error= update_auto_increment()))
			goto func_exit;
3491
		auto_inc_used = 1;
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
3492

3493
	}
3494

3495 3496 3497 3498
	if (prebuilt->mysql_template == NULL
			|| prebuilt->template_type != ROW_MYSQL_WHOLE_ROW) {
		/* Build the template used in converting quickly between
		the two database formats */
3499

3500 3501
		build_template(prebuilt, NULL, table, ROW_MYSQL_WHOLE_ROW);
	}
3502

3503
	innodb_srv_conc_enter_innodb(prebuilt->trx);
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
3504

3505
	error = row_insert_for_mysql((byte*) record, prebuilt);
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
3506

3507
	if (error == DB_SUCCESS && auto_inc_used) {
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
3508

3509
		/* Fetch the value that was set in the autoincrement field */
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
3510

3511
		auto_inc = table->next_number_field->val_int();
3512

3513
		if (auto_inc != 0) {
3514 3515
			/* This call will update the counter according to the
			value that was inserted in the table */
3516

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3517 3518
            		dict_table_autoinc_update(prebuilt->table, auto_inc);
          	}
3519
        }
3520

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3521 3522 3523 3524 3525
        /* A REPLACE command and LOAD DATA INFILE REPLACE handle a duplicate
        key error themselves, and we must update the autoinc counter if we are
        performing those statements. */

        if (error == DB_DUPLICATE_KEY && auto_inc_used
3526 3527 3528 3529 3530 3531 3532 3533
            && (thd_sql_command(user_thd) == SQLCOM_REPLACE
                || thd_sql_command(user_thd) == SQLCOM_REPLACE_SELECT
                || (thd_sql_command(user_thd) == SQLCOM_INSERT
                    && prebuilt->trx->allow_duplicates
		    && !prebuilt->trx->replace_duplicates)
                || (thd_sql_command(user_thd) == SQLCOM_LOAD
                    && prebuilt->trx->allow_duplicates
		    && prebuilt->trx->replace_duplicates))) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3534 3535 3536 3537 3538 3539 3540 3541

                auto_inc = table->next_number_field->val_int();

                if (auto_inc != 0) {
                        dict_table_autoinc_update(prebuilt->table, auto_inc);
                }
        }

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3542
	innodb_srv_conc_exit_innodb(prebuilt->trx);
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
3543

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3544
	error = convert_error_code_to_mysql(error, user_thd);
3545

3546
	/* Tell InnoDB server that there might be work for
3547
	utility threads: */
3548
func_exit:
3549
	innobase_active_small();
3550

3551
	DBUG_RETURN(error);
3552 3553
}

3554 3555 3556 3557 3558 3559 3560 3561 3562
/**************************************************************************
Checks which fields have changed in a row and stores information
of them to an update vector. */
static
int
calc_row_difference(
/*================*/
					/* out: error number or 0 */
	upd_t*		uvect,		/* in/out: update vector */
3563 3564
	mysql_byte*	old_row,	/* in: old row in MySQL format */
	mysql_byte*	new_row,	/* in: new row in MySQL format */
3565 3566
	struct st_table* table,		/* in: table in MySQL data
					dictionary */
3567
	mysql_byte*	upd_buff,	/* in: buffer to use */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3568
	ulint		buff_len,	/* in: buffer length */
3569
	row_prebuilt_t*	prebuilt,	/* in: InnoDB prebuilt struct */
3570 3571
	THD*		thd)		/* in: user thread */
{
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3572
	mysql_byte*	original_upd_buff = upd_buff;
3573
	Field*		field;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3574
	enum_field_types field_mysql_type;
3575 3576 3577
	uint		n_fields;
	ulint		o_len;
	ulint		n_len;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3578
	ulint		col_pack_len;
3579
	byte*		new_mysql_row_col;
3580 3581 3582
	byte*		o_ptr;
	byte*		n_ptr;
	byte*		buf;
3583
	upd_field_t*	ufield;
3584
	ulint		col_type;
3585
	ulint		n_changed = 0;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3586
	dfield_t	dfield;
3587
	dict_index_t*	clust_index;
3588
	uint		i;
3589

3590
	n_fields = table->s->fields;
3591
	clust_index = dict_table_get_first_index_noninline(prebuilt->table);
3592

3593
	/* We use upd_buff to convert changed fields */
3594
	buf = (byte*) upd_buff;
3595

3596 3597 3598
	for (i = 0; i < n_fields; i++) {
		field = table->field[i];

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3599 3600
		o_ptr = (byte*) old_row + get_field_offset(table, field);
		n_ptr = (byte*) new_row + get_field_offset(table, field);
3601

3602 3603 3604
		/* Use new_mysql_row_col and col_pack_len save the values */

		new_mysql_row_col = n_ptr;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3605
		col_pack_len = field->pack_length();
3606

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3607 3608
		o_len = col_pack_len;
		n_len = col_pack_len;
3609

3610
		/* We use o_ptr and n_ptr to dig up the actual data for
3611
		comparison. */
3612

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3613
		field_mysql_type = field->type();
3614

3615
		col_type = prebuilt->table->cols[i].mtype;
3616 3617 3618 3619 3620 3621

		switch (col_type) {

		case DATA_BLOB:
			o_ptr = row_mysql_read_blob_ref(&o_len, o_ptr, o_len);
			n_ptr = row_mysql_read_blob_ref(&n_len, n_ptr, n_len);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3622

3623
			break;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3624

3625 3626 3627
		case DATA_VARCHAR:
		case DATA_BINARY:
		case DATA_VARMYSQL:
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3628 3629 3630 3631
			if (field_mysql_type == MYSQL_TYPE_VARCHAR) {
				/* This is a >= 5.0.3 type true VARCHAR where
				the real payload data length is stored in
				1 or 2 bytes */
3632

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3633
				o_ptr = row_mysql_read_true_varchar(
3634 3635 3636 3637
					&o_len, o_ptr,
					(ulint)
					(((Field_varstring*)field)->length_bytes));

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3638
				n_ptr = row_mysql_read_true_varchar(
3639 3640 3641
					&n_len, n_ptr,
					(ulint)
					(((Field_varstring*)field)->length_bytes));
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3642 3643 3644
			}

			break;
3645 3646 3647
		default:
			;
		}
3648

3649
		if (field->null_ptr) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3650 3651
			if (field_in_record_is_null(table, field,
							(char*) old_row)) {
3652 3653
				o_len = UNIV_SQL_NULL;
			}
3654

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3655 3656
			if (field_in_record_is_null(table, field,
							(char*) new_row)) {
3657 3658 3659 3660 3661 3662 3663 3664 3665
				n_len = UNIV_SQL_NULL;
			}
		}

		if (o_len != n_len || (o_len != UNIV_SQL_NULL &&
					0 != memcmp(o_ptr, n_ptr, o_len))) {
			/* The field has changed */

			ufield = uvect->fields + n_changed;
3666

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3667 3668 3669
			/* Let us use a dummy dfield to make the conversion
			from the MySQL column format to the InnoDB format */

3670 3671
			dict_col_copy_type_noninline(prebuilt->table->cols + i,
						     &dfield.type);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3672 3673 3674

			if (n_len != UNIV_SQL_NULL) {
				buf = row_mysql_store_col_in_innobase_format(
3675 3676 3677 3678 3679
					&dfield,
					(byte*)buf,
					TRUE,
					new_mysql_row_col,
					col_pack_len,
3680 3681
					dict_table_is_comp_noninline(
							prebuilt->table));
3682 3683
				ufield->new_val.data = dfield.data;
				ufield->new_val.len = dfield.len;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3684 3685 3686 3687
			} else {
				ufield->new_val.data = NULL;
				ufield->new_val.len = UNIV_SQL_NULL;
			}
3688 3689

			ufield->exp = NULL;
3690 3691
			ufield->field_no = dict_col_get_clust_pos_noninline(
				&prebuilt->table->cols[i], clust_index);
3692 3693 3694 3695 3696 3697 3698
			n_changed++;
		}
	}

	uvect->n_fields = n_changed;
	uvect->info_bits = 0;

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3699 3700
	ut_a(buf <= (byte*)original_upd_buff + buff_len);

3701 3702 3703 3704 3705 3706 3707
	return(0);
}

/**************************************************************************
Updates a row given as a parameter to a new value. Note that we are given
whole rows, not just the fields which are updated: this incurs some
overhead for CPU when we check which fields are actually updated.
3708
TODO: currently InnoDB does not prevent the 'Halloween problem':
3709 3710
in a searched update a single row can get updated several times
if its index columns are updated! */
3711

3712 3713 3714 3715
int
ha_innobase::update_row(
/*====================*/
					/* out: error number or 0 */
3716 3717
	const mysql_byte*	old_row,/* in: old row in MySQL format */
	mysql_byte*		new_row)/* in: new row in MySQL format */
3718 3719 3720
{
	upd_t*		uvect;
	int		error = 0;
3721
	trx_t*		trx = thd_to_trx(ha_thd());
3722

3723
	DBUG_ENTER("ha_innobase::update_row");
3724

3725
	ut_a(prebuilt->trx == trx);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3726

3727 3728
	if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_UPDATE)
		table->timestamp_field->set_time();
3729

3730 3731 3732 3733 3734
	if (prebuilt->upd_node) {
		uvect = prebuilt->upd_node->update;
	} else {
		uvect = row_get_prebuilt_update_vector(prebuilt);
	}
3735 3736 3737 3738

	/* Build an update vector from the modified fields in the rows
	(uses upd_buff of the handle) */

3739
	calc_row_difference(uvect, (mysql_byte*) old_row, new_row, table,
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3740 3741 3742
			upd_buff, (ulint)upd_and_key_val_buff_len,
			prebuilt, user_thd);

3743 3744 3745
	/* This is not a delete */
	prebuilt->upd_node->is_delete = FALSE;

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3746
	assert(prebuilt->template_type == ROW_MYSQL_WHOLE_ROW);
3747

3748
	innodb_srv_conc_enter_innodb(trx);
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
3749

3750
	error = row_update_for_mysql((byte*) old_row, prebuilt);
3751

3752
	innodb_srv_conc_exit_innodb(trx);
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
3753

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3754
	error = convert_error_code_to_mysql(error, user_thd);
3755

3756
	/* Tell InnoDB server that there might be work for
3757 3758
	utility threads: */

3759
	innobase_active_small();
3760 3761 3762 3763 3764 3765 3766 3767 3768 3769

	DBUG_RETURN(error);
}

/**************************************************************************
Deletes a row given as the parameter. */

int
ha_innobase::delete_row(
/*====================*/
3770 3771
					/* out: error number or 0 */
	const mysql_byte* record)	/* in: a row in MySQL format */
3772 3773
{
	int		error = 0;
3774
	trx_t*		trx = thd_to_trx(ha_thd());
3775

3776
	DBUG_ENTER("ha_innobase::delete_row");
3777

3778
	ut_a(prebuilt->trx == trx);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3779

3780 3781 3782
	if (!prebuilt->upd_node) {
		row_get_prebuilt_update_vector(prebuilt);
	}
3783 3784

	/* This is a delete */
3785

3786
	prebuilt->upd_node->is_delete = TRUE;
3787

3788
	innodb_srv_conc_enter_innodb(trx);
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
3789

3790
	error = row_update_for_mysql((byte*) record, prebuilt);
3791

3792
	innodb_srv_conc_exit_innodb(trx);
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
3793

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3794
	error = convert_error_code_to_mysql(error, user_thd);
3795

3796
	/* Tell the InnoDB server that there might be work for
3797 3798
	utility threads: */

3799
	innobase_active_small();
3800 3801 3802 3803

	DBUG_RETURN(error);
}

3804
/**************************************************************************
3805
Removes a new lock set on a row, if it was not read optimistically. This can
3806 3807
be called after a row has been read in the processing of an UPDATE or a DELETE
query, if the option innodb_locks_unsafe_for_binlog is set. */
3808 3809 3810 3811 3812 3813 3814

void
ha_innobase::unlock_row(void)
/*=========================*/
{
	DBUG_ENTER("ha_innobase::unlock_row");

3815 3816 3817 3818 3819 3820 3821
	/* Consistent read does not take any locks, thus there is
	nothing to unlock. */

	if (prebuilt->select_lock_type == LOCK_NONE) {
		DBUG_VOID_RETURN;
	}

3822 3823
	switch (prebuilt->row_read_type) {
	case ROW_READ_WITH_LOCKS:
3824 3825
		if (!srv_locks_unsafe_for_binlog
		|| prebuilt->trx->isolation_level == TRX_ISO_READ_COMMITTED) {
3826 3827 3828 3829
			break;
		}
		/* fall through */
	case ROW_READ_TRY_SEMI_CONSISTENT:
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3830
		row_unlock_for_mysql(prebuilt, FALSE);
3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852
		break;
	case ROW_READ_DID_SEMI_CONSISTENT:
		prebuilt->row_read_type = ROW_READ_TRY_SEMI_CONSISTENT;
		break;
	}

	DBUG_VOID_RETURN;
}

/* See handler.h and row0mysql.h for docs on this function.
Reports whether the row read type stored in the prebuilt struct currently
equals ROW_READ_DID_SEMI_CONSISTENT. */
bool
ha_innobase::was_semi_consistent_read(void)
/*=======================================*/
{
	const bool	did_semi_consistent =
		(ROW_READ_DID_SEMI_CONSISTENT == prebuilt->row_read_type);

	return(did_semi_consistent);
}

/* See handler.h and row0mysql.h for docs on this function. */
void
ha_innobase::try_semi_consistent_read(bool yes)
/*===========================================*/
{
3853
	ut_a(prebuilt->trx == thd_to_trx(ha_thd()));
3854

3855 3856 3857 3858 3859 3860 3861
	/* Row read type is set to semi consistent read if this was
	requested by the MySQL and either innodb_locks_unsafe_for_binlog
	option is used or this session is using READ COMMITTED isolation
	level. */

	if (yes &&  (srv_locks_unsafe_for_binlog
		|| prebuilt->trx->isolation_level == TRX_ISO_READ_COMMITTED)) {
3862 3863 3864
		prebuilt->row_read_type = ROW_READ_TRY_SEMI_CONSISTENT;
	} else {
		prebuilt->row_read_type = ROW_READ_WITH_LOCKS;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3865
	}
3866 3867
}

3868 3869 3870 3871 3872 3873 3874
/**********************************************************************
Initializes a handle to use an index. */

int
ha_innobase::index_init(
/*====================*/
			/* out: 0 or error number */
3875 3876
	uint	keynr,	/* in: key (index) number */
	bool sorted)	/* in: 1 if result MUST be sorted according to index */
3877
{
3878 3879
	int	error	= 0;
	DBUG_ENTER("index_init");
3880

monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
3881
	error = change_active_index(keynr);
3882

3883
	DBUG_RETURN(error);
3884 3885 3886
}

/**********************************************************************
3887
Currently does nothing. */
3888 3889 3890 3891 3892

int
ha_innobase::index_end(void)
/*========================*/
{
3893 3894 3895 3896
	int	error	= 0;
	DBUG_ENTER("index_end");
	active_index=MAX_KEY;
	DBUG_RETURN(error);
3897 3898 3899 3900
}

/*************************************************************************
Converts a search mode flag understood by MySQL to a flag understood
3901
by InnoDB. */
3902 3903 3904 3905 3906 3907 3908
inline
ulint
convert_search_mode_to_innobase(
/*============================*/
	enum ha_rkey_function	find_flag)
{
	switch (find_flag) {
3909 3910 3911
		case HA_READ_KEY_EXACT:		return(PAGE_CUR_GE);
			/* the above does not require the index to be UNIQUE */
		case HA_READ_KEY_OR_NEXT:	return(PAGE_CUR_GE);
3912 3913 3914 3915
		case HA_READ_KEY_OR_PREV:	return(PAGE_CUR_LE);
		case HA_READ_AFTER_KEY:		return(PAGE_CUR_G);
		case HA_READ_BEFORE_KEY:	return(PAGE_CUR_L);
		case HA_READ_PREFIX:		return(PAGE_CUR_GE);
3916 3917
		case HA_READ_PREFIX_LAST:	return(PAGE_CUR_LE);
		case HA_READ_PREFIX_LAST_OR_PREV:return(PAGE_CUR_LE);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3918 3919
		  /* In MySQL-4.0 HA_READ_PREFIX and HA_READ_PREFIX_LAST always
		  pass a complete-field prefix of a key value as the search
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3920 3921 3922 3923 3924
		  tuple. I.e., it is not allowed that the last field would
		  just contain n first bytes of the full field value.
		  MySQL uses a 'padding' trick to convert LIKE 'abc%'
		  type queries so that it can use as a search tuple
		  a complete-field-prefix of a key value. Thus, the InnoDB
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3925 3926 3927 3928 3929 3930 3931
		  search mode PAGE_CUR_LE_OR_EXTENDS is never used.
		  TODO: when/if MySQL starts to use also partial-field
		  prefixes, we have to deal with stripping of spaces
		  and comparison of non-latin1 char type fields in
		  innobase_mysql_cmp() to get PAGE_CUR_LE_OR_EXTENDS to
		  work correctly. */

3932 3933 3934 3935 3936
		default:			assert(0);
	}

	return(0);
}
3937

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948 3949 3950 3951 3952 3953 3954 3955 3956 3957 3958 3959 3960 3961 3962 3963 3964 3965 3966 3967 3968 3969 3970 3971 3972 3973 3974 3975 3976 3977 3978 3979 3980 3981 3982 3983 3984 3985 3986
/*
   BACKGROUND INFO: HOW A SELECT SQL QUERY IS EXECUTED
   ---------------------------------------------------
The following does not cover all the details, but explains how we determine
the start of a new SQL statement, and what is associated with it.

For each table in the database the MySQL interpreter may have several
table handle instances in use, also in a single SQL query. For each table
handle instance there is an InnoDB  'prebuilt' struct which contains most
of the InnoDB data associated with this table handle instance.

  A) if the user has not explicitly set any MySQL table level locks:

  1) MySQL calls ::external_lock to set an 'intention' table level lock on
the table of the handle instance. There we set
prebuilt->sql_stat_start = TRUE. The flag sql_stat_start should be set
true if we are taking this table handle instance to use in a new SQL
statement issued by the user. We also increment trx->n_mysql_tables_in_use.

  2) If prebuilt->sql_stat_start == TRUE we 'pre-compile' the MySQL search
instructions to prebuilt->template of the table handle instance in
::index_read. The template is used to save CPU time in large joins.

  3) In row_search_for_mysql, if prebuilt->sql_stat_start is true, we
allocate a new consistent read view for the trx if it does not yet have one,
or in the case of a locking read, set an InnoDB 'intention' table level
lock on the table.

  4) We do the SELECT. MySQL may repeatedly call ::index_read for the
same table handle instance, if it is a join.

  5) When the SELECT ends, MySQL removes its intention table level locks
in ::external_lock. When trx->n_mysql_tables_in_use drops to zero,
 (a) we execute a COMMIT there if the autocommit is on,
 (b) we also release possible 'SQL statement level resources' InnoDB may
have for this SQL statement. The MySQL interpreter does NOT execute
autocommit for pure read transactions, though it should. That is why the
table handler in that case has to execute the COMMIT in ::external_lock.

  B) If the user has explicitly set MySQL table level locks, then MySQL
does NOT call ::external_lock at the start of the statement. To determine
whether we are at the start of a new SQL statement, at the start of
::index_read we also compare the query id to the latest query id where the
table handle instance was used. If it has changed, we know we are at the
start of a new SQL statement. Since the query id can theoretically
wrap around, we use this test only as a secondary way of determining the
start of a new SQL statement. */


3987 3988 3989 3990 3991 3992 3993 3994 3995
/**************************************************************************
Positions an index cursor to the index specified in the handle. Fetches the
row if any. */

int
ha_innobase::index_read(
/*====================*/
					/* out: 0, HA_ERR_KEY_NOT_FOUND,
					or error number */
3996
	mysql_byte*		buf,	/* in/out: buffer for the returned
3997
					row */
3998
	const mysql_byte*	key_ptr,/* in: key value; if this is NULL
3999
					we position the cursor at the
4000 4001 4002
					start or end of index; this can
					also contain an InnoDB row id, in
					which case key_len is the InnoDB
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4003 4004 4005 4006
					row id length; the key value can
					also be a prefix of a full key value,
					and the last column can be a prefix
					of a full column */
4007
	uint			key_len,/* in: key value length */
4008 4009 4010 4011
	enum ha_rkey_function find_flag)/* in: search flags from my_base.h */
{
	ulint		mode;
	dict_index_t*	index;
4012 4013
	ulint		match_mode	= 0;
	int		error;
4014 4015
	ulint		ret;

4016
	DBUG_ENTER("index_read");
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4017

4018
	ut_a(prebuilt->trx == thd_to_trx(ha_thd()));
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4019

4020
	ha_statistic_increment(&SSV::ha_read_key_count);
4021

4022
	index = prebuilt->index;
4023

4024
	/* Note that if the index for which the search template is built is not
4025
	necessarily prebuilt->index, but can also be the clustered index */
4026

4027 4028 4029 4030
	if (prebuilt->sql_stat_start) {
		build_template(prebuilt, user_thd, table,
							ROW_MYSQL_REC_FIELDS);
	}
4031 4032

	if (key_ptr) {
4033
		/* Convert the search key value to InnoDB format into
4034 4035
		prebuilt->search_tuple */

4036
		row_sel_convert_mysql_key_to_innobase(prebuilt->search_tuple,
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4037 4038 4039 4040
					(byte*) key_val_buff,
					(ulint)upd_and_key_val_buff_len,
					index,
					(byte*) key_ptr,
4041
					(ulint) key_len, prebuilt->trx);
4042 4043 4044 4045
	} else {
		/* We position the cursor to the last or the first entry
		in the index */

4046
		dtuple_set_n_fields(prebuilt->search_tuple, 0);
4047
	}
4048

4049 4050 4051 4052 4053 4054 4055 4056 4057 4058 4059 4060
	mode = convert_search_mode_to_innobase(find_flag);

	match_mode = 0;

	if (find_flag == HA_READ_KEY_EXACT) {
		match_mode = ROW_SEL_EXACT;

	} else if (find_flag == HA_READ_PREFIX
				|| find_flag == HA_READ_PREFIX_LAST) {
		match_mode = ROW_SEL_EXACT_PREFIX;
	}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4061
	last_match_mode = (uint) match_mode;
4062

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4063
	innodb_srv_conc_enter_innodb(prebuilt->trx);
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
4064

4065
	ret = row_search_for_mysql((byte*) buf, mode, prebuilt, match_mode, 0);
4066

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4067
	innodb_srv_conc_exit_innodb(prebuilt->trx);
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
4068

4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080
	if (ret == DB_SUCCESS) {
		error = 0;
		table->status = 0;

	} else if (ret == DB_RECORD_NOT_FOUND) {
		error = HA_ERR_KEY_NOT_FOUND;
		table->status = STATUS_NOT_FOUND;

	} else if (ret == DB_END_OF_INDEX) {
		error = HA_ERR_KEY_NOT_FOUND;
		table->status = STATUS_NOT_FOUND;
	} else {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4081
		error = convert_error_code_to_mysql((int) ret, user_thd);
4082 4083
		table->status = STATUS_NOT_FOUND;
	}
4084

4085 4086 4087
	DBUG_RETURN(error);
}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4088 4089 4090
/***********************************************************************
The following functions works like index_read, but it find the last
row with the current key value or prefix. */
4091 4092

int
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4093 4094
ha_innobase::index_read_last(
/*=========================*/
4095
				   /* out: 0, HA_ERR_KEY_NOT_FOUND, or an
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4096
				   error code */
4097 4098
	mysql_byte*	  buf,	   /* out: fetched row */
	const mysql_byte* key_ptr, /* in: key value, or a prefix of a full
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4099
				   key value */
4100
	uint		  key_len) /* in: length of the key val or prefix
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4101
				   in bytes */
4102
{
4103
	return(index_read(buf, key_ptr, key_len, HA_READ_PREFIX_LAST));
4104 4105
}

4106
/************************************************************************
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4107
Changes the active index of a handle. */
4108 4109 4110 4111

int
ha_innobase::change_active_index(
/*=============================*/
4112
			/* out: 0 or error code */
4113
	uint	keynr)	/* in: use this index; MAX_KEY means always clustered
4114
			index, even if it was internally generated by
4115
			InnoDB */
4116
{
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4117 4118
	KEY*		key=0;
	DBUG_ENTER("change_active_index");
antony@ppcg5.local's avatar
antony@ppcg5.local committed
4119
	ha_statistic_increment(&SSV::ha_read_key_count);
4120

4121 4122
	ut_ad(user_thd == ha_thd());
	ut_a(prebuilt->trx == thd_to_trx(user_thd));
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4123

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4124
	active_index = keynr;
4125

4126
	if (keynr != MAX_KEY && table->s->keys > 0) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4127
		key = table->key_info + active_index;
4128

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4129
		prebuilt->index = dict_table_get_index_noninline(
4130 4131
			prebuilt->table, key->name);
	} else {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4132
		prebuilt->index = dict_table_get_first_index_noninline(
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
4133
							   prebuilt->table);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4134
	}
4135

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4136
	if (!prebuilt->index) {
4137 4138 4139 4140 4141 4142 4143
		sql_print_error(
			"Innodb could not find key n:o %u with name %s "
			"from dict cache for table %s",
			keynr, key ? key->name : "NULL",
			prebuilt->table->name);

		DBUG_RETURN(1);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4144
	}
4145

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4146
	assert(prebuilt->search_tuple != 0);
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
4147

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4148
	dtuple_set_n_fields(prebuilt->search_tuple, prebuilt->index->n_fields);
4149

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4150
	dict_index_copy_types(prebuilt->search_tuple, prebuilt->index,
4151
			prebuilt->index->n_fields);
4152

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4153 4154 4155 4156 4157
	/* MySQL changes the active index for a handle also during some
	queries, for example SELECT MAX(a), SUM(a) first retrieves the MAX()
	and then calculates the sum. Previously we played safe and used
	the flag ROW_MYSQL_WHOLE_ROW below, but that caused unnecessary
	copying. Starting from MySQL-4.1 we use a more efficient flag here. */
4158

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4159
	build_template(prebuilt, user_thd, table, ROW_MYSQL_REC_FIELDS);
4160

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4161
	DBUG_RETURN(0);
4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172
}

/**************************************************************************
Positions an index cursor to the index specified in keynr. Fetches the
row if any. */
/* ??? This is only used to read whole keys ??? */

int
ha_innobase::index_read_idx(
/*========================*/
					/* out: error number or 0 */
4173
	mysql_byte*	buf,		/* in/out: buffer for the returned
4174
					row */
4175
	uint		keynr,		/* in: use this index */
4176
	const mysql_byte* key,		/* in: key value; if this is NULL
4177 4178 4179 4180 4181
					we position the cursor at the
					start or end of index */
	uint		key_len,	/* in: key value length */
	enum ha_rkey_function find_flag)/* in: search flags from my_base.h */
{
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
4182 4183 4184 4185
	if (change_active_index(keynr)) {

		return(1);
	}
4186 4187 4188 4189 4190 4191 4192 4193 4194 4195 4196 4197 4198

	return(index_read(buf, key, key_len, find_flag));
}

/***************************************************************************
Reads the next or previous row from a cursor, which must have previously been
positioned using index_read. */

int
ha_innobase::general_fetch(
/*=======================*/
				/* out: 0, HA_ERR_END_OF_FILE, or error
				number */
4199
	mysql_byte*	buf,	/* in/out: buffer for next row in MySQL
4200
				format */
4201
	uint	direction,	/* in: ROW_SEL_NEXT or ROW_SEL_PREV */
4202 4203 4204 4205 4206
	uint	match_mode)	/* in: 0, ROW_SEL_EXACT, or
				ROW_SEL_EXACT_PREFIX */
{
	ulint		ret;
	int		error	= 0;
4207

4208
	DBUG_ENTER("general_fetch");
4209

4210
	ut_a(prebuilt->trx == thd_to_trx(ha_thd()));
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4211

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4212
	innodb_srv_conc_enter_innodb(prebuilt->trx);
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
4213

monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
4214 4215
	ret = row_search_for_mysql((byte*)buf, 0, prebuilt, match_mode,
								direction);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4216
	innodb_srv_conc_exit_innodb(prebuilt->trx);
4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228 4229

	if (ret == DB_SUCCESS) {
		error = 0;
		table->status = 0;

	} else if (ret == DB_RECORD_NOT_FOUND) {
		error = HA_ERR_END_OF_FILE;
		table->status = STATUS_NOT_FOUND;

	} else if (ret == DB_END_OF_INDEX) {
		error = HA_ERR_END_OF_FILE;
		table->status = STATUS_NOT_FOUND;
	} else {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4230
		error = convert_error_code_to_mysql((int) ret, user_thd);
4231 4232
		table->status = STATUS_NOT_FOUND;
	}
4233

4234 4235 4236 4237 4238 4239 4240 4241 4242 4243 4244 4245
	DBUG_RETURN(error);
}

/***************************************************************************
Reads the next row from a cursor, which must have previously been
positioned using index_read. */

int
ha_innobase::index_next(
/*====================*/
				/* out: 0, HA_ERR_END_OF_FILE, or error
				number */
4246
	mysql_byte*	buf)	/* in/out: buffer for next row in MySQL
4247 4248
				format */
{
4249
	ha_statistic_increment(&SSV::ha_read_next_count);
4250

4251 4252 4253 4254 4255 4256 4257 4258 4259 4260 4261
	return(general_fetch(buf, ROW_SEL_NEXT, 0));
}

/***********************************************************************
Reads the next row matching to the key value given as the parameter. */

int
ha_innobase::index_next_same(
/*=========================*/
				/* out: 0, HA_ERR_END_OF_FILE, or error
				number */
4262
	mysql_byte*	buf,	/* in/out: buffer for the row */
4263
	const mysql_byte* key,	/* in: key value */
4264
	uint		keylen)	/* in: key value length */
4265
{
4266
	ha_statistic_increment(&SSV::ha_read_next_count);
4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279

	return(general_fetch(buf, ROW_SEL_NEXT, last_match_mode));
}

/***************************************************************************
Reads the previous row from a cursor, which must have previously been
positioned using index_read. */

int
ha_innobase::index_prev(
/*====================*/
				/* out: 0, HA_ERR_END_OF_FILE, or error
				number */
4280
	mysql_byte*	buf)	/* in/out: buffer for previous row in MySQL
4281 4282
				format */
{
4283
	ha_statistic_increment(&SSV::ha_read_prev_count);
4284

4285 4286 4287 4288 4289 4290 4291 4292 4293 4294
	return(general_fetch(buf, ROW_SEL_PREV, 0));
}

/************************************************************************
Positions a cursor on the first record in an index and reads the
corresponding row to buf. */

int
ha_innobase::index_first(
/*=====================*/
4295
				/* out: 0, HA_ERR_END_OF_FILE,
4296 4297
				or error code */
	mysql_byte*	buf)	/* in/out: buffer for the row */
4298 4299 4300
{
	int	error;

4301
	DBUG_ENTER("index_first");
4302
	ha_statistic_increment(&SSV::ha_read_first_count);
4303

4304
	error = index_read(buf, NULL, 0, HA_READ_AFTER_KEY);
4305

4306
	/* MySQL does not seem to allow this to return HA_ERR_KEY_NOT_FOUND */
4307

4308 4309 4310
	if (error == HA_ERR_KEY_NOT_FOUND) {
		error = HA_ERR_END_OF_FILE;
	}
4311

4312
	DBUG_RETURN(error);
4313 4314 4315 4316 4317 4318 4319 4320 4321
}

/************************************************************************
Positions a cursor on the last record in an index and reads the
corresponding row to buf. */

int
ha_innobase::index_last(
/*====================*/
4322 4323
				/* out: 0, HA_ERR_END_OF_FILE, or error code */
	mysql_byte*	buf)	/* in/out: buffer for the row */
4324 4325 4326
{
	int	error;

4327
	DBUG_ENTER("index_last");
4328
	ha_statistic_increment(&SSV::ha_read_last_count);
4329

4330
	error = index_read(buf, NULL, 0, HA_READ_BEFORE_KEY);
4331

4332
	/* MySQL does not seem to allow this to return HA_ERR_KEY_NOT_FOUND */
4333

4334 4335 4336
	if (error == HA_ERR_KEY_NOT_FOUND) {
		error = HA_ERR_END_OF_FILE;
	}
4337

4338
	DBUG_RETURN(error);
4339 4340 4341 4342 4343 4344 4345 4346 4347
}

/********************************************************************
Initialize a table scan. */

int
ha_innobase::rnd_init(
/*==================*/
			/* out: 0 or error number */
4348
	bool	scan)	/* in: ???????? */
4349
{
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
4350
	int	err;
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
4351

4352 4353 4354
	/* Store the active index value so that we can restore the original
	value after a scan */

4355
	if (prebuilt->clust_index_was_generated) {
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
4356
		err = change_active_index(MAX_KEY);
4357
	} else {
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
4358
		err = change_active_index(primary_key);
4359
	}
4360

4361 4362 4363 4364 4365 4366 4367
	/* Don't use semi-consistent read in random row reads (by position).
	This means we must disable semi_consistent_read if scan is false */

	if (!scan) {
		try_semi_consistent_read(0);
	}

4368
	start_of_scan = 1;
4369

4370
	return(err);
4371 4372 4373
}

/*********************************************************************
4374
Ends a table scan. */
4375 4376 4377 4378 4379 4380

int
ha_innobase::rnd_end(void)
/*======================*/
				/* out: 0 or error number */
{
4381
	return(index_end());
4382 4383 4384 4385 4386 4387 4388 4389 4390 4391
}

/*********************************************************************
Reads the next row in a table scan (also used to read the FIRST row
in a table scan). */

int
ha_innobase::rnd_next(
/*==================*/
			/* out: 0, HA_ERR_END_OF_FILE, or error number */
4392
	mysql_byte* buf)/* in/out: returns the row in this buffer,
4393 4394
			in MySQL format */
{
4395
	int	error;
4396

4397
	DBUG_ENTER("rnd_next");
4398
	ha_statistic_increment(&SSV::ha_read_rnd_next_count);
4399

4400
	if (start_of_scan) {
4401 4402 4403 4404
		error = index_first(buf);
		if (error == HA_ERR_KEY_NOT_FOUND) {
			error = HA_ERR_END_OF_FILE;
		}
4405
		start_of_scan = 0;
4406
	} else {
4407
		error = general_fetch(buf, ROW_SEL_NEXT, 0);
4408
	}
4409

4410
	DBUG_RETURN(error);
4411 4412 4413
}

/**************************************************************************
4414
Fetches a row from the table based on a row reference. */
4415

4416 4417 4418
int
ha_innobase::rnd_pos(
/*=================*/
4419 4420
				/* out: 0, HA_ERR_KEY_NOT_FOUND,
				or error code */
4421
	mysql_byte*	buf,	/* in/out: buffer for the row */
4422 4423 4424 4425 4426
	mysql_byte*	pos)	/* in: primary key value of the row in the
				MySQL format, or the row id if the clustered
				index was internally generated by InnoDB;
				the length of data in pos has to be
				ref_length */
4427
{
4428 4429
	int		error;
	uint		keynr	= active_index;
4430
	DBUG_ENTER("rnd_pos");
4431
	DBUG_DUMP("key", pos, ref_length);
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
4432

4433
	ha_statistic_increment(&SSV::ha_read_rnd_count);
4434

4435
	ut_a(prebuilt->trx == thd_to_trx(ha_thd()));
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4436

4437 4438 4439 4440
	if (prebuilt->clust_index_was_generated) {
		/* No primary key was defined for the table and we
		generated the clustered index from the row id: the
		row reference is the row id, not any key value
4441
		that MySQL knows of */
4442

monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
4443
		error = change_active_index(MAX_KEY);
4444
	} else {
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
4445
		error = change_active_index(primary_key);
4446
	}
4447

monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
4448
	if (error) {
4449
		DBUG_PRINT("error", ("Got error: %d", error));
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
4450 4451
		DBUG_RETURN(error);
	}
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
4452

4453
	/* Note that we assume the length of the row reference is fixed
4454
	for the table, and it is == ref_length */
4455 4456

	error = index_read(buf, pos, ref_length, HA_READ_KEY_EXACT);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4457 4458

	if (error) {
4459
		DBUG_PRINT("error", ("Got error: %d", error));
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
4460
	}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4461

4462
	change_active_index(keynr);
4463

4464
	DBUG_RETURN(error);
4465 4466 4467
}

/*************************************************************************
4468
Stores a reference to the current row to 'ref' field of the handle. Note
4469 4470
that in the case where we have generated the clustered index for the
table, the function parameter is illogical: we MUST ASSUME that 'record'
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4471
is the current 'position' of the handle, because if row ref is actually
4472
the row id internally generated in InnoDB, then 'record' does not contain
4473 4474
it. We just guess that the row id must be for the record where the handle
was positioned the last time. */
4475 4476 4477 4478

void
ha_innobase::position(
/*==================*/
4479
	const mysql_byte*	record)	/* in: row in MySQL format */
4480
{
4481
	uint		len;
4482

4483
	ut_a(prebuilt->trx == thd_to_trx(ha_thd()));
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4484

4485 4486 4487 4488
	if (prebuilt->clust_index_was_generated) {
		/* No primary key was defined for the table and we
		generated the clustered index from row id: the
		row reference will be the row id, not any key value
4489
		that MySQL knows of */
4490 4491 4492 4493 4494

		len = DATA_ROW_ID_LEN;

		memcpy(ref, prebuilt->row_id, len);
	} else {
4495 4496
		len = store_key_val_for_row(primary_key, (char*)ref,
							 ref_length, record);
4497
	}
4498

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4499 4500
	/* We assume that the 'ref' value len is always fixed for the same
	table. */
4501

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4502
	if (len != ref_length) {
4503
	  sql_print_error("Stored ref len is %lu, but table ref len is %lu",
4504
			  (ulong) len, (ulong) ref_length);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4505
	}
4506 4507 4508
}

/*********************************************************************
4509
Creates a table definition to an InnoDB database. */
4510 4511 4512 4513
static
int
create_table_def(
/*=============*/
4514
	trx_t*		trx,		/* in: InnoDB transaction handle */
4515 4516
	TABLE*		form,		/* in: information on table
					columns and indexes */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4517
	const char*	table_name,	/* in: table name */
marko@hundin.mysql.fi's avatar
marko@hundin.mysql.fi committed
4518
	const char*	path_of_temp_table,/* in: if this is a table explicitly
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4519 4520 4521 4522 4523 4524 4525
					created by the user with the
					TEMPORARY keyword, then this
					parameter is the dir path where the
					table should be placed if we create
					an .ibd file for it (no .ibd extension
					in the path, though); otherwise this
					is NULL */
4526
	ulint		flags)		/* in: table flags */
4527 4528 4529 4530
{
	Field*		field;
	dict_table_t*	table;
	ulint		n_cols;
4531 4532
	int		error;
	ulint		col_type;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4533
	ulint		col_len;
4534
	ulint		nulls_allowed;
4535
	ulint		unsigned_type;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4536
	ulint		binary_type;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4537
	ulint		long_true_varchar;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4538
	ulint		charset_no;
4539
	ulint		i;
4540

4541 4542
	DBUG_ENTER("create_table_def");
	DBUG_PRINT("enter", ("table_name: %s", table_name));
4543

4544
	n_cols = form->s->fields;
4545

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4546 4547
	/* We pass 0 as the space id, and determine at a lower level the space
	id where to store the table */
4548

4549
	table = dict_mem_table_create(table_name, 0, n_cols, flags);
4550

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4551 4552 4553 4554 4555
	if (path_of_temp_table) {
		table->dir_path_of_temp_table =
			mem_heap_strdup(table->heap, path_of_temp_table);
	}

4556 4557 4558
	for (i = 0; i < n_cols; i++) {
		field = form->field[i];

4559 4560
		col_type = get_innobase_type_from_mysql_type(&unsigned_type,
									field);
4561 4562 4563 4564 4565 4566
		if (field->null_ptr) {
			nulls_allowed = 0;
		} else {
			nulls_allowed = DATA_NOT_NULL;
		}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4567
		if (field->binary()) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4568 4569 4570 4571 4572
			binary_type = DATA_BINARY_TYPE;
		} else {
			binary_type = 0;
		}

4573
		charset_no = 0;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4574 4575 4576 4577 4578

		if (dtype_is_string_type(col_type)) {

			charset_no = (ulint)field->charset()->number;

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591
			ut_a(charset_no < 256); /* in data0type.h we assume
						that the number fits in one
						byte */
		}

		ut_a(field->type() < 256); /* we assume in dtype_form_prtype()
					   that this fits in one byte */
		col_len = field->pack_length();

		/* The MySQL pack length contains 1 or 2 bytes length field
		for a true VARCHAR. Let us subtract that, so that the InnoDB
		column length in the InnoDB data dictionary is the real
		maximum byte length of the actual data. */
4592

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4593 4594 4595 4596 4597 4598 4599 4600
		long_true_varchar = 0;

		if (field->type() == MYSQL_TYPE_VARCHAR) {
			col_len -= ((Field_varstring*)field)->length_bytes;

			if (((Field_varstring*)field)->length_bytes == 2) {
				long_true_varchar = DATA_LONG_TRUE_VARCHAR;
			}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4601 4602
		}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4603
		dict_mem_table_add_col(table,
4604 4605 4606 4607 4608 4609 4610
			(char*) field->field_name,
			col_type,
			dtype_form_prtype(
				(ulint)field->type()
				| nulls_allowed | unsigned_type
				| binary_type | long_true_varchar,
				charset_no),
4611
			col_len);
4612 4613 4614 4615
	}

	error = row_create_table_for_mysql(table, trx);

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4616
	error = convert_error_code_to_mysql(error, NULL);
4617 4618 4619 4620 4621

	DBUG_RETURN(error);
}

/*********************************************************************
4622
Creates an index in an InnoDB database. */
4623 4624
static
int
4625 4626
create_index(
/*=========*/
4627
	trx_t*		trx,		/* in: InnoDB transaction handle */
4628 4629 4630 4631 4632
	TABLE*		form,		/* in: information on table
					columns and indexes */
	const char*	table_name,	/* in: table name */
	uint		key_num)	/* in: index number */
{
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4633
	Field*		field;
4634
	dict_index_t*	index;
4635
	int		error;
4636 4637 4638 4639
	ulint		n_fields;
	KEY*		key;
	KEY_PART_INFO*	key_part;
	ulint		ind_type;
4640 4641
	ulint		col_type;
	ulint		prefix_len;
4642
	ulint		is_unsigned;
4643 4644
	ulint		i;
	ulint		j;
4645
	ulint*		field_lengths;
4646 4647

	DBUG_ENTER("create_index");
4648

4649 4650
	key = form->key_info + key_num;

4651
	n_fields = key->key_parts;
4652

4653
	ind_type = 0;
4654

4655
	if (key_num == form->s->primary_key) {
4656 4657
		ind_type = ind_type | DICT_CLUSTERED;
	}
4658

4659 4660 4661 4662
	if (key->flags & HA_NOSAME ) {
		ind_type = ind_type | DICT_UNIQUE;
	}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4663 4664
	/* We pass 0 as the space id, and determine at a lower level the space
	id where to store the table */
4665 4666 4667

	index = dict_mem_index_create((char*) table_name, key->name, 0,
						ind_type, n_fields);
4668 4669 4670

	field_lengths = (ulint*) my_malloc(sizeof(ulint) * n_fields,
		MYF(MY_FAE));
4671

4672 4673 4674
	for (i = 0; i < n_fields; i++) {
		key_part = key->key_part + i;

4675
		/* (The flag HA_PART_KEY_SEG denotes in MySQL a column prefix
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4676 4677 4678 4679
		field in an index: we only store a specified number of first
		bytes of the column to the index field.) The flag does not
		seem to be properly set by MySQL. Let us fall back on testing
		the length of the key part versus the column. */
4680

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4681
		field = NULL;
4682
		for (j = 0; j < form->s->fields; j++) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4683 4684 4685

			field = form->field[j];

4686 4687 4688
			if (0 == innobase_strcasecmp(
					field->field_name,
					key_part->field->field_name)) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4689 4690 4691 4692 4693 4694
				/* Found the corresponding column */

				break;
			}
		}

4695
		ut_a(j < form->s->fields);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4696

4697 4698
		col_type = get_innobase_type_from_mysql_type(
					&is_unsigned, key_part->field);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4699 4700

		if (DATA_BLOB == col_type
4701 4702 4703 4704 4705
			|| (key_part->length < field->pack_length()
				&& field->type() != MYSQL_TYPE_VARCHAR)
			|| (field->type() == MYSQL_TYPE_VARCHAR
				&& key_part->length < field->pack_length()
				- ((Field_varstring*)field)->length_bytes)) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4706

4707
			prefix_len = key_part->length;
4708 4709

			if (col_type == DATA_INT
4710 4711 4712 4713 4714 4715 4716 4717 4718 4719 4720 4721
				|| col_type == DATA_FLOAT
				|| col_type == DATA_DOUBLE
				|| col_type == DATA_DECIMAL) {
				sql_print_error(
					"MySQL is trying to create a column "
					"prefix index field, on an "
					"inappropriate data type. Table "
					"name %s, column name %s.",
					table_name,
					key_part->field->field_name);

				prefix_len = 0;
4722 4723
			}
		} else {
4724
			prefix_len = 0;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4725 4726
		}

4727 4728
		field_lengths[i] = key_part->length;

4729
		dict_mem_index_add_field(index,
4730
			(char*) key_part->field->field_name, prefix_len);
4731 4732
	}

4733 4734 4735
	/* Even though we've defined max_supported_key_part_length, we
	still do our own checking using field_lengths to be absolutely
	sure we don't create too long indexes. */
4736
	error = row_create_index_for_mysql(index, trx, field_lengths);
4737

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4738
	error = convert_error_code_to_mysql(error, NULL);
4739

4740
	my_free(field_lengths, MYF(0));
4741

4742 4743 4744 4745
	DBUG_RETURN(error);
}

/*********************************************************************
4746
Creates an index to an InnoDB table when the user has defined no
4747
primary index. */
4748 4749
static
int
4750 4751
create_clustered_index_when_no_primary(
/*===================================*/
4752
	trx_t*		trx,		/* in: InnoDB transaction handle */
4753 4754 4755
	const char*	table_name)	/* in: table name */
{
	dict_index_t*	index;
4756
	int		error;
4757

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4758 4759
	/* We pass 0 as the space id, and determine at a lower level the space
	id where to store the table */
4760

monty@donna.mysql.fi's avatar
monty@donna.mysql.fi committed
4761
	index = dict_mem_index_create((char*) table_name,
4762
		(char*) "GEN_CLUST_INDEX", 0, DICT_CLUSTERED, 0);
4763
	error = row_create_index_for_mysql(index, trx, NULL);
4764

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4765
	error = convert_error_code_to_mysql(error, NULL);
4766

4767
	return(error);
4768 4769 4770
}

/*********************************************************************
Update create_info.  Used in SHOW CREATE TABLE et al. If the caller has
not supplied an auto-increment value itself (HA_CREATE_USED_AUTO is not
set in used_fields), the value is taken from the handler statistics after
refreshing them with info(HA_STATUS_AUTO). */

void
ha_innobase::update_create_info(
/*============================*/
	HA_CREATE_INFO* create_info)	/* in/out: create info */
{
	if (create_info->used_fields & HA_CREATE_USED_AUTO) {
		/* The auto-increment value was given explicitly:
		leave create_info untouched */

		return;
	}

	/* The return value of info() is not examined here */
	ha_innobase::info(HA_STATUS_AUTO);

	create_info->auto_increment_value = stats.auto_increment_value;
}

/*********************************************************************
4785
Creates a new table to an InnoDB database. */
4786 4787 4788 4789 4790 4791 4792 4793

int
ha_innobase::create(
/*================*/
					/* out: error number */
	const char*	name,		/* in: table name */
	TABLE*		form,		/* in: information on table
					columns and indexes */
4794 4795 4796
	HA_CREATE_INFO*	create_info)	/* in: more information of the
					created table, contains also the
					create statement string */
4797 4798 4799
{
	int		error;
	dict_table_t*	innobase_table;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4800
	trx_t*		parent_trx;
4801
	trx_t*		trx;
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
4802
	int		primary_key_no;
4803
	uint		i;
4804 4805
	char		name2[FN_REFLEN];
	char		norm_name[FN_REFLEN];
4806
	THD		*thd= ha_thd();
4807 4808
	ib_longlong	auto_inc_value;
	ulint		flags;
4809

4810
	DBUG_ENTER("ha_innobase::create");
4811

monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
4812
	DBUG_ASSERT(thd != NULL);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4813

4814
	if (form->s->fields > 1000) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4815 4816 4817
		/* The limit probably should be REC_MAX_N_FIELDS - 3 = 1020,
		but we play safe here */

4818 4819
		DBUG_RETURN(HA_ERR_TO_BIG_ROW);
	}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4820

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4821 4822
	/* Get the transaction associated with the current thd, or create one
	if not yet created */
4823

4824
	parent_trx = check_trx_exists(ht, thd);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4825 4826 4827 4828

	/* In case MySQL calls this in the middle of a SELECT query, release
	possible adaptive hash latch to avoid deadlocks of threads */

4829 4830
	trx_search_latch_release_if_reserved(parent_trx);

4831
	trx = trx_allocate_for_mysql();
4832

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4833 4834
	trx->mysql_thd = thd;
	trx->mysql_query_str = &((*thd).query);
4835

antony@ppcg5.local's avatar
antony@ppcg5.local committed
4836
	if (thd_test_options(thd, OPTION_NO_FOREIGN_KEY_CHECKS)) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4837 4838 4839
		trx->check_foreigns = FALSE;
	}

antony@ppcg5.local's avatar
antony@ppcg5.local committed
4840
	if (thd_test_options(thd, OPTION_RELAXED_UNIQUE_CHECKS)) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4841 4842 4843
		trx->check_unique_secondary = FALSE;
	}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4844 4845 4846 4847 4848
	if (lower_case_table_names) {
		srv_lower_case_table_names = TRUE;
	} else {
		srv_lower_case_table_names = FALSE;
	}
monty@mashka.mysql.fi's avatar
monty@mashka.mysql.fi committed
4849

4850
	strcpy(name2, name);
4851 4852

	normalize_table_name(norm_name, name2);
4853

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4854
	/* Latch the InnoDB data dictionary exclusively so that no deadlocks
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4855
	or lock waits can happen in it during a table create operation.
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4856
	Drop table etc. do this latching in row0mysql.c. */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4857

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4858
	row_mysql_lock_data_dictionary(trx);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4859 4860

	/* Create the table definition in InnoDB */
4861

4862 4863 4864 4865 4866 4867
	flags = 0;

	if (form->s->row_type != ROW_TYPE_REDUNDANT) {
		flags |= DICT_TF_COMPACT;
	}

marko@hundin.mysql.fi's avatar
marko@hundin.mysql.fi committed
4868 4869
	error = create_table_def(trx, form, norm_name,
		create_info->options & HA_LEX_CREATE_TMP_TABLE ? name2 : NULL,
4870
		flags);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4871

4872
	if (error) {
4873
		goto cleanup;
4874
	}
4875

4876 4877
	/* Look for a primary key */

4878
	primary_key_no= (form->s->primary_key != MAX_KEY ?
4879
			 (int) form->s->primary_key :
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
4880
			 -1);
4881

4882 4883 4884
	/* Our function row_get_mysql_key_number_for_index assumes
	the primary key is always number 0, if it exists */

monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
4885
	DBUG_ASSERT(primary_key_no == -1 || primary_key_no == 0);
4886

4887 4888
	/* Create the keys */

4889
	if (form->s->keys == 0 || primary_key_no == -1) {
4890 4891
		/* Create an index which is used as the clustered index;
		order the rows by their row id which is internally generated
4892
		by InnoDB */
4893

4894
		error = create_clustered_index_when_no_primary(trx,
4895
							norm_name);
4896
		if (error) {
4897
			goto cleanup;
4898
		}
4899 4900 4901
	}

	if (primary_key_no != -1) {
4902
		/* In InnoDB the clustered index must always be created
4903
		first */
4904
		if ((error = create_index(trx, form, norm_name,
monty@donna.mysql.fi's avatar
monty@donna.mysql.fi committed
4905
					  (uint) primary_key_no))) {
4906
			goto cleanup;
4907 4908
		}
	}
4909

4910
	for (i = 0; i < form->s->keys; i++) {
4911 4912 4913

		if (i != (uint) primary_key_no) {

4914
			if ((error = create_index(trx, form, norm_name, i))) {
4915
				goto cleanup;
4916 4917 4918
			}
		}
	}
4919

4920
	if (thd->query != NULL) {
4921
		error = row_table_add_foreign_constraints(trx,
4922
			thd->query, norm_name,
4923
			create_info->options & HA_LEX_CREATE_TMP_TABLE);
4924

4925
		error = convert_error_code_to_mysql(error, NULL);
4926

4927 4928
		if (error) {
			goto cleanup;
4929
		}
4930 4931
	}

4932
	innobase_commit_low(trx);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4933

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4934
	row_mysql_unlock_data_dictionary(trx);
4935

monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
4936 4937 4938
	/* Flush the log to reduce probability that the .frm files and
	the InnoDB data dictionary get out-of-sync if the user runs
	with innodb_flush_log_at_trx_commit = 0 */
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
4939

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4940
	log_buffer_flush_to_disk();
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
4941

4942
	innobase_table = dict_table_get(norm_name, FALSE);
4943

monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
4944
	DBUG_ASSERT(innobase_table != 0);
4945

4946 4947
	if ((create_info->used_fields & HA_CREATE_USED_AUTO) &&
	   (create_info->auto_increment_value != 0)) {
4948

4949
		/* Query was ALTER TABLE...AUTO_INCREMENT = x; or
4950
		CREATE TABLE ...AUTO_INCREMENT = x; Find out a table
4951 4952
		definition from the dictionary and get the current value
		of the auto increment field. Set a new value to the
4953 4954
		auto increment field if the value is greater than the
		maximum value in the column. */
4955

4956
		auto_inc_value = create_info->auto_increment_value;
4957 4958 4959
		dict_table_autoinc_initialize(innobase_table, auto_inc_value);
	}

4960
	/* Tell the InnoDB server that there might be work for
4961 4962 4963 4964
	utility threads: */

	srv_active_wake_master_thread();

4965
	trx_free_for_mysql(trx);
4966 4967

	DBUG_RETURN(0);
4968 4969 4970

cleanup:
	innobase_commit_low(trx);
4971

4972
	row_mysql_unlock_data_dictionary(trx);
4973

4974 4975 4976
	trx_free_for_mysql(trx);

	DBUG_RETURN(error);
4977 4978
}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
4979 4980 4981 4982 4983 4984 4985 4986 4987
/*********************************************************************
Discards or imports an InnoDB tablespace. The operation is applied to the
table of the prebuilt struct, within the transaction of the prebuilt
struct. */

int
ha_innobase::discard_or_import_tablespace(
/*======================================*/
				/* out: error code, as converted by
				convert_error_code_to_mysql() */
	my_bool discard)	/* in: TRUE if discard, else import */
{
	DBUG_ENTER("ha_innobase::discard_or_import_tablespace");

	/* The handle must carry a valid transaction that belongs to the
	current thd */

	ut_a(prebuilt->trx);
	ut_a(prebuilt->trx->magic_n == TRX_MAGIC_N);
	ut_a(prebuilt->trx == thd_to_trx(ha_thd()));

	dict_table_t*	dict_table	= prebuilt->table;
	trx_t*		trx		= prebuilt->trx;

	int		err = discard
		? row_discard_tablespace_for_mysql(dict_table->name, trx)
		: row_import_tablespace_for_mysql(dict_table->name, trx);

	err = convert_error_code_to_mysql(err, NULL);

	DBUG_RETURN(err);
}

5012 5013 5014 5015 5016 5017 5018 5019 5020
/*********************************************************************
Deletes all rows of an InnoDB table. */

int
ha_innobase::delete_all_rows(void)
/*==============================*/
				/* out: error number */
{
	int		error;
5021
	THD*		thd		= ha_thd();
5022 5023 5024 5025

	DBUG_ENTER("ha_innobase::delete_all_rows");

	/* Get the transaction associated with the current thd, or create one
5026
	if not yet created, and update prebuilt->trx */
5027

5028
	update_thd(thd);
5029

5030
	if (thd_sql_command(thd) == SQLCOM_TRUNCATE) {
antony@ppcg5.local's avatar
antony@ppcg5.local committed
5031
		/* Truncate the table in InnoDB */
5032

antony@ppcg5.local's avatar
antony@ppcg5.local committed
5033 5034 5035 5036 5037
		error = row_truncate_table_for_mysql(prebuilt->table, prebuilt->trx);
		if (error == DB_ERROR) {
			/* Cannot truncate; resort to ha_innobase::delete_row() */
			goto fallback;
		}
5038

antony@ppcg5.local's avatar
antony@ppcg5.local committed
5039
		error = convert_error_code_to_mysql(error, NULL);
5040

antony@ppcg5.local's avatar
antony@ppcg5.local committed
5041 5042 5043 5044 5045 5046 5047
		DBUG_RETURN(error);
	}

fallback:
	/* We only handle TRUNCATE TABLE t as a special case.
	DELETE FROM t will have to use ha_innobase::delete_row(). */
	DBUG_RETURN(my_errno=HA_ERR_WRONG_COMMAND);
5048 5049
}

5050
/*********************************************************************
5051
Drops a table from an InnoDB database. Before calling this function,
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
5052 5053
MySQL calls innobase_commit to commit the transaction of the current user.
Then the current user cannot have locks set on the table. Drop table
5054 5055
operation inside InnoDB will remove all locks any user has on the table
inside InnoDB. */
5056 5057 5058 5059

int
ha_innobase::delete_table(
/*======================*/
monty@donna.mysql.com's avatar
monty@donna.mysql.com committed
5060 5061
				/* out: error number */
	const char*	name)	/* in: table name */
5062 5063 5064
{
	ulint	name_len;
	int	error;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5065
	trx_t*	parent_trx;
5066
	trx_t*	trx;
5067
	THD	*thd= ha_thd();
5068
	char	norm_name[1000];
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5069

5070
	DBUG_ENTER("ha_innobase::delete_table");
5071

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5072 5073
	/* Get the transaction associated with the current thd, or create one
	if not yet created */
5074

5075
	parent_trx = check_trx_exists(ht, thd);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5076 5077 5078 5079

	/* In case MySQL calls this in the middle of a SELECT query, release
	possible adaptive hash latch to avoid deadlocks of threads */

5080
	trx_search_latch_release_if_reserved(parent_trx);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5081

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5082 5083 5084 5085 5086 5087
	if (lower_case_table_names) {
		srv_lower_case_table_names = TRUE;
	} else {
		srv_lower_case_table_names = FALSE;
	}

5088 5089
	trx = trx_allocate_for_mysql();

5090 5091
	trx->mysql_thd = ha_thd();
        trx->mysql_query_str = &(ha_thd()->query);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5092

5093 5094 5095
        if (thd_test_options(thd, OPTION_NO_FOREIGN_KEY_CHECKS)) {
          trx->check_foreigns = FALSE;
        }
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5096

5097 5098 5099
        if (thd_test_options(thd, OPTION_RELAXED_UNIQUE_CHECKS)) {
          trx->check_unique_secondary = FALSE;
        }
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5100

5101
        name_len = strlen(name);
5102

5103
        assert(name_len < 1000);
5104

5105 5106
        /* Strangely, MySQL passes the table name without the '.frm'
           extension, in contrast to ::create */
5107

5108
        normalize_table_name(norm_name, name);
5109

5110
        /* Drop the table in InnoDB */
5111

5112 5113
        error = row_drop_table_for_mysql(norm_name, trx,
                                         thd_sql_command(thd) == SQLCOM_DROP_DB);
5114

5115 5116 5117
        /* Flush the log to reduce probability that the .frm files and
           the InnoDB data dictionary get out-of-sync if the user runs
           with innodb_flush_log_at_trx_commit = 0 */
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
5118

5119
        log_buffer_flush_to_disk();
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
5120

5121 5122
        /* Tell the InnoDB server that there might be work for
           utility threads: */
5123

5124
        srv_active_wake_master_thread();
5125

5126
        innobase_commit_low(trx);
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
5127

5128
        trx_free_for_mysql(trx);
5129

5130
        error = convert_error_code_to_mysql(error, NULL);
5131

5132
        DBUG_RETURN(error);
5133 5134
}

5135
/*********************************************************************
5136
  Removes all tables in the named database inside InnoDB. */
5137
static
5138
void
5139
innobase_drop_database(
5140 5141 5142 5143 5144 5145 5146 5147 5148 5149 5150 5151 5152 5153 5154 5155 5156 5157
                       /*===================*/
                       /* out: error number */
                       handlerton *hton, /* in: handlerton of Innodb */
                       char*	path)	/* in: database path; inside InnoDB the name
                                           of the last directory in the path is used as
                                           the database name: for example, in 'mysql/data/test'
                                           the database name is 'test' */
{
  ulint	len		= 0;
  trx_t*	parent_trx;
  trx_t*	trx;
  char*	ptr;
  int	error;
  char*	namebuf;

  /* Get the transaction associated with the current thd, or create one
     if not yet created */

serg@janus.mylan's avatar
serg@janus.mylan committed
5158
  parent_trx = check_trx_exists(hton, current_thd);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5159 5160 5161 5162

	/* In case MySQL calls this in the middle of a SELECT query, release
	possible adaptive hash latch to avoid deadlocks of threads */

5163
	trx_search_latch_release_if_reserved(parent_trx);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5164

5165
	ptr = strend(path) - 2;
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
5166

5167 5168 5169 5170 5171 5172
	while (ptr >= path && *ptr != '\\' && *ptr != '/') {
		ptr--;
		len++;
	}

	ptr++;
5173
	namebuf = (char*) my_malloc((uint) len + 2, MYF(0));
5174 5175 5176 5177

	memcpy(namebuf, ptr, len);
	namebuf[len] = '/';
	namebuf[len + 1] = '\0';
5178
#ifdef	__WIN__
5179
	innobase_casedn_str(namebuf);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5180
#endif
5181
	trx = trx_allocate_for_mysql();
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5182 5183
	trx->mysql_thd = current_thd;
	trx->mysql_query_str = &((*current_thd).query);
5184

antony@ppcg5.local's avatar
antony@ppcg5.local committed
5185
	if (thd_test_options(current_thd, OPTION_NO_FOREIGN_KEY_CHECKS)) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5186 5187 5188
		trx->check_foreigns = FALSE;
	}

5189
	error = row_drop_database_for_mysql(namebuf, trx);
5190
	my_free(namebuf, MYF(0));
5191

monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
5192 5193 5194
	/* Flush the log to reduce probability that the .frm files and
	the InnoDB data dictionary get out-of-sync if the user runs
	with innodb_flush_log_at_trx_commit = 0 */
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
5195

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5196
	log_buffer_flush_to_disk();
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
5197

5198 5199 5200 5201 5202
	/* Tell the InnoDB server that there might be work for
	utility threads: */

	srv_active_wake_master_thread();

5203 5204 5205
	innobase_commit_low(trx);
	trx_free_for_mysql(trx);
#ifdef NO_LONGER_INTERESTED_IN_DROP_DB_ERROR
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5206
	error = convert_error_code_to_mysql(error, NULL);
5207 5208

	return(error);
5209 5210 5211
#else
	return;
#endif
5212 5213
}

5214
/*************************************************************************
5215
Renames an InnoDB table. */
5216 5217 5218 5219 5220 5221 5222 5223 5224 5225 5226

int
ha_innobase::rename_table(
/*======================*/
				/* out: 0 or error code */
	const char*	from,	/* in: old name of the table */
	const char*	to)	/* in: new name of the table */
{
	ulint	name_len1;
	ulint	name_len2;
	int	error;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5227
	trx_t*	parent_trx;
5228
	trx_t*	trx;
5229 5230
	char	norm_from[1000];
	char	norm_to[1000];
5231

5232
	DBUG_ENTER("ha_innobase::rename_table");
5233

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5234 5235
	/* Get the transaction associated with the current thd, or create one
	if not yet created */
5236

5237
	parent_trx = check_trx_exists(ht, ha_thd());
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5238 5239 5240 5241

	/* In case MySQL calls this in the middle of a SELECT query, release
	possible adaptive hash latch to avoid deadlocks of threads */

5242
	trx_search_latch_release_if_reserved(parent_trx);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5243

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5244 5245 5246 5247 5248 5249
	if (lower_case_table_names) {
		srv_lower_case_table_names = TRUE;
	} else {
		srv_lower_case_table_names = FALSE;
	}

5250
	trx = trx_allocate_for_mysql();
5251 5252
	trx->mysql_thd = ha_thd();
	trx->mysql_query_str = &((*ha_thd()).query);
5253

5254
	if (thd_test_options(ha_thd(), OPTION_NO_FOREIGN_KEY_CHECKS)) {
5255 5256 5257
		trx->check_foreigns = FALSE;
	}

5258 5259 5260 5261 5262
	name_len1 = strlen(from);
	name_len2 = strlen(to);

	assert(name_len1 < 1000);
	assert(name_len2 < 1000);
5263

5264 5265 5266
	normalize_table_name(norm_from, from);
	normalize_table_name(norm_to, to);

5267
	/* Rename the table in InnoDB */
5268

5269
	error = row_rename_table_for_mysql(norm_from, norm_to, trx);
5270

monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
5271 5272 5273
	/* Flush the log to reduce probability that the .frm files and
	the InnoDB data dictionary get out-of-sync if the user runs
	with innodb_flush_log_at_trx_commit = 0 */
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
5274

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5275
	log_buffer_flush_to_disk();
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
5276

5277
	/* Tell the InnoDB server that there might be work for
5278 5279 5280 5281
	utility threads: */

	srv_active_wake_master_thread();

5282 5283
	innobase_commit_low(trx);
	trx_free_for_mysql(trx);
5284

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5285
	error = convert_error_code_to_mysql(error, NULL);
5286 5287 5288 5289 5290 5291 5292 5293 5294 5295

	DBUG_RETURN(error);
}

/*************************************************************************
Estimates the number of index records in a range. Both range ends are
converted to InnoDB search tuples and passed to
btr_estimate_n_rows_in_range(). An estimate of 0 is never returned; it is
replaced by 1. As a side effect active_index is set to keynr. */

ha_rows
ha_innobase::records_in_range(
/*==========================*/
						/* out: estimated number of
						rows */
	uint			keynr,		/* in: index number */
	key_range		*min_key,	/* in: start key value of the
						   range, may also be 0 */
	key_range		*max_key)	/* in: range end key val, may
						   also be 0 */
{
	/* Scratch buffer for the converted end key; the converted start key
	goes to key_val_buff. MY_FAE is passed to my_malloc, so the result
	is not checked for NULL here. */

	ulint		buff2_len	= table->s->reclength
					+ table->s->max_key_length + 100;
	mysql_byte*	key_val_buff2	= (mysql_byte*) my_malloc(buff2_len,
								MYF(MY_FAE));
	void*		heap1;
	void*		heap2;

	DBUG_ENTER("records_in_range");

	ut_a(prebuilt->trx == thd_to_trx(ha_thd()));

	prebuilt->trx->op_info = (char*)"estimating records in index range";

	/* In case MySQL calls this in the middle of a SELECT query, release
	possible adaptive hash latch to avoid deadlocks of threads */

	trx_search_latch_release_if_reserved(prebuilt->trx);

	active_index = keynr;

	KEY*		key	= &table->key_info[active_index];

	dict_index_t*	index	= dict_table_get_index_noninline(
		prebuilt->table, key->name);

	/* Build one empty search tuple per range end, each in a heap of its
	own, typed after the index fields */

	dtuple_t*	range_start = dtuple_create_for_mysql(&heap1,
							key->key_parts);
	dict_index_copy_types(range_start, index, key->key_parts);

	dtuple_t*	range_end = dtuple_create_for_mysql(&heap2,
							key->key_parts);
	dict_index_copy_types(range_end, index, key->key_parts);

	/* A missing range end is passed on as a null key of length 0 */

	const mysql_byte*	start_key = min_key
		? min_key->key : (const mysql_byte*) 0;
	ulint			start_len = (ulint) (min_key
						? min_key->length : 0);

	row_sel_convert_mysql_key_to_innobase(
				range_start, (byte*) key_val_buff,
				(ulint)upd_and_key_val_buff_len,
				index,
				(byte*) start_key,
				start_len,
				prebuilt->trx);

	const mysql_byte*	end_key = max_key
		? max_key->key : (const mysql_byte*) 0;
	ulint			end_len = (ulint) (max_key
						? max_key->length : 0);

	row_sel_convert_mysql_key_to_innobase(
				range_end, (byte*) key_val_buff2,
				buff2_len, index,
				(byte*) end_key,
				end_len,
				prebuilt->trx);

	/* A missing range end uses the search mode of HA_READ_KEY_EXACT */

	ulint	mode1 = convert_search_mode_to_innobase(
		min_key ? min_key->flag : HA_READ_KEY_EXACT);
	ulint	mode2 = convert_search_mode_to_innobase(
		max_key ? max_key->flag : HA_READ_KEY_EXACT);

	ib_longlong	n_rows = btr_estimate_n_rows_in_range(
		index, range_start, mode1, range_end, mode2);

	dtuple_free_for_mysql(heap1);
	dtuple_free_for_mysql(heap2);

	my_free(key_val_buff2, MYF(0));

	prebuilt->trx->op_info = (char*)"";

	/* The MySQL optimizer seems to believe an estimate of 0 rows is
	always accurate and may return the result 'Empty set' based on that.
	The accuracy is not guaranteed, and even if it were, for a locking
	read we should anyway perform the search to set the next-key lock.
	Add 1 to the value to make sure MySQL does not make the assumption! */

	if (n_rows == 0) {
		n_rows = 1;
	}

	DBUG_RETURN((ha_rows) n_rows);
}

5387 5388
/*************************************************************************
Gives an UPPER BOUND to the number of rows in a table. This is used in
5389
filesort.cc. */
5390 5391

ha_rows
sergefp@mysql.com's avatar
sergefp@mysql.com committed
5392
ha_innobase::estimate_rows_upper_bound(void)
5393
/*======================================*/
5394
			/* out: upper bound of rows */
5395
{
5396 5397
	dict_index_t*	index;
	ulonglong	estimate;
5398
	ulonglong	local_data_file_length;
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
5399

5400
	DBUG_ENTER("estimate_rows_upper_bound");
5401

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5402 5403 5404 5405
	/* We do not know if MySQL can call this function before calling
	external_lock(). To be safe, update the thd of the current table
	handle. */

5406
	update_thd(ha_thd());
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5407

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5408
	prebuilt->trx->op_info = (char*)
5409
				 "calculating upper bound for table rows";
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5410

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5411 5412 5413 5414
	/* In case MySQL calls this in the middle of a SELECT query, release
	possible adaptive hash latch to avoid deadlocks of threads */

	trx_search_latch_release_if_reserved(prebuilt->trx);
5415

5416
	index = dict_table_get_first_index_noninline(prebuilt->table);
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
5417

5418
	local_data_file_length = ((ulonglong) index->stat_n_leaf_pages)
5419
							* UNIV_PAGE_SIZE;
5420

heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
5421 5422
	/* Calculate a minimum length for a clustered index record and from
	that an upper bound for the number of rows. Since we only calculate
5423 5424
	new statistics in row0mysql.c when a table has grown by a threshold
	factor, we must add a safety factor 2 in front of the formula below. */
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
5425

5426 5427
	estimate = 2 * local_data_file_length /
					 dict_index_calc_min_rec_len(index);
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
5428

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5429 5430
	prebuilt->trx->op_info = (char*)"";

monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
5431
	DBUG_RETURN((ha_rows) estimate);
5432 5433
}

5434 5435 5436 5437 5438 5439 5440 5441 5442 5443
/*************************************************************************
How many seeks it will take to read through the table. This is to be
comparable to the number returned by records_in_range so that we can
decide if we should scan the table or use keys. */

double
ha_innobase::scan_time()
/*====================*/
			/* out: estimated time measured in disk seeks */
{
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
5444 5445 5446 5447
	/* Since MySQL seems to favor table scans too much over index
	searches, we pretend that a sequential read takes the same time
	as a random disk read, that is, we do not divide the following
	by 10, which would be physically realistic. */
5448

heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
5449
	return((double) (prebuilt->table->stat_clustered_index_size));
5450 5451
}

5452 5453 5454
/**********************************************************************
Calculate the time it takes to read a set of ranges through an index
This enables us to optimise reads for clustered indexes. */
monty@narttu.mysql.fi's avatar
monty@narttu.mysql.fi committed
5455

5456 5457 5458 5459
double
ha_innobase::read_time(
/*===================*/
			/* out: estimated time measured in disk seeks */
5460
	uint	index,	/* in: key number */
5461 5462
	uint	ranges,	/* in: how many ranges */
	ha_rows rows)	/* in: estimated number of rows in the ranges */
monty@narttu.mysql.fi's avatar
monty@narttu.mysql.fi committed
5463
{
5464
	ha_rows total_rows;
5465 5466
	double	time_for_scan;

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5467
	if (index != table->s->primary_key) {
5468 5469
		/* Not clustered */
		return(handler::read_time(index, ranges, rows));
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5470
	}
5471

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5472
	if (rows <= 2) {
5473

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5474 5475
		return((double) rows);
	}
5476 5477 5478 5479

	/* Assume that the read time is proportional to the scan time for all
	rows + at most one seek per range. */

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5480
	time_for_scan = scan_time();
5481

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5482
	if ((total_rows = estimate_rows_upper_bound()) < rows) {
5483

5484
		return(time_for_scan);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5485
	}
5486

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5487
	return(ranges + (double) rows / (double) total_rows * time_for_scan);
monty@narttu.mysql.fi's avatar
monty@narttu.mysql.fi committed
5488 5489
}

5490 5491 5492 5493
/*************************************************************************
Returns statistics information of the table to the MySQL interpreter,
in various fields of the handle object. */

5494
int
5495 5496 5497 5498 5499 5500
ha_innobase::info(
/*==============*/
	uint flag)	/* in: what information MySQL requests */
{
	dict_table_t*	ib_table;
	dict_index_t*	index;
5501
	ha_rows		rec_per_key;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5502
	ib_longlong	n_rows;
5503 5504
	ulong		j;
	ulong		i;
5505
	char		path[FN_REFLEN];
5506
	os_file_stat_t	stat_info;
5507

5508
	DBUG_ENTER("info");
5509

5510
	/* If we are forcing recovery at a high level, we will suppress
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5511 5512 5513
	statistics calculation on tables, because that may crash the
	server if an index is badly corrupted. */

5514
	if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5515

5516
                DBUG_RETURN(HA_ERR_CRASHED);
5517
	}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5518

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5519 5520 5521 5522
	/* We do not know if MySQL can call this function before calling
	external_lock(). To be safe, update the thd of the current table
	handle. */

5523
	update_thd(ha_thd());
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5524 5525 5526 5527

	/* In case MySQL calls this in the middle of a SELECT query, release
	possible adaptive hash latch to avoid deadlocks of threads */

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5528 5529
	prebuilt->trx->op_info = (char*)"returning various info to MySQL";

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5530
	trx_search_latch_release_if_reserved(prebuilt->trx);
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
5531

5532
	ib_table = prebuilt->table;
5533

5534
	if (flag & HA_STATUS_TIME) {
5535 5536 5537
		if (srv_stats_on_metadata) {
			/* In sql_show we call with this flag: update then statistics
			so that they are up-to-date */
5538

5539
			prebuilt->trx->op_info = (char*)"updating table statistics";
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5540

5541
			dict_update_statistics(ib_table);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5542

5543 5544 5545
			prebuilt->trx->op_info = (char*)
						  "returning various info to MySQL";
		}
5546

5547
		my_snprintf(path, sizeof(path), "%s/%s%s",
5548 5549
				mysql_data_home, ib_table->name, reg_ext);

5550
		unpack_filename(path,path);
5551

5552
		/* Note that we do not know the access time of the table,
5553 5554
		nor the CHECK TABLE time, nor the UPDATE or INSERT time. */

5555
		if (os_file_get_status(path,&stat_info)) {
5556
			stats.create_time = stat_info.ctime;
5557
		}
5558
	}
5559 5560

	if (flag & HA_STATUS_VARIABLE) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5561 5562 5563 5564 5565 5566 5567 5568 5569 5570
		n_rows = ib_table->stat_n_rows;

		/* Because we do not protect stat_n_rows by any mutex in a
		delete, it is theoretically possible that the value can be
		smaller than zero! TODO: fix this race.

		The MySQL optimizer seems to assume in a left join that n_rows
		is an accurate estimate if it is zero. Of course, it is not,
		since we do not have any locks on the rows yet at this phase.
		Since SHOW TABLE STATUS seems to call this function with the
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5571
		HA_STATUS_TIME flag set, while the left join optimizer does not
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5572 5573 5574 5575 5576 5577 5578 5579 5580 5581 5582 5583
		set that flag, we add one to a zero value if the flag is not
		set. That way SHOW TABLE STATUS will show the best estimate,
		while the optimizer never sees the table empty. */

		if (n_rows < 0) {
			n_rows = 0;
		}

		if (n_rows == 0 && !(flag & HA_STATUS_TIME)) {
			n_rows++;
		}

5584 5585 5586
		stats.records = (ha_rows)n_rows;
		stats.deleted = 0;
		stats.data_file_length = ((ulonglong)
5587
				ib_table->stat_clustered_index_size)
5588
					* UNIV_PAGE_SIZE;
5589
		stats.index_file_length = ((ulonglong)
5590
				ib_table->stat_sum_of_other_index_sizes)
5591
					* UNIV_PAGE_SIZE;
5592 5593
		stats.delete_length = 0;
		stats.check_time = 0;
5594

5595 5596
		if (stats.records == 0) {
			stats.mean_rec_length = 0;
5597
		} else {
5598
			stats.mean_rec_length = (ulong) (stats.data_file_length / stats.records);
5599 5600
		}
	}
5601 5602 5603 5604 5605 5606 5607

	if (flag & HA_STATUS_CONST) {
		index = dict_table_get_first_index_noninline(ib_table);

		if (prebuilt->clust_index_was_generated) {
			index = dict_table_get_next_index_noninline(index);
		}
5608

5609
		for (i = 0; i < table->s->keys; i++) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5610 5611
			if (index == NULL) {
				ut_print_timestamp(stderr);
5612
				sql_print_error("Table %s contains fewer "
5613 5614 5615 5616
						"indexes inside InnoDB than "
						"are defined in the MySQL "
						".frm file. Have you mixed up "
						".frm files from different "
5617
						"installations? See "
5618
"http://dev.mysql.com/doc/refman/5.1/en/innodb-troubleshooting.html\n",
5619

5620
						ib_table->name);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5621 5622 5623
				break;
			}

5624 5625
			for (j = 0; j < table->key_info[i].key_parts; j++) {

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5626
				if (j + 1 > index->n_uniq) {
5627
					ut_print_timestamp(stderr);
5628 5629 5630 5631
					sql_print_error(
"Index %s of %s has %lu columns unique inside InnoDB, but MySQL is asking "
"statistics for %lu columns. Have you mixed up .frm files from different "
"installations? "
5632
"See http://dev.mysql.com/doc/refman/5.1/en/innodb-troubleshooting.html\n",
5633 5634 5635 5636
							index->name,
							ib_table->name,
							(unsigned long)
							index->n_uniq, j + 1);
5637
					break;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5638 5639
				}

5640 5641
				if (index->stat_n_diff_key_vals[j + 1] == 0) {

5642
					rec_per_key = stats.records;
5643
				} else {
5644
					rec_per_key = (ha_rows)(stats.records /
5645
					 index->stat_n_diff_key_vals[j + 1]);
5646 5647
				}

heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
5648 5649 5650 5651 5652 5653 5654
				/* Since MySQL seems to favor table scans
				too much over index searches, we pretend
				index selectivity is 2 times better than
				our estimate: */

				rec_per_key = rec_per_key / 2;

5655 5656 5657
				if (rec_per_key == 0) {
					rec_per_key = 1;
				}
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
5658

5659
				table->key_info[i].rec_per_key[j]=
5660 5661
				  rec_per_key >= ~(ulong) 0 ? ~(ulong) 0 :
				  rec_per_key;
5662
			}
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
5663

5664
			index = dict_table_get_next_index_noninline(index);
5665 5666
		}
	}
5667

5668
	if (flag & HA_STATUS_ERRKEY) {
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
5669 5670
		ut_a(prebuilt->trx && prebuilt->trx->magic_n == TRX_MAGIC_N);

5671
		errkey = (unsigned int) row_get_mysql_key_number_for_index(
5672 5673
			(dict_index_t*) trx_get_error_info(prebuilt->trx));
	}
5674

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5675 5676 5677 5678 5679 5680
	if (flag & HA_STATUS_AUTO && table->found_next_number_field) {
		longlong	auto_inc;
		int		ret;

		/* The following function call can the first time fail in
		a lock wait timeout error because it reserves the auto-inc
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5681
		lock on the table. If it fails, then someone is already initing
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5682 5683 5684
		the auto-inc counter, and the second call is guaranteed to
		succeed. */

5685
		ret = innobase_read_and_init_auto_inc(&auto_inc);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5686 5687 5688 5689 5690 5691 5692 5693 5694 5695 5696 5697

		if (ret != 0) {
			ret = innobase_read_and_init_auto_inc(&auto_inc);

			if (ret != 0) {
				ut_print_timestamp(stderr);
				sql_print_error("Cannot get table %s auto-inc"
						"counter value in ::info\n",
						ib_table->name);
				auto_inc = 0;
			}
		}
5698

5699
		stats.auto_increment_value = auto_inc;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5700 5701
	}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5702 5703
	prebuilt->trx->op_info = (char*)"";

5704
  	DBUG_RETURN(0);
5705 5706
}

5707
/**************************************************************************
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5708 5709
Updates index cardinalities of the table, based on 8 random dives into
each index tree. This does NOT calculate exact statistics on the table. */
5710 5711 5712

int
ha_innobase::analyze(
5713
/*=================*/
5714 5715 5716 5717 5718 5719 5720 5721 5722 5723
					/* out: returns always 0 (success) */
	THD*		thd,		/* in: connection thread handle */
	HA_CHECK_OPT*	check_opt)	/* in: currently ignored */
{
	/* Simply call ::info() with all the flags */
	info(HA_STATUS_TIME | HA_STATUS_CONST | HA_STATUS_VARIABLE);

	return(0);
}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5724
/**************************************************************************
5725
This is mapped to "ALTER TABLE tablename ENGINE=InnoDB", which rebuilds
5726
the table in MySQL. */
5727

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5728 5729 5730 5731 5732
int
ha_innobase::optimize(
/*==================*/
	THD*		thd,		/* in: connection thread handle */
	HA_CHECK_OPT*	check_opt)	/* in: currently ignored */
5733
{
5734
	return(HA_ADMIN_TRY_ALTER);
5735 5736
}

5737 5738 5739 5740 5741 5742 5743 5744 5745 5746
/***********************************************************************
Tries to check that an InnoDB table is not corrupted. If corruption is
noticed, prints to stderr information about it. In case of corruption
may also assert a failure and crash the server. */

int
ha_innobase::check(
/*===============*/
					/* out: HA_ADMIN_CORRUPT or
					HA_ADMIN_OK */
5747 5748
	THD*		thd,		/* in: user thread handle */
	HA_CHECK_OPT*	check_opt)	/* in: check options, currently
5749 5750 5751
					ignored */
{
	ulint		ret;
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
5752

heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
5753
	ut_a(prebuilt->trx && prebuilt->trx->magic_n == TRX_MAGIC_N);
5754
	ut_a(prebuilt->trx == thd_to_trx(ha_thd()));
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
5755

5756 5757 5758 5759 5760 5761 5762 5763 5764 5765 5766 5767
	if (prebuilt->mysql_template == NULL) {
		/* Build the template; we will use a dummy template
		in index scans done in checking */

		build_template(prebuilt, NULL, table, ROW_MYSQL_WHOLE_ROW);
	}

	ret = row_check_table_for_mysql(prebuilt);

	if (ret == DB_SUCCESS) {
		return(HA_ADMIN_OK);
	}
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
5768

5769
	return(HA_ADMIN_CORRUPT);
5770 5771
}

5772
/*****************************************************************
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
5773 5774 5775
Adds information about free space in the InnoDB tablespace to a table comment
which is printed out when a user calls SHOW TABLE STATUS. Adds also info on
foreign keys. */
5776 5777 5778 5779

char*
ha_innobase::update_table_comment(
/*==============================*/
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
5780 5781
				/* out: table comment + InnoDB free space +
				info on foreign keys */
5782
	const char*	comment)/* in: table comment defined by user */
5783
{
5784 5785
	uint	length = (uint) strlen(comment);
	char*	str;
5786
	long	flen;
5787

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5788 5789 5790 5791
	/* We do not know if MySQL can call this function before calling
	external_lock(). To be safe, update the thd of the current table
	handle. */

monty@mishka.local's avatar
monty@mishka.local committed
5792
	if (length > 64000 - 3) {
5793 5794 5795
		return((char*)comment); /* string too long */
	}

5796
	update_thd(ha_thd());
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5797

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5798 5799
	prebuilt->trx->op_info = (char*)"returning table comment";

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5800 5801 5802 5803
	/* In case MySQL calls this in the middle of a SELECT query, release
	possible adaptive hash latch to avoid deadlocks of threads */

	trx_search_latch_release_if_reserved(prebuilt->trx);
5804
	str = NULL;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5805

5806
	/* output the data to a temporary file */
monty@donna.mysql.fi's avatar
Merge  
monty@donna.mysql.fi committed
5807

5808 5809
	mutex_enter_noninline(&srv_dict_tmpfile_mutex);
	rewind(srv_dict_tmpfile);
5810

5811 5812 5813 5814 5815
	fprintf(srv_dict_tmpfile, "InnoDB free: %lu kB",
		   (ulong) fsp_get_available_space_in_free_extents(
					prebuilt->table->space));

	dict_print_info_on_foreign_keys(FALSE, srv_dict_tmpfile,
5816
				prebuilt->trx, prebuilt->table);
5817 5818 5819 5820 5821 5822
	flen = ftell(srv_dict_tmpfile);
	if (flen < 0) {
		flen = 0;
	} else if (length + flen + 3 > 64000) {
		flen = 64000 - 3 - length;
	}
5823

5824 5825
	/* allocate buffer for the full string, and
	read the contents of the temporary file */
5826

5827
	str = (char*) my_malloc(length + flen + 3, MYF(0));
5828

5829 5830 5831 5832 5833 5834
	if (str) {
		char* pos	= str + length;
		if (length) {
			memcpy(str, comment, length);
			*pos++ = ';';
			*pos++ = ' ';
5835
		}
5836 5837 5838
		rewind(srv_dict_tmpfile);
		flen = (uint) fread(pos, 1, flen, srv_dict_tmpfile);
		pos[flen] = 0;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5839
	}
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
5840

5841 5842 5843
	mutex_exit_noninline(&srv_dict_tmpfile_mutex);

	prebuilt->trx->op_info = (char*)"";
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5844

5845
	return(str ? str : (char*) comment);
5846 5847
}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5848 5849 5850 5851 5852 5853 5854 5855 5856 5857
/***********************************************************************
Gets the foreign key create info for a table stored in InnoDB. */

char*
ha_innobase::get_foreign_key_create_info(void)
/*==========================================*/
			/* out, own: character string in the form which
			can be inserted to the CREATE TABLE statement,
			MUST be freed with ::free_foreign_key_create_info */
{
5858
	char*	str	= 0;
5859
	long	flen;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5860

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5861
	ut_a(prebuilt != NULL);
5862

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5863 5864 5865 5866
	/* We do not know if MySQL can call this function before calling
	external_lock(). To be safe, update the thd of the current table
	handle. */

5867
	update_thd(ha_thd());
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5868

5869
	prebuilt->trx->op_info = (char*)"getting info on foreign keys";
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5870

5871 5872 5873
	/* In case MySQL calls this in the middle of a SELECT query,
	release possible adaptive hash latch to avoid
	deadlocks of threads */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5874

5875
	trx_search_latch_release_if_reserved(prebuilt->trx);
5876

5877 5878
	mutex_enter_noninline(&srv_dict_tmpfile_mutex);
	rewind(srv_dict_tmpfile);
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
5879

5880 5881
	/* output the data to a temporary file */
	dict_print_info_on_foreign_keys(TRUE, srv_dict_tmpfile,
5882
				prebuilt->trx, prebuilt->table);
5883
	prebuilt->trx->op_info = (char*)"";
5884

5885 5886 5887 5888 5889 5890
	flen = ftell(srv_dict_tmpfile);
	if (flen < 0) {
		flen = 0;
	} else if (flen > 64000 - 1) {
		flen = 64000 - 1;
	}
5891

5892 5893
	/* allocate buffer for the string, and
	read the contents of the temporary file */
5894

5895
	str = (char*) my_malloc(flen + 1, MYF(0));
5896

5897 5898 5899 5900
	if (str) {
		rewind(srv_dict_tmpfile);
		flen = (uint) fread(str, 1, flen, srv_dict_tmpfile);
		str[flen] = 0;
5901
	}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5902

5903 5904 5905
	mutex_exit_noninline(&srv_dict_tmpfile_mutex);

	return(str);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5906
}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
5907

5908

5909
int
5910 5911 5912 5913 5914 5915
ha_innobase::get_foreign_key_list(THD *thd, List<FOREIGN_KEY_INFO> *f_key_list)
{
  dict_foreign_t* foreign;

  DBUG_ENTER("get_foreign_key_list");
  ut_a(prebuilt != NULL);
5916
  update_thd(ha_thd());
5917 5918
  prebuilt->trx->op_info = (char*)"getting list of foreign keys";
  trx_search_latch_release_if_reserved(prebuilt->trx);
5919
  mutex_enter_noninline(&(dict_sys->mutex));
5920 5921
  foreign = UT_LIST_GET_FIRST(prebuilt->table->foreign_list);

5922 5923 5924 5925
  while (foreign != NULL) {
	  uint i;
	  FOREIGN_KEY_INFO f_key_info;
	  LEX_STRING *name= 0;
5926
          uint ulen;
5927 5928
          char uname[NAME_LEN+1];           /* Unencoded name */
          char db_name[NAME_LEN+1];
5929 5930 5931 5932 5933 5934 5935 5936 5937 5938
	  const char *tmp_buff;

	  tmp_buff= foreign->id;
	  i= 0;
	  while (tmp_buff[i] != '/')
		  i++;
	  tmp_buff+= i + 1;
	  f_key_info.forein_id= make_lex_string(thd, 0, tmp_buff,
		  (uint) strlen(tmp_buff), 1);
	  tmp_buff= foreign->referenced_table_name;
5939 5940

          /* Database name */
5941 5942
	  i= 0;
	  while (tmp_buff[i] != '/')
5943 5944 5945 5946 5947 5948 5949 5950 5951
          {
            db_name[i]= tmp_buff[i];
            i++;
          }
          db_name[i]= 0;
          ulen= filename_to_tablename(db_name, uname, sizeof(uname));
          f_key_info.referenced_db= make_lex_string(thd, 0, uname, ulen, 1);

          /* Table name */
5952
	  tmp_buff+= i + 1;
5953 5954 5955
          ulen= filename_to_tablename(tmp_buff, uname, sizeof(uname));
          f_key_info.referenced_table= make_lex_string(thd, 0, uname,
                                                       ulen, 1);
5956 5957 5958 5959 5960 5961 5962 5963 5964 5965 5966 5967 5968 5969

	  for (i= 0;;) {
		  tmp_buff= foreign->foreign_col_names[i];
		  name= make_lex_string(thd, name, tmp_buff,
			  (uint) strlen(tmp_buff), 1);
		  f_key_info.foreign_fields.push_back(name);
		  tmp_buff= foreign->referenced_col_names[i];
		  name= make_lex_string(thd, name, tmp_buff,
			  (uint) strlen(tmp_buff), 1);
		  f_key_info.referenced_fields.push_back(name);
		  if (++i >= foreign->n_fields)
			  break;
	  }

5970 5971 5972 5973 5974 5975 5976 5977 5978 5979 5980 5981 5982 5983 5984 5985 5986 5987 5988 5989 5990 5991 5992 5993 5994 5995 5996 5997 5998 5999 6000 6001 6002 6003 6004 6005 6006 6007 6008 6009 6010 6011 6012 6013 6014 6015 6016
          ulong length;
          if (foreign->type & DICT_FOREIGN_ON_DELETE_CASCADE)
          {
            length=7;
            tmp_buff= "CASCADE";
          }	
          else if (foreign->type & DICT_FOREIGN_ON_DELETE_SET_NULL)
          {
            length=8;
            tmp_buff= "SET NULL";
          }
          else if (foreign->type & DICT_FOREIGN_ON_DELETE_NO_ACTION)
          {
            length=9;
            tmp_buff= "NO ACTION";
          }
          else
          {
            length=8;
            tmp_buff= "RESTRICT";
          }
          f_key_info.delete_method= make_lex_string(thd, f_key_info.delete_method,
                                                    tmp_buff, length, 1);
 
 
          if (foreign->type & DICT_FOREIGN_ON_UPDATE_CASCADE)
          {
            length=7;
            tmp_buff= "CASCADE";
          }
          else if (foreign->type & DICT_FOREIGN_ON_UPDATE_SET_NULL)
          {
            length=8;
            tmp_buff= "SET NULL";
          }
          else if (foreign->type & DICT_FOREIGN_ON_UPDATE_NO_ACTION)
          {
            length=9;
            tmp_buff= "NO ACTION";
          }
          else
          {
            length=8;
            tmp_buff= "RESTRICT";
          }
          f_key_info.update_method= make_lex_string(thd, f_key_info.update_method,
                                                    tmp_buff, length, 1);
6017 6018 6019 6020 6021 6022 6023 6024
          if (foreign->referenced_index &&
              foreign->referenced_index->name)
          {
            f_key_info.referenced_key_name= 
              make_lex_string(thd, f_key_info.referenced_key_name,
                              foreign->referenced_index->name,
                              strlen(foreign->referenced_index->name), 1);
          }
6025 6026

	  FOREIGN_KEY_INFO *pf_key_info= ((FOREIGN_KEY_INFO *)
6027
		  thd->memdup(&f_key_info,
6028 6029 6030
			  sizeof(FOREIGN_KEY_INFO)));
	  f_key_list->push_back(pf_key_info);
	  foreign = UT_LIST_GET_NEXT(foreign_list, foreign);
6031
  }
6032
  mutex_exit_noninline(&(dict_sys->mutex));
6033
  prebuilt->trx->op_info = (char*)"";
6034

6035 6036 6037
  DBUG_RETURN(0);
}

6038 6039 6040 6041 6042 6043 6044 6045 6046 6047 6048
/*********************************************************************
Checks if ALTER TABLE may change the storage engine of the table.
Changing storage engines is not allowed for tables for which there
are foreign key constraints (parent or child tables). */

bool
ha_innobase::can_switch_engines(void)
/*=================================*/
{
	bool	can_switch;

6049
	DBUG_ENTER("ha_innobase::can_switch_engines");
6050

6051
	ut_a(prebuilt->trx == thd_to_trx(ha_thd()));
6052

6053 6054 6055 6056 6057 6058 6059 6060 6061 6062 6063 6064 6065
	prebuilt->trx->op_info =
			"determining if there are foreign key constraints";
	row_mysql_lock_data_dictionary(prebuilt->trx);

	can_switch = !UT_LIST_GET_FIRST(prebuilt->table->referenced_list)
			&& !UT_LIST_GET_FIRST(prebuilt->table->foreign_list);

	row_mysql_unlock_data_dictionary(prebuilt->trx);
	prebuilt->trx->op_info = "";

	DBUG_RETURN(can_switch);
}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6066 6067 6068 6069 6070 6071 6072 6073 6074 6075 6076 6077 6078 6079 6080 6081 6082 6083
/***********************************************************************
Checks if a table is referenced by a foreign key. The MySQL manual states that
a REPLACE is either equivalent to an INSERT, or DELETE(s) + INSERT. Only a
delete is then allowed internally to resolve a duplicate key conflict in
REPLACE, not an update. */

uint
ha_innobase::referenced_by_foreign_key(void)
/*========================================*/
			/* out: 1 if referenced by a FOREIGN KEY,
			0 otherwise */
{
	return(dict_table_referenced_by_foreign_key(prebuilt->table)
	       ? 1 : 0);
}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6084 6085 6086 6087 6088 6089 6090 6091

/***********************************************************************
Frees the foreign key create info for a table stored in InnoDB, if it is
non-NULL. */

void
ha_innobase::free_foreign_key_create_info(
/*======================================*/
6092
	char*	str)	/* in, own: create info string to free	*/
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6093 6094
{
	if (str) {
6095
		my_free(str, MYF(0));
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6096
	}
6097 6098
}

heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
6099 6100 6101 6102 6103 6104 6105 6106
/***********************************************************************
Tells something additional to the handler about how to do things. */

int
ha_innobase::extra(
/*===============*/
			   /* out: 0 or error number */
	enum ha_extra_function operation)
6107
			   /* in: HA_EXTRA_FLUSH or some other flag */
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
6108 6109 6110 6111 6112 6113
{
	/* Warning: since it is not sure that MySQL calls external_lock
	before calling this function, the trx field in prebuilt can be
	obsolete! */

	switch (operation) {
6114 6115 6116 6117 6118 6119 6120 6121 6122
		case HA_EXTRA_FLUSH:
			if (prebuilt->blob_heap) {
				row_mysql_prebuilt_free_blob_heap(prebuilt);
			}
			break;
		case HA_EXTRA_RESET_STATE:
			prebuilt->keep_other_fields_on_keyread = 0;
			prebuilt->read_just_key = 0;
			break;
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
6123
		case HA_EXTRA_NO_KEYREAD:
6124 6125 6126 6127 6128
			prebuilt->read_just_key = 0;
			break;
		case HA_EXTRA_KEYREAD:
			prebuilt->read_just_key = 1;
			break;
6129 6130 6131
		case HA_EXTRA_KEYREAD_PRESERVE_FIELDS:
			prebuilt->keep_other_fields_on_keyread = 1;
			break;
antony@ppcg5.local's avatar
antony@ppcg5.local committed
6132 6133 6134 6135 6136 6137 6138 6139 6140 6141 6142 6143 6144
		case HA_EXTRA_IGNORE_DUP_KEY:
			prebuilt->trx->allow_duplicates= TRUE;
			break;
		case HA_EXTRA_WRITE_CAN_REPLACE:
			prebuilt->trx->replace_duplicates= TRUE;
			break;
		case HA_EXTRA_WRITE_CANNOT_REPLACE:
			prebuilt->trx->replace_duplicates= FALSE;
			break;
		case HA_EXTRA_NO_IGNORE_DUP_KEY:
			prebuilt->trx->allow_duplicates= FALSE;
			prebuilt->trx->replace_duplicates= FALSE;
			break;
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
6145 6146 6147 6148 6149 6150 6151
		default:/* Do nothing */
			;
	}

	return(0);
}

6152 6153 6154 6155 6156 6157 6158 6159 6160 6161 6162
/* Resets the state kept in prebuilt between uses of the handle: calls
row_mysql_prebuilt_free_blob_heap() if a blob heap is present and clears
the two key-read flags. Always returns 0. */
int ha_innobase::reset()
{
	if (prebuilt->blob_heap) {
		row_mysql_prebuilt_free_blob_heap(prebuilt);
	}

	prebuilt->read_just_key = 0;
	prebuilt->keep_other_fields_on_keyread = 0;

	return(0);
}


6163
/**********************************************************************
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6164 6165 6166 6167
MySQL calls this function at the start of each SQL statement inside LOCK
TABLES. Inside LOCK TABLES the ::external_lock method does not work to
mark SQL statement borders. Note also a special case: if a temporary table
is created inside LOCK TABLES, MySQL has not called external_lock() at all
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6168 6169 6170 6171
on that table.
MySQL-5.0 also calls this before each statement in an execution of a stored
procedure. To make the execution more deterministic for binlogging, MySQL-5.0
locks all tables involved in a stored procedure with full explicit table
6172
locks (thd_in_lock_tables(thd) holds in store_lock()) before executing the
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6173
procedure. */
6174 6175

int
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6176 6177
ha_innobase::start_stmt(
/*====================*/
6178 6179 6180
				/* out: 0 or error code */
	THD*		thd,	/* in: handle to the user thread */
	thr_lock_type	lock_type)
6181 6182 6183 6184 6185 6186 6187
{
	trx_t*		trx;

	update_thd(thd);

	trx = prebuilt->trx;

6188 6189 6190 6191 6192 6193 6194
	/* Here we release the search latch and the InnoDB thread FIFO ticket
	if they were reserved. They should have been released already at the
	end of the previous statement, but because inside LOCK TABLES the
	lock count method does not work to mark the end of a SELECT statement,
	that may not be the case. We MUST release the search latch before an
	INSERT, for example. */

6195 6196 6197
	innobase_release_stat_resources(trx);

	prebuilt->sql_stat_start = TRUE;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6198
	prebuilt->hint_need_to_fetch_extra_cols = 0;
6199
	prebuilt->read_just_key = 0;
6200
	prebuilt->keep_other_fields_on_keyread = FALSE;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6201

6202
	if (!prebuilt->mysql_has_locked) {
6203 6204 6205 6206 6207 6208
		/* This handle is for a temporary table created inside
		this same LOCK TABLES; since MySQL does NOT call external_lock
		in this case, we must use x-row locks inside InnoDB to be
		prepared for an update of a row */

		prebuilt->select_lock_type = LOCK_X;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6209 6210
	} else {
		if (trx->isolation_level != TRX_ISO_SERIALIZABLE
6211
			&& thd_sql_command(thd) == SQLCOM_SELECT
6212 6213
			&& lock_type == TL_READ) {

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6214 6215 6216 6217 6218 6219 6220 6221 6222
			/* For other than temporary tables, we obtain
			no lock for consistent read (plain SELECT). */

			prebuilt->select_lock_type = LOCK_NONE;
		} else {
			/* Not a consistent read: restore the
			select_lock_type value. The value of
			stored_select_lock_type was decided in:
			1) ::store_lock(),
6223 6224
			2) ::external_lock(),
			3) ::init_table_handle_for_HANDLER(), and
6225
			4) ::transactional_table_lock(). */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6226 6227 6228 6229 6230 6231

			prebuilt->select_lock_type =
				prebuilt->stored_select_lock_type;
		}
	}

6232 6233
	trx->detailed_error[0] = '\0';

6234
	/* Set the MySQL flag to mark that there is an active transaction */
6235
	if (trx->active_trans == 0) {
6236

6237
		innobase_register_trx_and_stmt(ht, thd);
6238 6239
		trx->active_trans = 1;
	} else {
6240
		innobase_register_stmt(ht, thd);
6241
	}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6242 6243

	return(0);
6244 6245
}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6246 6247 6248 6249 6250 6251 6252 6253 6254 6255 6256
/**********************************************************************
Translates a MySQL transaction isolation level code into the corresponding
InnoDB isolation level code. An unrecognized code trips ut_a(0). */
inline
ulint
innobase_map_isolation_level(
/*=========================*/
					/* out: InnoDB isolation level */
	enum_tx_isolation	iso)	/* in: MySQL isolation level code */
{
	if (iso == ISO_REPEATABLE_READ) {
		return(TRX_ISO_REPEATABLE_READ);
	}

	if (iso == ISO_READ_COMMITTED) {
		return(TRX_ISO_READ_COMMITTED);
	}

	if (iso == ISO_SERIALIZABLE) {
		return(TRX_ISO_SERIALIZABLE);
	}

	if (iso == ISO_READ_UNCOMMITTED) {
		return(TRX_ISO_READ_UNCOMMITTED);
	}

	/* Not one of the four known levels */
	ut_a(0);

	return(0);
}
6263

heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
6264 6265
/**********************************************************************
As MySQL will execute an external lock for every new table it uses when it
6266 6267 6268
starts to process an SQL statement (an exception is when MySQL calls
start_stmt for the handle) we can use this function to store the pointer to
the THD in the handle. We will also use this function to communicate
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
6269 6270 6271 6272 6273 6274 6275
to InnoDB that a new SQL statement has started and that we must store a
savepoint to our transaction handle, so that we are able to roll back
the SQL statement in case of an error. */

int
ha_innobase::external_lock(
/*=======================*/
6276
				/* out: 0 */
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
6277
	THD*	thd,		/* in: handle to the user thread */
6278
	int	lock_type)	/* in: lock type */
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
6279 6280 6281
{
	trx_t*		trx;

6282
	DBUG_ENTER("ha_innobase::external_lock");
6283
	DBUG_PRINT("enter",("lock_type: %d", lock_type));
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
6284 6285 6286 6287 6288 6289

	update_thd(thd);

	trx = prebuilt->trx;

	prebuilt->sql_stat_start = TRUE;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6290
	prebuilt->hint_need_to_fetch_extra_cols = 0;
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
6291 6292

	prebuilt->read_just_key = 0;
6293
	prebuilt->keep_other_fields_on_keyread = FALSE;
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
6294 6295 6296 6297 6298 6299

	if (lock_type == F_WRLCK) {

		/* If this is a SELECT, then it is in UPDATE TABLE ...
		or SELECT ... FOR UPDATE */
		prebuilt->select_lock_type = LOCK_X;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6300
		prebuilt->stored_select_lock_type = LOCK_X;
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
6301 6302 6303
	}

	if (lock_type != F_UNLCK) {
6304
		/* MySQL is setting a new table lock */
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
6305

6306
		trx->detailed_error[0] = '\0';
6307

6308 6309
		/* Set the MySQL flag to mark that there is an active
		transaction */
6310
		if (trx->active_trans == 0) {
6311

6312
			innobase_register_trx_and_stmt(ht, thd);
6313 6314
			trx->active_trans = 1;
		} else if (trx->n_mysql_tables_in_use == 0) {
6315
			innobase_register_stmt(ht, thd);
6316
		}
6317

heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
6318
		trx->n_mysql_tables_in_use++;
6319
		prebuilt->mysql_has_locked = TRUE;
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
6320

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6321
		if (trx->isolation_level == TRX_ISO_SERIALIZABLE
6322
			&& prebuilt->select_lock_type == LOCK_NONE
antony@ppcg5.local's avatar
antony@ppcg5.local committed
6323 6324
			&& thd_test_options(thd,
				OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6325

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6326 6327
			/* To get serializable execution, we let InnoDB
			conceptually add 'LOCK IN SHARE MODE' to all SELECTs
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6328 6329 6330 6331 6332
			which otherwise would have been consistent reads. An
			exception is consistent reads in the AUTOCOMMIT=1 mode:
			we know that they are read-only transactions, and they
			can be serialized also if performed as consistent
			reads. */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6333 6334

			prebuilt->select_lock_type = LOCK_S;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6335
			prebuilt->stored_select_lock_type = LOCK_S;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6336 6337
		}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6338 6339 6340 6341
		/* Starting from 4.1.9, no InnoDB table lock is taken in LOCK
		TABLES if AUTOCOMMIT=1. It does not make much sense to acquire
		an InnoDB table lock if it is released immediately at the end
		of LOCK TABLES, and InnoDB's table locks in that case cause
6342 6343 6344
		VERY easily deadlocks.

		We do not set InnoDB table locks if user has not explicitly
6345 6346
		requested a table lock. Note that thd_in_lock_tables(thd)
		can hold in some cases, e.g., at the start of a stored
6347
		procedure call (SQLCOM_CALL). */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6348

heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
6349
		if (prebuilt->select_lock_type != LOCK_NONE) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6350

antony@ppcg5.local's avatar
antony@ppcg5.local committed
6351
			if (thd_in_lock_tables(thd) &&
6352
				thd_sql_command(thd) == SQLCOM_LOCK_TABLES &&
antony@ppcg5.local's avatar
antony@ppcg5.local committed
6353 6354
				THDVAR(thd, table_locks) &&
				thd_test_options(thd, OPTION_NOT_AUTOCOMMIT)) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6355

6356 6357
				ulint	error = row_lock_table_for_mysql(
					prebuilt, NULL, 0);
6358 6359 6360

				if (error != DB_SUCCESS) {
					error = convert_error_code_to_mysql(
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6361 6362
						(int) error, user_thd);
					DBUG_RETURN((int) error);
6363 6364
				}
			}
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
6365

6366
			trx->mysql_n_tables_locked++;
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
6367 6368
		}

6369
		DBUG_RETURN(0);
6370
	}
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
6371

6372
	/* MySQL is releasing a table lock */
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
6373

6374 6375
	trx->n_mysql_tables_in_use--;
	prebuilt->mysql_has_locked = FALSE;
6376

6377 6378
	/* If the MySQL lock count drops to zero we know that the current SQL
	statement has ended */
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
6379

6380
	if (trx->n_mysql_tables_in_use == 0) {
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
6381

6382
		trx->mysql_n_tables_locked = 0;
6383
		prebuilt->used_in_HANDLER = FALSE;
6384

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6385 6386 6387 6388
		/* Release a possible FIFO ticket and search latch. Since we
		may reserve the kernel mutex, we have to release the search
		system latch first to obey the latching order. */

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6389
		innobase_release_stat_resources(trx);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6390

antony@ppcg5.local's avatar
antony@ppcg5.local committed
6391
		if (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {
6392
			if (trx->active_trans != 0) {
6393
				innobase_commit(ht, thd, TRUE);
6394 6395
			}
		} else {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6396
			if (trx->isolation_level <= TRX_ISO_READ_COMMITTED
6397
						&& trx->global_read_view) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6398

6399
				/* At low transaction isolation levels we let
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6400 6401
				each consistent read set its own snapshot */

6402
				read_view_close_for_mysql(trx);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6403
			}
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
6404 6405 6406
		}
	}

6407
	DBUG_RETURN(0);
heikki@donna.mysql.fi's avatar
heikki@donna.mysql.fi committed
6408 6409
}

6410 6411 6412 6413 6414 6415 6416
/**********************************************************************
With this function MySQL request a transactional lock to a table when
user issued query LOCK TABLES..WHERE ENGINE = InnoDB. */

int
ha_innobase::transactional_table_lock(
/*==================================*/
6417
				/* out: error code */
6418
	THD*	thd,		/* in: handle to the user thread */
6419
	int	lock_type)	/* in: lock type */
6420 6421 6422
{
	trx_t*		trx;

6423
	DBUG_ENTER("ha_innobase::transactional_table_lock");
6424 6425 6426 6427 6428 6429 6430 6431
	DBUG_PRINT("enter",("lock_type: %d", lock_type));

	/* We do not know if MySQL can call this function before calling
	external_lock(). To be safe, update the thd of the current table
	handle. */

	update_thd(thd);

6432
	if (prebuilt->table->ibd_file_missing
6433
	    && !thd_tablespace_op(ha_thd())) {
6434 6435
		ut_print_timestamp(stderr);
		fprintf(stderr, "  InnoDB error:\n"
6436 6437 6438
"MySQL is trying to use a table handle but the .ibd file for\n"
"table %s does not exist.\n"
"Have you deleted the .ibd file from the database directory under\n"
jan@hundin.mysql.fi's avatar
jan@hundin.mysql.fi committed
6439
"the MySQL datadir?"
6440
"See http://dev.mysql.com/doc/refman/5.1/en/innodb-troubleshooting.html\n"
6441 6442 6443 6444 6445 6446 6447 6448 6449 6450 6451 6452 6453 6454 6455 6456 6457
"how you can resolve the problem.\n",
				prebuilt->table->name);
		DBUG_RETURN(HA_ERR_CRASHED);
	}

	trx = prebuilt->trx;

	prebuilt->sql_stat_start = TRUE;
	prebuilt->hint_need_to_fetch_extra_cols = 0;

	prebuilt->read_just_key = 0;
	prebuilt->keep_other_fields_on_keyread = FALSE;

	if (lock_type == F_WRLCK) {
		prebuilt->select_lock_type = LOCK_X;
		prebuilt->stored_select_lock_type = LOCK_X;
	} else if (lock_type == F_RDLCK) {
jan@hundin.mysql.fi's avatar
jan@hundin.mysql.fi committed
6458 6459
		prebuilt->select_lock_type = LOCK_S;
		prebuilt->stored_select_lock_type = LOCK_S;
6460
	} else {
6461 6462
		ut_print_timestamp(stderr);
		fprintf(stderr, "  InnoDB error:\n"
6463 6464 6465 6466 6467 6468 6469 6470 6471
"MySQL is trying to set transactional table lock with corrupted lock type\n"
"to table %s, lock type %d does not exist.\n",
				prebuilt->table->name, lock_type);
		DBUG_RETURN(HA_ERR_CRASHED);
	}

	/* MySQL is setting a new transactional table lock */

	/* Set the MySQL flag to mark that there is an active transaction */
6472
	if (trx->active_trans == 0) {
serg@serg.mylan's avatar
serg@serg.mylan committed
6473

6474
		innobase_register_trx_and_stmt(ht, thd);
6475 6476
		trx->active_trans = 1;
	}
6477

antony@ppcg5.local's avatar
antony@ppcg5.local committed
6478
	if (thd_in_lock_tables(thd) && THDVAR(thd, table_locks)) {
6479 6480
		ulint	error = DB_SUCCESS;

6481
		error = row_lock_table_for_mysql(prebuilt, NULL, 0);
6482 6483

		if (error != DB_SUCCESS) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6484 6485
			error = convert_error_code_to_mysql((int) error, user_thd);
			DBUG_RETURN((int) error);
6486 6487
		}

antony@ppcg5.local's avatar
antony@ppcg5.local committed
6488
		if (thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {
6489

6490 6491
			/* Store the current undo_no of the transaction
			so that we know where to roll back if we have
6492 6493 6494 6495 6496 6497 6498 6499 6500
			to roll back the next SQL statement */

			trx_mark_sql_stat_end(trx);
		}
	}

	DBUG_RETURN(0);
}

6501 6502
/****************************************************************************
Here we export InnoDB status variables to MySQL.  */
6503
static
6504
int
6505
innodb_export_status()
6506
/*==================*/
6507
{
6508 6509 6510 6511 6512
	if (innodb_inited) {
		srv_export_innodb_status();
	}

	return 0;
6513 6514
}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6515
/****************************************************************************
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6516
Implements the SHOW INNODB STATUS command. Sends the output of the InnoDB
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6517
Monitor to the client. */
6518
static
6519
bool
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6520 6521
innodb_show_status(
/*===============*/
6522
	handlerton*	hton,	/* in: the innodb handlerton */
6523 6524
	THD*	thd,	/* in: the MySQL query thread of the caller */
	stat_print_fn *stat_print)
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6525
{
6526 6527 6528 6529 6530
	trx_t*			trx;
	static const char	truncated_msg[] = "... truncated...\n";
	const long		MAX_STATUS_SIZE = 64000;
	ulint			trx_list_start = ULINT_UNDEFINED;
	ulint			trx_list_end = ULINT_UNDEFINED;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6531

6532
	DBUG_ENTER("innodb_show_status");
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6533

6534
	trx = check_trx_exists(hton, thd);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6535 6536 6537

	innobase_release_stat_resources(trx);

6538 6539
	/* We let the InnoDB Monitor to output at most MAX_STATUS_SIZE
	bytes of text. */
6540

6541
	long	flen, usable_len;
6542
	char*	str;
6543

6544
	mutex_enter_noninline(&srv_monitor_file_mutex);
6545
	rewind(srv_monitor_file);
6546 6547
	srv_printf_innodb_monitor(srv_monitor_file,
				&trx_list_start, &trx_list_end);
6548
	flen = ftell(srv_monitor_file);
6549
	os_file_set_eof(srv_monitor_file);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6550

6551 6552
	if (flen < 0) {
		flen = 0;
6553 6554 6555 6556 6557 6558
	}

	if (flen > MAX_STATUS_SIZE) {
		usable_len = MAX_STATUS_SIZE;
	} else {
		usable_len = flen;
6559
	}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6560

6561 6562
	/* allocate buffer for the string, and
	read the contents of the temporary file */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6563

6564
	if (!(str = (char*) my_malloc(usable_len + 1, MYF(0)))) {
6565 6566 6567
	  mutex_exit_noninline(&srv_monitor_file_mutex);
	  DBUG_RETURN(TRUE);
	}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6568

monty@mishka.local's avatar
monty@mishka.local committed
6569
	rewind(srv_monitor_file);
6570 6571
	if (flen < MAX_STATUS_SIZE) {
		/* Display the entire output. */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6572
		flen = (long) fread(str, 1, flen, srv_monitor_file);
6573 6574 6575 6576 6577
	} else if (trx_list_end < (ulint) flen
			&& trx_list_start < trx_list_end
			&& trx_list_start + (flen - trx_list_end)
			< MAX_STATUS_SIZE - sizeof truncated_msg - 1) {
		/* Omit the beginning of the list of active transactions. */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6578
		long len = (long) fread(str, 1, trx_list_start, srv_monitor_file);
6579 6580 6581 6582
		memcpy(str + len, truncated_msg, sizeof truncated_msg - 1);
		len += sizeof truncated_msg - 1;
		usable_len = (MAX_STATUS_SIZE - 1) - len;
		fseek(srv_monitor_file, flen - usable_len, SEEK_SET);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6583
		len += (long) fread(str + len, 1, usable_len, srv_monitor_file);
6584 6585 6586
		flen = len;
	} else {
		/* Omit the end of the output. */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6587
		flen = (long) fread(str, 1, MAX_STATUS_SIZE - 1, srv_monitor_file);
6588
	}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6589

6590
	mutex_exit_noninline(&srv_monitor_file_mutex);
6591

6592
	bool result = FALSE;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6593

6594
	if (stat_print(thd, innobase_hton_name, strlen(innobase_hton_name),
6595
			STRING_WITH_LEN(""), str, flen)) {
6596
		result= TRUE;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6597
	}
6598
	my_free(str, MYF(0));
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6599

6600
	DBUG_RETURN(FALSE);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6601 6602
}

vtkachenko@intelp4d.mysql.com's avatar
vtkachenko@intelp4d.mysql.com committed
6603 6604
/****************************************************************************
Implements the SHOW MUTEX STATUS command. . */
6605
static
vtkachenko@intelp4d.mysql.com's avatar
vtkachenko@intelp4d.mysql.com committed
6606 6607
bool
innodb_mutex_show_status(
6608
/*=====================*/
6609
	handlerton*	hton,	/* in: the innodb handlerton */
6610 6611 6612 6613 6614 6615
	THD*		thd,		/* in: the MySQL query thread of the
					caller */
	stat_print_fn*	stat_print)
{
	char buf1[IO_SIZE], buf2[IO_SIZE];
	mutex_t*  mutex;
6616
#ifdef UNIV_DEBUG
6617 6618 6619 6620 6621 6622
	ulint	  rw_lock_count= 0;
	ulint	  rw_lock_count_spin_loop= 0;
	ulint	  rw_lock_count_spin_rounds= 0;
	ulint	  rw_lock_count_os_wait= 0;
	ulint	  rw_lock_count_os_yield= 0;
	ulonglong rw_lock_wait_time= 0;
6623
#endif /* UNIV_DEBUG */
6624
	uint	  hton_name_len= strlen(innobase_hton_name), buf1len, buf2len;
6625
	DBUG_ENTER("innodb_mutex_show_status");
vtkachenko@intelp4d.mysql.com's avatar
vtkachenko@intelp4d.mysql.com committed
6626

6627
	mutex_enter_noninline(&mutex_list_mutex);
vtkachenko@intelp4d.mysql.com's avatar
vtkachenko@intelp4d.mysql.com committed
6628

6629 6630 6631
	mutex = UT_LIST_GET_FIRST(mutex_list);

	while (mutex != NULL) {
6632
#ifdef UNIV_DEBUG
6633 6634 6635 6636 6637 6638 6639 6640 6641 6642 6643 6644 6645 6646 6647
		if (mutex->mutex_type != 1) {
			if (mutex->count_using > 0) {
				buf1len= my_snprintf(buf1, sizeof(buf1),
					"%s:%s",
					mutex->cmutex_name, mutex->cfile_name);
				buf2len= my_snprintf(buf2, sizeof(buf2),
					"count=%lu, spin_waits=%lu,"
					" spin_rounds=%lu, "
					"os_waits=%lu, os_yields=%lu,"
					" os_wait_times=%lu",
					mutex->count_using,
					mutex->count_spin_loop,
					mutex->count_spin_rounds,
					mutex->count_os_wait,
					mutex->count_os_yield,
6648
					(ulong) (mutex->lspent_time/1000));
6649

6650
				if (stat_print(thd, innobase_hton_name,
6651 6652
						hton_name_len, buf1, buf1len,
						buf2, buf2len)) {
6653 6654
					mutex_exit_noninline(
						&mutex_list_mutex);
6655 6656 6657 6658 6659 6660 6661 6662 6663 6664 6665 6666
					DBUG_RETURN(1);
				}
			}
		}
		else {
			rw_lock_count += mutex->count_using;
			rw_lock_count_spin_loop += mutex->count_spin_loop;
			rw_lock_count_spin_rounds += mutex->count_spin_rounds;
			rw_lock_count_os_wait += mutex->count_os_wait;
			rw_lock_count_os_yield += mutex->count_os_yield;
			rw_lock_wait_time += mutex->lspent_time;
		}
6667 6668 6669 6670 6671 6672 6673 6674 6675 6676 6677 6678 6679
#else /* UNIV_DEBUG */
		buf1len= my_snprintf(buf1, sizeof(buf1), "%s:%lu",
				     mutex->cfile_name, (ulong) mutex->cline);
		buf2len= my_snprintf(buf2, sizeof(buf2), "os_waits=%lu",
				     mutex->count_os_wait);

		if (stat_print(thd, innobase_hton_name,
			       hton_name_len, buf1, buf1len,
			       buf2, buf2len)) {
			mutex_exit_noninline(&mutex_list_mutex);
			DBUG_RETURN(1);
		}
#endif /* UNIV_DEBUG */
vtkachenko@intelp4d.mysql.com's avatar
vtkachenko@intelp4d.mysql.com committed
6680

6681 6682 6683
		mutex = UT_LIST_GET_NEXT(list, mutex);
	}

6684 6685 6686
	mutex_exit_noninline(&mutex_list_mutex);

#ifdef UNIV_DEBUG
6687 6688 6689 6690 6691 6692
	buf2len= my_snprintf(buf2, sizeof(buf2),
		"count=%lu, spin_waits=%lu, spin_rounds=%lu, "
		"os_waits=%lu, os_yields=%lu, os_wait_times=%lu",
		rw_lock_count, rw_lock_count_spin_loop,
		rw_lock_count_spin_rounds,
		rw_lock_count_os_wait, rw_lock_count_os_yield,
6693
		(ulong) (rw_lock_wait_time/1000));
6694

6695
	if (stat_print(thd, innobase_hton_name, hton_name_len,
6696 6697 6698
			STRING_WITH_LEN("rw_lock_mutexes"), buf2, buf2len)) {
		DBUG_RETURN(1);
	}
6699
#endif /* UNIV_DEBUG */
6700 6701

	DBUG_RETURN(FALSE);
vtkachenko@intelp4d.mysql.com's avatar
vtkachenko@intelp4d.mysql.com committed
6702 6703
}

6704
static
6705 6706 6707
bool innobase_show_status(handlerton *hton, THD* thd, 
                          stat_print_fn* stat_print,
                          enum ha_stat_type stat_type)
6708 6709 6710
{
	switch (stat_type) {
	case HA_ENGINE_STATUS:
6711
		return innodb_show_status(hton, thd, stat_print);
6712
	case HA_ENGINE_MUTEX:
6713
		return innodb_mutex_show_status(hton, thd, stat_print);
6714 6715 6716
	default:
		return FALSE;
	}
6717 6718 6719
}


6720 6721 6722 6723 6724
/****************************************************************************
 Handling the shared INNOBASE_SHARE structure that is needed to provide table
 locking.
****************************************************************************/

6725
/* Key accessor for INNOBASE_SHARE entries: stores the length of the table
name of the share in *length and returns a pointer to the table name. The
third argument is ignored. */
static mysql_byte* innobase_get_key(INNOBASE_SHARE* share, size_t *length,
	my_bool not_used __attribute__((unused)))
{
	mysql_byte*	key = (mysql_byte*) share->table_name;

	*length = share->table_name_length;

	return key;
}

6733
/* Looks up the INNOBASE_SHARE for table_name in innobase_open_tables while
holding innobase_share_mutex. If none is found, a new share is allocated
(with the table name stored right after the struct), inserted into the
hash, and its lock and mutex are initialized. The use count of the share is
incremented before it is returned. Returns 0 if the hash insert fails. */
static INNOBASE_SHARE* get_share(const char* table_name)
{
	INNOBASE_SHARE*	share;

	pthread_mutex_lock(&innobase_share_mutex);

	uint	name_len = (uint) strlen(table_name);

	share = (INNOBASE_SHARE*) hash_search(&innobase_open_tables,
					      (mysql_byte*) table_name,
					      name_len);

	if (share == NULL) {

		/* One allocation holds both the struct and the
		NUL-terminated copy of the name */

		share = (INNOBASE_SHARE*) my_malloc(
			sizeof(*share) + name_len + 1,
			MYF(MY_FAE | MY_ZEROFILL));

		share->table_name = (char*) (share + 1);
		share->table_name_length = name_len;
		strmov(share->table_name, table_name);

		if (my_hash_insert(&innobase_open_tables,
				   (mysql_byte*) share)) {

			pthread_mutex_unlock(&innobase_share_mutex);
			my_free(share, MYF(0));

			return(NULL);
		}

		thr_lock_init(&share->lock);
		pthread_mutex_init(&share->mutex, MY_MUTEX_INIT_FAST);
	}

	share->use_count++;

	pthread_mutex_unlock(&innobase_share_mutex);

	return(share);
}

/* Decrements the use count of share while holding innobase_share_mutex.
When the count reaches zero, the share is removed from
innobase_open_tables, its lock and mutex are destroyed, and its memory is
freed. */
static void free_share(INNOBASE_SHARE* share)
{
	pthread_mutex_lock(&innobase_share_mutex);

	share->use_count--;

	if (share->use_count == 0) {
		hash_delete(&innobase_open_tables, (mysql_byte*) share);
		thr_lock_delete(&share->lock);
		pthread_mutex_destroy(&share->mutex);
		my_free(share, MYF(0));
	}

	pthread_mutex_unlock(&innobase_share_mutex);
}
6781 6782

/*********************************************************************
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6783
Converts a MySQL table lock stored in the 'lock' field of the handle to
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6784 6785 6786 6787 6788 6789
a proper type before storing pointer to the lock into an array of pointers.
MySQL also calls this if it wants to reset some table locks to a not-locked
state during the processing of an SQL query. An example is that during a
SELECT the read lock is released early on the 'const' tables where we only
fetch one row. MySQL does not call this when it releases all locks at the
end of an SQL statement. */
6790 6791 6792 6793 6794 6795 6796 6797 6798 6799 6800 6801

THR_LOCK_DATA**
ha_innobase::store_lock(
/*====================*/
						/* out: pointer to the next
						element in the 'to' array */
	THD*			thd,		/* in: user thread handle */
	THR_LOCK_DATA**		to,		/* in: pointer to an array
						of pointers to lock structs;
						pointer to the 'lock' field
						of current handle is stored
						next to this array */
6802
	enum thr_lock_type	lock_type)	/* in: lock type to store in
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6803 6804
						'lock'; this may also be
						TL_IGNORE */
6805
{
6806 6807
	trx_t*		trx;

6808 6809 6810
	/* Note that trx in this function is NOT necessarily prebuilt->trx
	because we call update_thd() later, in ::external_lock()! Failure to
	understand this caused a serious memory corruption bug in 5.1.11. */
6811

6812
	trx = check_trx_exists(ht, thd);
6813

6814
	/* NOTE: MySQL can call this function with lock 'type' TL_IGNORE!
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6815 6816 6817
	Be careful to ignore TL_IGNORE if we are going to do something with
	only 'real' locks! */

6818
	/* If no MySQL table is in use, we need to set the isolation level
6819 6820 6821 6822 6823
	of the transaction. */

	if (lock_type != TL_IGNORE
	&& trx->n_mysql_tables_in_use == 0) {
		trx->isolation_level = innobase_map_isolation_level(
6824
                                    (enum_tx_isolation)thd_tx_isolation(thd));
6825 6826 6827 6828 6829 6830 6831 6832 6833

		if (trx->isolation_level <= TRX_ISO_READ_COMMITTED
		    && trx->global_read_view) {

			/* At low transaction isolation levels we let
			each consistent read set its own snapshot */

			read_view_close_for_mysql(trx);
		}
6834 6835
	}

serg@janus.mylan's avatar
serg@janus.mylan committed
6836
	DBUG_ASSERT(thd == current_thd);
6837
	const bool in_lock_tables = thd_in_lock_tables(thd);
6838
	const uint sql_command = thd_sql_command(thd);
6839

antony@ppcg5.local's avatar
antony@ppcg5.local committed
6840
	if (sql_command == SQLCOM_DROP_TABLE) {
6841 6842 6843 6844 6845

		/* MySQL calls this function in DROP TABLE though this table
		handle may belong to another thd that is running a query. Let
		us in that case skip any changes to the prebuilt struct. */ 

6846 6847
	} else if ((lock_type == TL_READ && in_lock_tables) ||
		(lock_type == TL_READ_HIGH_PRIORITY && in_lock_tables) ||
6848 6849
		lock_type == TL_READ_WITH_SHARED_LOCKS ||
		lock_type == TL_READ_NO_INSERT ||
antony@ppcg5.local's avatar
antony@ppcg5.local committed
6850
		(sql_command != SQLCOM_SELECT
6851
			&& lock_type != TL_IGNORE)) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6852

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6853
		/* The OR cases above are in this order:
6854 6855
		1) MySQL is doing LOCK TABLES ... READ LOCAL, or we
		are processing a stored procedure or function, or
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6856 6857 6858
		2) (we do not know when TL_READ_HIGH_PRIORITY is used), or
		3) this is a SELECT ... IN SHARE MODE, or
		4) we are doing a complex SQL statement like
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6859
		INSERT INTO ... SELECT ... and the logical logging (MySQL
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6860
		binlog) requires the use of a locking read, or
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6861 6862 6863
		MySQL is doing LOCK TABLES ... READ.
		5) we let InnoDB do locking reads for all SQL statements that
		are not simple SELECTs; note that select_lock_type in this
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6864 6865 6866 6867 6868 6869
		case may get strengthened in ::external_lock() to LOCK_X.
		Note that we MUST use a locking read in all data modifying
		SQL statements, because otherwise the execution would not be
		serializable, and also the results from the update could be
		unexpected if an obsolete consistent read view would be
		used. */
6870

6871 6872 6873 6874 6875 6876 6877 6878
		ulint	isolation_level;

		isolation_level = trx->isolation_level;

		if ((srv_locks_unsafe_for_binlog
			|| isolation_level == TRX_ISO_READ_COMMITTED)
		&& isolation_level != TRX_ISO_SERIALIZABLE
		&& (lock_type == TL_READ || lock_type == TL_READ_NO_INSERT)
antony@ppcg5.local's avatar
antony@ppcg5.local committed
6879 6880 6881
		&& (sql_command == SQLCOM_INSERT_SELECT
			|| sql_command == SQLCOM_UPDATE
			|| sql_command == SQLCOM_CREATE_TABLE)) {
6882

6883 6884 6885
			/* If we either have innobase_locks_unsafe_for_binlog
			option set or this session is using READ COMMITTED
			isolation level and isolation level of the transaction
6886
			is not set to serializable and MySQL is doing
6887 6888 6889 6890
			INSERT INTO...SELECT or UPDATE ... = (SELECT ...) or
			CREATE  ... SELECT... without FOR UPDATE or
			IN SHARE MODE in select, then we use consistent
			read for select. */
6891 6892 6893

			prebuilt->select_lock_type = LOCK_NONE;
			prebuilt->stored_select_lock_type = LOCK_NONE;
antony@ppcg5.local's avatar
antony@ppcg5.local committed
6894
		} else if (sql_command == SQLCOM_CHECKSUM) {
6895
			/* Use consistent read for checksum table */
6896

6897 6898 6899 6900 6901 6902
			prebuilt->select_lock_type = LOCK_NONE;
			prebuilt->stored_select_lock_type = LOCK_NONE;
		} else {
			prebuilt->select_lock_type = LOCK_S;
			prebuilt->stored_select_lock_type = LOCK_S;
		}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6903

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6904 6905
	} else if (lock_type != TL_IGNORE) {

6906
		/* We set possible LOCK_X value in external_lock, not yet
6907
		here even if this would be SELECT ... FOR UPDATE */
6908

6909
		prebuilt->select_lock_type = LOCK_NONE;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6910
		prebuilt->stored_select_lock_type = LOCK_NONE;
6911 6912 6913 6914
	}

	if (lock_type != TL_IGNORE && lock.type == TL_UNLOCK) {

6915
		/* Starting from 5.0.7, we weaken also the table locks
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6916 6917
		set at the start of a MySQL stored procedure call, just like
		we weaken the locks set at the start of an SQL statement.
6918
		MySQL does set in_lock_tables TRUE there, but in reality
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6919 6920 6921 6922
		we do not need table locks to make the execution of a
		single transaction stored procedure call deterministic
		(if it does not use a consistent read). */

6923
		if (lock_type == TL_READ
antony@ppcg5.local's avatar
antony@ppcg5.local committed
6924
		    && sql_command == SQLCOM_LOCK_TABLES) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6925 6926 6927 6928 6929 6930 6931
			/* We come here if MySQL is processing LOCK TABLES
			... READ LOCAL. MyISAM under that table lock type
			reads the table as it was at the time the lock was
			granted (new inserts are allowed, but not seen by the
			reader). To get a similar effect on an InnoDB table,
			we must use LOCK TABLES ... READ. We convert the lock
			type here, so that for InnoDB, READ LOCAL is
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6932 6933 6934
			equivalent to READ. This will change the InnoDB
			behavior in mysqldump, so that dumps of InnoDB tables
			are consistent with dumps of MyISAM tables. */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6935 6936 6937 6938

			lock_type = TL_READ_NO_INSERT;
		}

6939
		/* If we are not doing a LOCK TABLE, DISCARD/IMPORT
6940
		TABLESPACE or TRUNCATE TABLE then allow multiple
6941 6942
		writers. Note that ALTER TABLE uses a TL_WRITE_ALLOW_READ
		< TL_WRITE_CONCURRENT_INSERT.
6943

6944 6945
		We especially allow multiple writers if MySQL is at the
		start of a stored procedure call (SQLCOM_CALL) or a
6946
		stored function call (MySQL does have in_lock_tables
6947
		TRUE there). */
6948

6949 6950
		if ((lock_type >= TL_WRITE_CONCURRENT_INSERT
		&& lock_type <= TL_WRITE)
6951
		&& !(in_lock_tables
antony@ppcg5.local's avatar
antony@ppcg5.local committed
6952
			&& sql_command == SQLCOM_LOCK_TABLES)
6953
		&& !thd_tablespace_op(thd)
antony@ppcg5.local's avatar
antony@ppcg5.local committed
6954 6955
		&& sql_command != SQLCOM_TRUNCATE
		&& sql_command != SQLCOM_OPTIMIZE
6956

6957
#ifdef __WIN__
antony@ppcg5.local's avatar
antony@ppcg5.local committed
6958 6959 6960 6961 6962 6963 6964
		/* On win32, ALTER TABLE takes a TL_WRITE(=10) lock instead of
		TL_WRITE_ALLOW_READ(=6) in order to complete successfully.
		The InnoDB handler would otherwise lift TL_WRITE to
		TL_WRITE_ALLOW_WRITE here, which causes a race condition when
		several clients run ALTER TABLE simultaneously (bug #17264).
		Excluding SQLCOM_ALTER_TABLE avoids the problem. */
		&& sql_command != SQLCOM_ALTER_TABLE
6965
#endif
6966

antony@ppcg5.local's avatar
antony@ppcg5.local committed
6967
		&& sql_command != SQLCOM_CREATE_TABLE) {
6968 6969

			lock_type = TL_WRITE_ALLOW_WRITE;
6970
		}
6971

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6972 6973 6974 6975
		/* In queries of type INSERT INTO t1 SELECT ... FROM t2 ...
		MySQL would use the lock TL_READ_NO_INSERT on t2, and that
		would conflict with TL_WRITE_ALLOW_WRITE, blocking all inserts
		to t2. Convert the lock to a normal read lock to allow
6976 6977 6978 6979
		concurrent inserts to t2.

		We especially allow concurrent inserts if MySQL is at the
		start of a stored procedure call (SQLCOM_CALL)
antony@ppcg5.local's avatar
antony@ppcg5.local committed
6980
		(MySQL does have thd_in_lock_tables() TRUE there). */
6981

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6982
		if (lock_type == TL_READ_NO_INSERT
antony@ppcg5.local's avatar
antony@ppcg5.local committed
6983
		    && sql_command != SQLCOM_LOCK_TABLES) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6984

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6985 6986
			lock_type = TL_READ;
		}
6987

6988 6989 6990 6991
		lock.type = lock_type;
	}

	*to++= &lock;
6992

6993 6994 6995
	return(to);
}

6996
/***********************************************************************
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
6997 6998
This function initializes the auto-inc counter if it has not been
initialized yet. This function does not change the value of the auto-inc
6999
counter if it already has been initialized. In parameter ret returns
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7000
the value of the auto-inc counter. */
7001

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7002 7003 7004
int
ha_innobase::innobase_read_and_init_auto_inc(
/*=========================================*/
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7005 7006
				/* out: 0 or error code: deadlock or lock wait
				timeout */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7007
	longlong*	ret)	/* out: auto-inc value */
7008
{
7009
	longlong	auto_inc;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7010 7011
	ulint		old_select_lock_type;
	ibool		trx_was_not_started	= FALSE;
7012
	int		error;
7013

7014
	ut_a(prebuilt);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7015
	ut_a(prebuilt->table);
7016

7017
	/* Prepare prebuilt->trx in the table handle */
7018
	update_thd(ha_thd());
7019

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7020 7021 7022 7023
	if (prebuilt->trx->conc_state == TRX_NOT_STARTED) {
		trx_was_not_started = TRUE;
	}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7024 7025 7026 7027 7028
	/* In case MySQL calls this in the middle of a SELECT query, release
	possible adaptive hash latch to avoid deadlocks of threads */

	trx_search_latch_release_if_reserved(prebuilt->trx);

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7029
	auto_inc = dict_table_autoinc_read(prebuilt->table);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7030

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7031 7032 7033
	if (auto_inc != 0) {
		/* Already initialized */
		*ret = auto_inc;
7034

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7035 7036 7037
		error = 0;

		goto func_exit_early;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7038
	}
7039

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7040
	error = row_lock_table_autoinc_for_mysql(prebuilt);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7041

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7042 7043
	if (error != DB_SUCCESS) {
		error = convert_error_code_to_mysql(error, user_thd);
7044

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7045
		goto func_exit_early;
7046
	}
monty@hundin.mysql.fi's avatar
monty@hundin.mysql.fi committed
7047

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7048 7049
	/* Check again if someone has initialized the counter meanwhile */
	auto_inc = dict_table_autoinc_read(prebuilt->table);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7050

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7051 7052
	if (auto_inc != 0) {
		*ret = auto_inc;
7053

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7054 7055 7056
		error = 0;

		goto func_exit_early;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7057
	}
7058

7059 7060
	(void) extra(HA_EXTRA_KEYREAD);
	index_init(table->s->next_number_index, 1);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7061

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7062 7063 7064 7065 7066 7067
	/* Starting from 5.0.9, we use a consistent read to read the auto-inc
	column maximum value. This eliminates the spurious deadlocks caused
	by the row X-lock that we previously used. Note the following flaw
	in our algorithm: if some other user meanwhile UPDATEs the auto-inc
	column, our consistent read will not return the largest value. We
	accept this flaw, since the deadlocks were a bigger trouble. */
7068

7069 7070
	/* Fetch all the columns in the key */

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7071
	prebuilt->hint_need_to_fetch_extra_cols = ROW_RETRIEVE_ALL_COLS;
7072

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7073
	old_select_lock_type = prebuilt->select_lock_type;
7074
	prebuilt->select_lock_type = LOCK_NONE;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7075 7076 7077 7078 7079

	/* Eliminate an InnoDB error print that happens when we try to SELECT
	from a table when no table has been locked in ::external_lock(). */
	prebuilt->trx->n_mysql_tables_in_use++;

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7080
	error = index_last(table->record[1]);
7081

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7082
	prebuilt->trx->n_mysql_tables_in_use--;
7083
	prebuilt->select_lock_type = old_select_lock_type;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7084

7085
	if (error) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7086 7087 7088 7089 7090 7091
		if (error == HA_ERR_END_OF_FILE) {
			/* The table was empty, initialize to 1 */
			auto_inc = 1;

			error = 0;
		} else {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7092
			/* This should not happen in a consistent read */
7093 7094
		  sql_print_error("Consistent read of auto-inc column "
				  "returned %lu", (ulong) error);
7095
			auto_inc = -1;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7096

7097 7098 7099
			goto func_exit;
		}
	} else {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7100 7101 7102 7103
		/* Initialize to max(col) + 1; we use
		'found_next_number_field' below because MySQL in SHOW TABLE
		STATUS does not seem to set 'next_number_field'. The comment
		in table.h says that 'next_number_field' is set when it is
7104 7105 7106 7107
		'active'.
		Since 5.1 MySQL enforces that we announce fields which we will
		read; as we only do a val_*() call, dbug_tmp_use_all_columns()
		with read_set is sufficient. */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7108

7109 7110
		my_bitmap_map *old_map;
		old_map= dbug_tmp_use_all_columns(table, table->read_set);
7111 7112
		auto_inc = (longlong) table->found_next_number_field->
				val_int_offset(table->s->rec_buff_length) + 1;
7113
		dbug_tmp_restore_column_map(table->read_set, old_map);
7114
	}
7115

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7116 7117 7118
	dict_table_autoinc_initialize(prebuilt->table, auto_inc);

func_exit:
7119
	(void) extra(HA_EXTRA_NO_KEYREAD);
7120

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7121 7122 7123 7124
	index_end();

	*ret = auto_inc;

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7125 7126
func_exit_early:
	/* Since MySQL does not seem to call autocommit after SHOW TABLE
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7127
	STATUS (even if we would register the trx here), we commit our
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7128
	transaction here if it was started here. This is to eliminate a
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7129 7130 7131
	dangling transaction. If the user had AUTOCOMMIT=0, then SHOW
	TABLE STATUS does leave a dangling transaction if the user does not
	himself call COMMIT. */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7132 7133 7134 7135 7136 7137

	if (trx_was_not_started) {

		innobase_commit_low(prebuilt->trx);
	}

7138
	return(error);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7139 7140
}

7141
/*******************************************************************************
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7142 7143 7144
This function initializes the auto-inc counter if it has not been
initialized yet. This function does not change the value of the auto-inc
counter if it already has been initialized. Returns the value of the
7145 7146 7147
auto-inc counter in *first_value, and ULONGLONG_MAX in *nb_reserved_values (as
we have a table-level lock). offset, increment, nb_desired_values are ignored.
*first_value is set to -1 if error (deadlock or lock wait timeout)            */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7148

7149 7150 7151 7152 7153 7154 7155
void ha_innobase::get_auto_increment(
/*=================================*/
        ulonglong offset,              /* in */
        ulonglong increment,           /* in */
        ulonglong nb_desired_values,   /* in */
        ulonglong *first_value,        /* out */
        ulonglong *nb_reserved_values) /* out */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7156
{
7157 7158 7159
	longlong	nr;
	int		error;

7160
	/* Prepare prebuilt->trx in the table handle */
7161
	update_thd(ha_thd());
7162

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7163 7164 7165
	error = innobase_read_and_init_auto_inc(&nr);

	if (error) {
7166 7167 7168
		/* This should never happen in the current (5.0.6) code, since
		we call this function only after the counter has been
		initialized. */
7169

7170
		ut_print_timestamp(stderr);
7171 7172
		sql_print_error("Error %lu in ::get_auto_increment()",
				(ulong) error);
7173 7174
                *first_value= (~(ulonglong) 0);
		return;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7175
	}
7176

7177 7178 7179
        *first_value= (ulonglong) nr;
        /* table-level autoinc lock reserves up to +inf */
        *nb_reserved_values= ULONGLONG_MAX;
7180 7181
}

7182 7183
/* See comment in handler.h */
int
osku@127.(none)'s avatar
osku@127.(none) committed
7184
ha_innobase::reset_auto_increment(ulonglong value)
7185 7186 7187
{
	DBUG_ENTER("ha_innobase::reset_auto_increment");

7188
	int	error;
7189

7190
	update_thd(ha_thd());
7191

7192 7193 7194 7195 7196 7197
	error = row_lock_table_autoinc_for_mysql(prebuilt);

	if (error != DB_SUCCESS) {
		error = convert_error_code_to_mysql(error, user_thd);

		DBUG_RETURN(error);
7198
	}
7199

osku@127.(none)'s avatar
osku@127.(none) committed
7200
	dict_table_autoinc_initialize(prebuilt->table, value);
7201 7202 7203 7204

	DBUG_RETURN(0);
}

7205 7206 7207 7208
/* See comment in handler.cc */
bool
ha_innobase::get_error_message(int error, String *buf)
{
7209
	trx_t*	trx = check_trx_exists(ht, ha_thd());
7210 7211 7212 7213 7214 7215 7216

	buf->copy(trx->detailed_error, strlen(trx->detailed_error),
		system_charset_info);

	return FALSE;
}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7217 7218 7219 7220
/***********************************************************************
Compares two 'refs'. A 'ref' is the (internal) primary key value of the row.
If there is no explicitly declared non-null unique key or a primary key, then
InnoDB internally uses the row id as the primary key. */
monty@mysql.com's avatar
monty@mysql.com committed
7221

7222 7223
int
ha_innobase::cmp_ref(
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7224 7225 7226 7227 7228 7229 7230
/*=================*/
				/* out: < 0 if ref1 < ref2, 0 if equal, else
				> 0 */
	const mysql_byte* ref1,	/* in: an (internal) primary key value in the
				MySQL key value format */
	const mysql_byte* ref2)	/* in: an (internal) primary key value in the
				MySQL key value format */
7231 7232
{
	enum_field_types mysql_type;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7233 7234 7235 7236 7237
	Field*		field;
	KEY_PART_INFO*	key_part;
	KEY_PART_INFO*	key_part_end;
	uint		len1;
	uint		len2;
7238
	int		result;
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7239 7240 7241 7242 7243 7244 7245 7246 7247 7248 7249 7250 7251 7252 7253

	if (prebuilt->clust_index_was_generated) {
		/* The 'ref' is an InnoDB row id */

		return(memcmp(ref1, ref2, DATA_ROW_ID_LEN));
	}

	/* Do a type-aware comparison of primary key fields. PK fields
	are always NOT NULL, so no checks for NULL are performed. */

	key_part = table->key_info[table->s->primary_key].key_part;

	key_part_end = key_part
			+ table->key_info[table->s->primary_key].key_parts;

7254 7255 7256
	for (; key_part != key_part_end; ++key_part) {
		field = key_part->field;
		mysql_type = field->type();
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7257

7258 7259 7260 7261
		if (mysql_type == MYSQL_TYPE_TINY_BLOB
			|| mysql_type == MYSQL_TYPE_MEDIUM_BLOB
			|| mysql_type == MYSQL_TYPE_BLOB
			|| mysql_type == MYSQL_TYPE_LONG_BLOB) {
7262

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7263 7264 7265 7266 7267 7268
			/* In the MySQL key value format, a column prefix of
			a BLOB is preceded by a 2-byte length field */

			len1 = innobase_read_from_2_little_endian(ref1);
			len2 = innobase_read_from_2_little_endian(ref2);

7269 7270
			ref1 += 2;
			ref2 += 2;
7271 7272
			result = ((Field_blob*)field)->cmp( ref1, len1,
                                                            ref2, len2);
7273
		} else {
7274
			result = field->key_cmp(ref1, ref2);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7275 7276 7277 7278 7279
		}

		if (result) {

			return(result);
7280 7281
		}

7282 7283
		ref1 += key_part->store_length;
		ref2 += key_part->store_length;
7284
	}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7285 7286

	return(0);
7287 7288
}

7289 7290 7291 7292 7293 7294 7295 7296 7297 7298 7299 7300 7301 7302 7303 7304 7305 7306 7307 7308 7309 7310 7311 7312 7313 7314 7315
/***********************************************************************
Asks InnoDB whether a query to a table can be cached. Also hands back, in
*call_back, the function to use for checking whether query caching is
permitted, and resets *engine_data to 0. */

my_bool
ha_innobase::register_query_cache_table(
/*====================================*/
					/* out: TRUE if query caching
					of the table is permitted */
	THD*		thd,		/* in: user thread handle */
	char*		table_key,	/* in: concatenation of database name,
					the null character '\0',
					and the table name */
	uint		key_length,	/* in: length of the full name, i.e.
					len(dbname) + len(tablename) + 1 */
	qc_engine_callback*
			call_back,	/* out: pointer to function for
					checking if query caching
					is permitted */
	ulonglong	*engine_data)	/* in/out: data to call_back */
{
	my_bool	permitted;

	*call_back = innobase_query_caching_of_table_permitted;
	*engine_data = 0;

	/* Answer for the present moment with the same function that was
	just registered as the callback. */
	permitted = innobase_query_caching_of_table_permitted(thd, table_key,
							      key_length,
							      engine_data);

	return(permitted);
}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7316 7317
char*
ha_innobase::get_mysql_bin_log_name()
guilhem@mysql.com's avatar
guilhem@mysql.com committed
7318
{
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7319
	return(trx_sys_mysql_bin_log_name);
guilhem@mysql.com's avatar
guilhem@mysql.com committed
7320 7321
}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7322 7323
/* Returns the global trx_sys_mysql_bin_log_pos converted to ulonglong. */
ulonglong
ha_innobase::get_mysql_bin_log_pos()
{
	/* The global is an ib_longlong, a typedef for a 64-bit integer
	(__int64 or longlong), so the conversion to ulonglong is ok. */

	return((ulonglong) trx_sys_mysql_bin_log_pos);
}

7331
extern "C" {
7332
/**********************************************************************
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7333 7334 7335 7336 7337 7338 7339
This function is used to find the storage length in bytes of the first n
characters for prefix indexes using a multibyte character set. The function
finds charset information and returns length of prefix_len characters in the
index field in bytes.

NOTE: the prototype of this function is copied to data0type.c! If you change
this function, you MUST change also data0type.c! */
7340

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7341 7342 7343 7344 7345
ulint
innobase_get_at_most_n_mbchars(
/*===========================*/
				/* out: number of bytes occupied by the first
				n characters */
7346
	ulint charset_id,	/* in: character set id */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7347 7348 7349
	ulint prefix_len,	/* in: prefix length in bytes of the index
				(this has to be divided by mbmaxlen to get the
				number of CHARACTERS n in the prefix) */
7350
	ulint data_len,		/* in: length of the string in bytes */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7351
	const char* str)	/* in: character string */
7352
{
7353
	ulint char_length;	/* character length in bytes */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7354
	ulint n_chars;		/* number of characters in prefix */
7355
	CHARSET_INFO* charset;	/* charset used in the field */
7356

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7357
	charset = get_charset((uint) charset_id, MYF(MY_WME));
7358

7359 7360
	ut_ad(charset);
	ut_ad(charset->mbmaxlen);
7361

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7362
	/* Calculate how many characters at most the prefix index contains */
7363

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7364
	n_chars = prefix_len / charset->mbmaxlen;
7365

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7366 7367 7368
	/* If the charset is multi-byte, then we must find the length of the
	first at most n chars in the string. If the string contains less
	characters than n, then we return the length to the end of the last
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7369
	character. */
7370

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7371 7372
	if (charset->mbmaxlen > 1) {
		/* my_charpos() returns the byte length of the first n_chars
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7373 7374 7375 7376 7377 7378 7379 7380 7381 7382 7383 7384 7385 7386 7387 7388
		characters, or a value bigger than the length of str, if
		there were not enough full characters in str.

		Why does the code below work:
		Suppose that we are looking for n UTF-8 characters.

		1) If the string is long enough, then the prefix contains at
		least n complete UTF-8 characters + maybe some extra
		characters + an incomplete UTF-8 character. No problem in
		this case. The function returns the pointer to the
		end of the nth character.

		2) If the string is not long enough, then the string contains
		the complete value of a column, that is, only complete UTF-8
		characters, and we can store in the column prefix index the
		whole string. */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7389

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7390
		char_length = my_charpos(charset, str,
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7391
						str + data_len, (int) n_chars);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7392 7393
		if (char_length > data_len) {
			char_length = data_len;
7394
		}
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7395
	} else {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7396 7397 7398 7399 7400
		if (data_len < prefix_len) {
			char_length = data_len;
		} else {
			char_length = prefix_len;
		}
7401
	}
7402

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7403
	return(char_length);
7404 7405 7406
}
}

7407
/**********************************************************************
7408
This function returns true if
7409 7410

1) SQL-query in the current thread
7411
is either REPLACE or LOAD DATA INFILE REPLACE.
7412 7413 7414 7415

2) SQL-query in the current thread
is INSERT ON DUPLICATE KEY UPDATE.

7416
NOTE that storage/innobase/row/row0ins.c must contain the
7417
prototype for this function ! */
7418
extern "C"
7419
ibool
7420
innobase_query_is_update(void)
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7421
/*==========================*/
7422
{
7423
	THD*	thd = current_thd;
antony@ppcg5.local's avatar
antony@ppcg5.local committed
7424
	trx_t*	trx;
7425

7426 7427 7428 7429
	if (!thd) {
		/* InnoDB's internal threads may run InnoDB stored procedures
		that call this function. Then current_thd is not defined
		(it is probably NULL). */
7430

7431
		return(FALSE);
7432
	}
7433

7434
	trx = check_trx_exists(innodb_hton_ptr, thd);
antony@ppcg5.local's avatar
antony@ppcg5.local committed
7435 7436

	return(trx->allow_duplicates);
guilhem@mysql.com's avatar
guilhem@mysql.com committed
7437 7438
}

7439 7440
/***********************************************************************
This function is used to prepare X/Open XA distributed transaction   */
7441
static
7442
int
7443 7444
innobase_xa_prepare(
/*================*/
7445
			/* out: 0 or error number */
7446
        handlerton *hton,
7447 7448 7449 7450 7451 7452
	THD*	thd,	/* in: handle to the MySQL thread of the user
			whose XA transaction should be prepared */
	bool	all)	/* in: TRUE - commit transaction
			FALSE - the current SQL statement ended */
{
	int error = 0;
7453
	trx_t* trx = check_trx_exists(hton, thd);
serg@serg.mylan's avatar
serg@serg.mylan committed
7454

antony@ppcg5.local's avatar
antony@ppcg5.local committed
7455 7456
	if (thd_sql_command(thd) != SQLCOM_XA_PREPARE &&
	    (all || !thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)))
7457
	{
serg@serg.mylan's avatar
serg@serg.mylan committed
7458

7459 7460
		/* For ibbackup to work the order of transactions in binlog
		and InnoDB must be the same. Consider the situation
serg@serg.mylan's avatar
serg@serg.mylan committed
7461

7462 7463 7464 7465
		  thread1> prepare; write to binlog; ...
			  <context switch>
		  thread2> prepare; write to binlog; commit
		  thread1>			     ... commit
serg@serg.mylan's avatar
serg@serg.mylan committed
7466

7467 7468
		To ensure this will not happen we're taking the mutex on
		prepare, and releasing it on commit.
serg@serg.mylan's avatar
serg@serg.mylan committed
7469

7470 7471 7472 7473 7474 7475 7476 7477 7478 7479 7480
		Note: only do it for normal commits, done via ha_commit_trans.
		If 2pc protocol is executed by external transaction
		coordinator, it will be just a regular MySQL client
		executing XA PREPARE and XA COMMIT commands.
		In this case we cannot know how many minutes or hours
		will be between XA PREPARE and XA COMMIT, and we don't want
		to block for undefined period of time.
		*/
		pthread_mutex_lock(&prepare_commit_mutex);
		trx->active_trans = 2;
	}
7481

antony@ppcg5.local's avatar
antony@ppcg5.local committed
7482
	if (!THDVAR(thd, support_xa)) {
7483 7484 7485 7486

		return(0);
	}

7487
	trx->xid=thd->transaction.xid_state.xid;
7488 7489 7490 7491 7492 7493 7494 7495 7496

	/* Release a possible FIFO ticket and search latch. Since we will
	reserve the kernel mutex, we have to release the search system latch
	first to obey the latching order. */

	innobase_release_stat_resources(trx);

	if (trx->active_trans == 0 && trx->conc_state != TRX_NOT_STARTED) {

7497 7498
	  sql_print_error("trx->active_trans == 0, but trx->conc_state != "
			  "TRX_NOT_STARTED");
7499 7500
	}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7501
	if (all
antony@ppcg5.local's avatar
antony@ppcg5.local committed
7502
		|| (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) {
serg@serg.mylan's avatar
serg@serg.mylan committed
7503

7504 7505
		/* We were instructed to prepare the whole transaction, or
		this is an SQL statement end and autocommit is on */
7506

7507
		ut_ad(trx->active_trans);
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7508

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7509
		error = (int) trx_prepare_for_mysql(trx);
7510
	} else {
7511
		/* We just mark the SQL statement ended and do not do a
7512 7513 7514 7515 7516
		transaction prepare */

		if (trx->auto_inc_lock) {
			/* If we had reserved the auto-inc lock for some
			table in this SQL statement we release it now */
7517

7518 7519 7520 7521 7522 7523 7524 7525 7526 7527 7528 7529 7530 7531
			row_unlock_table_autoinc_for_mysql(trx);
		}
		/* Store the current undo_no of the transaction so that we
		know where to roll back if we have to roll back the next
		SQL statement */

		trx_mark_sql_stat_end(trx);
	}

	/* Tell the InnoDB server that there might be work for utility
	threads: */

	srv_active_wake_master_thread();

7532
	return error;
7533 7534 7535 7536
}

/***********************************************************************
This function is used to recover X/Open XA distributed transactions   */
7537
static
7538
int
7539 7540
innobase_xa_recover(
/*================*/
7541
				/* out: number of prepared transactions
7542
				stored in xid_list */
7543
        handlerton *hton,
7544
	XID*	xid_list,	/* in/out: prepared transactions */
7545 7546 7547
	uint	len)		/* in: number of slots in xid_list */
{
	if (len == 0 || xid_list == NULL) {
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7548 7549

		return(0);
7550 7551
	}

heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7552
	return(trx_recover_for_mysql(xid_list, len));
7553 7554 7555 7556 7557
}

/***********************************************************************
This function is used to commit one X/Open XA distributed transaction
which is in the prepared state */
7558
static
7559
int
7560 7561
innobase_commit_by_xid(
/*===================*/
7562
			/* out: 0 or error number */
7563
        handlerton *hton,
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7564
	XID*	xid)	/* in: X/Open XA transaction identification */
7565 7566 7567 7568 7569 7570 7571
{
	trx_t*	trx;

	trx = trx_get_trx_by_xid(xid);

	if (trx) {
		innobase_commit_low(trx);
7572

7573 7574 7575 7576 7577 7578 7579 7580 7581
		return(XA_OK);
	} else {
		return(XAER_NOTA);
	}
}

/***********************************************************************
This function is used to rollback one X/Open XA distributed transaction
which is in the prepared state */
7582
static
7583
int
7584 7585
innobase_rollback_by_xid(
/*=====================*/
7586
			/* out: 0 or error number */
7587
        handlerton *hton,
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7588
	XID	*xid)	/* in: X/Open XA transaction identification */
7589 7590 7591 7592 7593 7594 7595 7596 7597 7598 7599 7600
{
	trx_t*	trx;

	trx = trx_get_trx_by_xid(xid);

	if (trx) {
		return(innobase_rollback_trx(trx));
	} else {
		return(XAER_NOTA);
	}
}

7601
/***********************************************************************
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7602 7603
Create a consistent view for a cursor based on current transaction
which is created if the corresponding MySQL thread still lacks one.
7604
This consistent view is then used inside of MySQL when accessing records
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7605
using a cursor. */
7606
static
7607
void*
7608
innobase_create_cursor_view(
7609
/*========================*/
7610 7611 7612
                          /* out: pointer to cursor view or NULL */
        handlerton *hton, /* in: innobase hton */
	THD* thd)	  /* in: user thread handle */
7613 7614
{
	return(read_cursor_view_create_for_mysql(
7615
					check_trx_exists(hton, thd)));
7616 7617 7618
}

/***********************************************************************
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7619
Close the given consistent cursor view of a transaction and restore
7620
global read view to a transaction read view. Transaction is created if the
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7621
corresponding MySQL thread still lacks one. */
7622
static
7623 7624
void
innobase_close_cursor_view(
7625
/*=======================*/
7626
        handlerton *hton,
7627
	THD*	thd,	/* in: user thread handle */
7628 7629
	void*	curview)/* in: Consistent read view to be closed */
{
7630
	read_cursor_view_close_for_mysql(check_trx_exists(hton, current_thd),
7631 7632 7633 7634
						(cursor_view_t*) curview);
}

/***********************************************************************
7635 7636
Set the given consistent cursor view to a transaction which is created
if the corresponding MySQL thread still lacks one. If the given
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7637 7638
consistent cursor view is NULL global read view of a transaction is
restored to a transaction read view. */
7639
static
7640 7641 7642
void
innobase_set_cursor_view(
/*=====================*/
7643
        handlerton *hton,
7644
	THD*	thd,	/* in: user thread handle */
heikki@hundin.mysql.fi's avatar
heikki@hundin.mysql.fi committed
7645
	void*	curview)/* in: Consistent cursor view to be set */
7646
{
7647
	read_cursor_set_for_mysql(check_trx_exists(hton, current_thd),
7648 7649 7650
						(cursor_view_t*) curview);
}

7651

7652 7653 7654
/***********************************************************************
Decides whether the table data is compatible with the requested change.
Returns COMPATIBLE_DATA_NO when:
  - table_changes is anything other than IS_EQUAL_YES, or
  - an AUTO_INCREMENT value was specified and is non-zero, or
  - a row format was specified and differs from get_row_type();
otherwise returns COMPATIBLE_DATA_YES. */
bool ha_innobase::check_if_incompatible_data(
	HA_CREATE_INFO*	info,		/* in: create info of the request */
	uint		table_changes)	/* in: IS_EQUAL_* result computed by
					the caller */
{
	if (table_changes != IS_EQUAL_YES) {

		return COMPATIBLE_DATA_NO;
	}

	/* Check that auto_increment value was not changed */
	if ((info->used_fields & HA_CREATE_USED_AUTO) &&
		info->auto_increment_value != 0) {

		return COMPATIBLE_DATA_NO;
	}

	/* Check that row format didn't change. This must be keyed on the
	ROW_FORMAT flag: testing HA_CREATE_USED_AUTO here would skip the
	check whenever ROW_FORMAT is given without AUTO_INCREMENT. */
	if ((info->used_fields & HA_CREATE_USED_ROW_FORMAT) &&
		get_row_type() != info->row_type) {

		return COMPATIBLE_DATA_NO;
	}

	return COMPATIBLE_DATA_YES;
}
acurtis@xiphis.org's avatar
acurtis@xiphis.org committed
7677

brian@zim.(none)'s avatar
brian@zim.(none) committed
7678 7679 7680 7681 7682 7683 7684 7685
/*
  Status-variable callback: calls innodb_export_status() and then points
  the given SHOW_VAR at the innodb_status_variables array (SHOW_ARRAY).
  The thd and buff arguments are not used. Always returns 0.
*/
static int show_innodb_vars(THD *thd, SHOW_VAR *var, char *buff)
{
  /* presumably refreshes the exported values before they are read */
  innodb_export_status();

  var->value= (char *) &innodb_status_variables;
  var->type= SHOW_ARRAY;

  return 0;
}

7686
/*
  Status variables handed to the plugin declaration: a single "Innodb"
  entry of type SHOW_FUNC that is resolved through show_innodb_vars(),
  followed by the NullS end-of-list entry.
*/
static SHOW_VAR innodb_status_variables_export[]=
{
  { "Innodb", (char*) &show_innodb_vars, SHOW_FUNC },
  { NullS, NullS, SHOW_LONG }
};

7691
static struct st_mysql_storage_engine innobase_storage_engine=
7692
{ MYSQL_HANDLERTON_INTERFACE_VERSION };
acurtis@xiphis.org's avatar
acurtis@xiphis.org committed
7693

antony@ppcg5.local's avatar
antony@ppcg5.local committed
7694 7695 7696 7697 7698 7699 7700 7701 7702 7703 7704 7705 7706 7707 7708 7709 7710 7711 7712 7713 7714 7715 7716 7717 7718 7719
/* plugin options */

/* Boolean bound to innobase_use_checksums; flagged read-only. The help
text documents it as enabled by default (trailing TRUE). */
static MYSQL_SYSVAR_BOOL(checksums, innobase_use_checksums,
  PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
  "Enable InnoDB checksums validation (enabled by default). "
  "Disable with --skip-innodb-checksums.",
  NULL, NULL, TRUE);

/* String bound to innobase_data_home_dir; flagged read-only, NULL default. */
static MYSQL_SYSVAR_STR(data_home_dir, innobase_data_home_dir,
  PLUGIN_VAR_READONLY,
  "The common part for InnoDB table spaces.",
  NULL, NULL, NULL);

/* Boolean bound to innobase_use_doublewrite; flagged read-only. The help
text documents it as enabled by default (trailing TRUE). */
static MYSQL_SYSVAR_BOOL(doublewrite, innobase_use_doublewrite,
  PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
  "Enable InnoDB doublewrite buffer (enabled by default). "
  "Disable with --skip-innodb-doublewrite.",
  NULL, NULL, TRUE);

static MYSQL_SYSVAR_ULONG(fast_shutdown, innobase_fast_shutdown,
  PLUGIN_VAR_OPCMDARG,
  "Speeds up the shutdown process of the InnoDB storage engine. Possible "
  "values are 0, 1 (faster)"
  /*
    NetWare can't close unclosed files, can't automatically kill remaining
    threads, etc, so on this OS we disable the crash-like InnoDB shutdown.
  */
7720
  IF_NETWARE("", " or 2 (fastest - crash-like)")
antony@ppcg5.local's avatar
antony@ppcg5.local committed
7721 7722 7723 7724 7725 7726 7727 7728 7729 7730
  ".",
  NULL, NULL, 1, 0, IF_NETWARE(1,2), 0);

/* Boolean bound to innobase_file_per_table; flagged read-only, FALSE
default. */
static MYSQL_SYSVAR_BOOL(file_per_table, innobase_file_per_table,
  PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
  "Stores each InnoDB table to an .ibd file in the database dir.",
  NULL, NULL, FALSE);

/* Unsigned long bound to srv_flush_log_at_trx_commit. The help text lists
the accepted values 0, 1 and 2; the numeric arguments are presumably
default 1, range 0..2 (confirm against the MYSQL_SYSVAR_ULONG argument
order). */
static MYSQL_SYSVAR_ULONG(flush_log_at_trx_commit, srv_flush_log_at_trx_commit,
  PLUGIN_VAR_OPCMDARG,
  "Set to 0 (write and flush once per second), "
  "1 (write and flush at each commit) "
  "or 2 (write at commit, flush once per second).",
  NULL, NULL, 1, 0, 2, 0);

/* String bound to innobase_unix_file_flush_method; flagged read-only, NULL
default. */
static MYSQL_SYSVAR_STR(flush_method, innobase_unix_file_flush_method,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "With which method to flush data.", NULL, NULL, NULL);

/* Boolean bound to innobase_locks_unsafe_for_binlog; flagged read-only,
FALSE default. */
static MYSQL_SYSVAR_BOOL(locks_unsafe_for_binlog, innobase_locks_unsafe_for_binlog,
  PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
  "Force InnoDB to not use next-key locking, to use only row-level locking.",
  NULL, NULL, FALSE);

/* String bound to innobase_log_arch_dir; flagged read-only, NULL default.
Unlike log_archive, this one is declared regardless of UNIV_LOG_ARCHIVE. */
static MYSQL_SYSVAR_STR(log_arch_dir, innobase_log_arch_dir,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Where full logs should be archived.", NULL, NULL, NULL);

antony@ppcg5.local's avatar
antony@ppcg5.local committed
7748
#ifdef UNIV_LOG_ARCHIVE
/* Boolean bound to innobase_log_archive; flagged read-only, FALSE default.
Only declared when UNIV_LOG_ARCHIVE is defined. */
static MYSQL_SYSVAR_BOOL(log_archive, innobase_log_archive,
  PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
  "Set to 1 if you want to have logs archived.",
  NULL, NULL, FALSE);
#endif /* UNIV_LOG_ARCHIVE */
antony@ppcg5.local's avatar
antony@ppcg5.local committed
7753 7754 7755 7756 7757 7758 7759 7760 7761 7762 7763 7764 7765 7766 7767 7768 7769 7770 7771 7772 7773 7774 7775 7776 7777

/* String bound to innobase_log_group_home_dir; flagged read-only, NULL
default. */
static MYSQL_SYSVAR_STR(log_group_home_dir, innobase_log_group_home_dir,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Path to InnoDB log files.", NULL, NULL, NULL);

/* Unsigned long bound to srv_max_buf_pool_modified_pct; a percentage,
presumably default 90 within 0..100 (confirm against the
MYSQL_SYSVAR_ULONG argument order). */
static MYSQL_SYSVAR_ULONG(max_dirty_pages_pct, srv_max_buf_pool_modified_pct,
  PLUGIN_VAR_RQCMDARG,
  "Percentage of dirty pages allowed in bufferpool.",
  NULL, NULL, 90, 0, 100, 0);

/* Unsigned long bound to srv_max_purge_lag; per the help text, 0 means
no limit. */
static MYSQL_SYSVAR_ULONG(max_purge_lag, srv_max_purge_lag,
  PLUGIN_VAR_RQCMDARG,
  "Desired maximum length of the purge queue (0 = no limit)",
  NULL, NULL, 0, 0, ~0L, 0);

/* Boolean bound to innobase_rollback_on_timeout; flagged read-only, FALSE
default. */
static MYSQL_SYSVAR_BOOL(rollback_on_timeout, innobase_rollback_on_timeout,
  PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
  "Roll back the complete transaction on lock wait timeout, for 4.x compatibility (disabled by default)",
  NULL, NULL, FALSE);

/* Boolean bound to innobase_create_status_file; flagged
PLUGIN_VAR_NOSYSVAR, FALSE default. */
static MYSQL_SYSVAR_BOOL(status_file, innobase_create_status_file,
  PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_NOSYSVAR,
  "Enable SHOW INNODB STATUS output in the innodb_status.<pid> file",
  NULL, NULL, FALSE);

7778 7779 7780 7781 7782
/* Boolean bound to innobase_stats_on_metadata; TRUE default ("on by
default" in the help text).
NOTE(review): the flags are identical to status_file, including
PLUGIN_VAR_NOSYSVAR; confirm that this option is really meant not to be
exposed as a server variable and that the flag is not a copy-paste
leftover. */
static MYSQL_SYSVAR_BOOL(stats_on_metadata, innobase_stats_on_metadata,
  PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_NOSYSVAR,
  "Enable statistics gathering for metadata commands such as SHOW TABLE STATUS (on by default)",
  NULL, NULL, TRUE);

antony@ppcg5.local's avatar
antony@ppcg5.local committed
7783 7784 7785 7786 7787 7788 7789 7790 7791 7792 7793 7794 7795 7796 7797 7798 7799 7800 7801 7802 7803 7804 7805 7806 7807 7808 7809 7810 7811 7812 7813 7814 7815 7816 7817 7818 7819 7820 7821 7822 7823 7824 7825 7826 7827 7828 7829 7830 7831 7832 7833 7834 7835 7836 7837 7838 7839 7840 7841 7842 7843 7844 7845 7846 7847 7848 7849 7850 7851 7852 7853 7854 7855 7856 7857 7858 7859 7860 7861 7862 7863 7864 7865 7866 7867 7868 7869 7870 7871 7872 7873 7874 7875 7876 7877 7878 7879 7880 7881 7882 7883 7884 7885 7886
/* Signed long bound to innobase_additional_mem_pool_size; flagged
read-only. Default 1 MB, minimum 512 KB, block size 1024. The variable is
signed, so the upper bound is spelled LONG_MAX: ~0L would be -1 here, i.e.
a maximum below the minimum. */
static MYSQL_SYSVAR_LONG(additional_mem_pool_size, innobase_additional_mem_pool_size,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Size of a memory pool InnoDB uses to store data dictionary information and other internal data structures.",
  NULL, NULL, 1*1024*1024L, 512*1024L, LONG_MAX, 1024);

/* For the numeric variables below the trailing arguments are presumably
(check, update, default, min, max, block size) -- confirm against the
MYSQL_SYSVAR_* macro definitions. */

/* Unsigned long bound to srv_auto_extend_increment; in megabytes per the
help text. */
static MYSQL_SYSVAR_ULONG(autoextend_increment, srv_auto_extend_increment,
  PLUGIN_VAR_RQCMDARG,
  "Data file autoextend increment in megabytes",
  NULL, NULL, 8L, 1L, 1000L, 0);

/* Long long bound to innobase_buffer_pool_size; flagged read-only. */
static MYSQL_SYSVAR_LONGLONG(buffer_pool_size, innobase_buffer_pool_size,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "The size of the memory buffer InnoDB uses to cache data and indexes of its tables.",
  NULL, NULL, 8*1024*1024L, 1024*1024L, LONGLONG_MAX, 1024*1024L);

/* Unsigned long bound to srv_commit_concurrency. */
static MYSQL_SYSVAR_ULONG(commit_concurrency, srv_commit_concurrency,
  PLUGIN_VAR_RQCMDARG,
  "Helps in performance tuning in heavily concurrent environments.",
  NULL, NULL, 0, 0, 1000, 0);

/* Unsigned long bound to srv_n_free_tickets_to_enter. */
static MYSQL_SYSVAR_ULONG(concurrency_tickets, srv_n_free_tickets_to_enter,
  PLUGIN_VAR_RQCMDARG,
  "Number of times a thread is allowed to enter InnoDB within the same SQL query after it has once got the ticket",
  NULL, NULL, 500L, 1L, ~0L, 0);

/* Signed long bound to innobase_file_io_threads; flagged read-only. */
static MYSQL_SYSVAR_LONG(file_io_threads, innobase_file_io_threads,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Number of file I/O threads in InnoDB.",
  NULL, NULL, 4, 4, 64, 0);

/* Signed long bound to innobase_force_recovery; flagged read-only. */
static MYSQL_SYSVAR_LONG(force_recovery, innobase_force_recovery,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Helps to save your data in case the disk image of the database becomes corrupt.",
  NULL, NULL, 0, 0, 6, 0);

/* Signed long bound to innobase_lock_wait_timeout; flagged read-only; in
seconds per the help text. */
static MYSQL_SYSVAR_LONG(lock_wait_timeout, innobase_lock_wait_timeout,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Timeout in seconds an InnoDB transaction may wait for a lock before being rolled back.",
  NULL, NULL, 50, 1, 1024 * 1024 * 1024, 0);

/* Signed long bound to innobase_log_buffer_size; flagged read-only.
Default 1 MB, minimum 256 KB, block size 1024. The variable is signed, so
the upper bound is spelled LONG_MAX: ~0L would be -1 here, i.e. a maximum
below the minimum. */
static MYSQL_SYSVAR_LONG(log_buffer_size, innobase_log_buffer_size,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "The size of the buffer which InnoDB uses to write log to the log files on disk.",
  NULL, NULL, 1024*1024L, 256*1024L, LONG_MAX, 1024);

/* Long long bound to innobase_log_file_size; flagged read-only. */
static MYSQL_SYSVAR_LONGLONG(log_file_size, innobase_log_file_size,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Size of each log file in a log group.",
  NULL, NULL, 5*1024*1024L, 1*1024*1024L, LONGLONG_MAX, 1024*1024L);

/* Signed long bound to innobase_log_files_in_group; flagged read-only.
NOTE(review): the help text recommends 3 while the first numeric argument
(presumably the default) is 2 -- confirm which is intended. */
static MYSQL_SYSVAR_LONG(log_files_in_group, innobase_log_files_in_group,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Number of log files in the log group. InnoDB writes to the files in a circular fashion. Value 3 is recommended here.",
  NULL, NULL, 2, 2, 100, 0);

/* Signed long bound to innobase_mirrored_log_groups; flagged read-only. */
static MYSQL_SYSVAR_LONG(mirrored_log_groups, innobase_mirrored_log_groups,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Number of identical copies of log groups we keep for the database. Currently this should be set to 1.",
  NULL, NULL, 1, 1, 10, 0);

/* Signed long bound to innobase_open_files; flagged read-only. Default
300, minimum 10. The variable is signed, so the upper bound is spelled
LONG_MAX: ~0L would be -1 here, i.e. a maximum below the minimum. */
static MYSQL_SYSVAR_LONG(open_files, innobase_open_files,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "How many files at the maximum InnoDB keeps open at the same time.",
  NULL, NULL, 300L, 10L, LONG_MAX, 0);

/* Unsigned long bound to srv_n_spin_wait_rounds. */
static MYSQL_SYSVAR_ULONG(sync_spin_loops, srv_n_spin_wait_rounds,
  PLUGIN_VAR_RQCMDARG,
  "Count of spin-loop rounds in InnoDB mutexes",
  NULL, NULL, 20L, 0L, ~0L, 0);

/* Unsigned long bound to srv_thread_concurrency; per the help text, 0
disables the thread throttling. */
static MYSQL_SYSVAR_ULONG(thread_concurrency, srv_thread_concurrency,
  PLUGIN_VAR_RQCMDARG,
  "Helps in performance tuning in heavily concurrent environments. Sets the maximum number of threads allowed inside InnoDB. Value 0 will disable the thread throttling.",
  NULL, NULL, 8, 0, 1000, 0);

/* Unsigned long bound to srv_thread_sleep_delay; in microseconds per the
help text, 0 disables the sleep. */
static MYSQL_SYSVAR_ULONG(thread_sleep_delay, srv_thread_sleep_delay,
  PLUGIN_VAR_RQCMDARG,
  "Time of innodb thread sleeping before joining InnoDB queue (usec). Value 0 disable a sleep",
  NULL, NULL, 10000L, 0L, ~0L, 0);

/* String bound to innobase_data_file_path; flagged read-only, NULL
default. */
static MYSQL_SYSVAR_STR(data_file_path, innobase_data_file_path,
  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
  "Path to individual files and their sizes.",
  NULL, NULL, NULL);

static struct st_mysql_sys_var* innobase_system_variables[]= {
  MYSQL_SYSVAR(additional_mem_pool_size),
  MYSQL_SYSVAR(autoextend_increment),
  MYSQL_SYSVAR(buffer_pool_size),
  MYSQL_SYSVAR(checksums),
  MYSQL_SYSVAR(commit_concurrency),
  MYSQL_SYSVAR(concurrency_tickets),
  MYSQL_SYSVAR(data_file_path),
  MYSQL_SYSVAR(data_home_dir),
  MYSQL_SYSVAR(doublewrite),
  MYSQL_SYSVAR(fast_shutdown),
  MYSQL_SYSVAR(file_io_threads),
  MYSQL_SYSVAR(file_per_table),
  MYSQL_SYSVAR(flush_log_at_trx_commit),
  MYSQL_SYSVAR(flush_method),
  MYSQL_SYSVAR(force_recovery),
  MYSQL_SYSVAR(locks_unsafe_for_binlog),
  MYSQL_SYSVAR(lock_wait_timeout),
  MYSQL_SYSVAR(log_arch_dir),
antony@ppcg5.local's avatar
antony@ppcg5.local committed
7887
#ifdef UNIV_LOG_ARCHIVE
antony@ppcg5.local's avatar
antony@ppcg5.local committed
7888
  MYSQL_SYSVAR(log_archive),
antony@ppcg5.local's avatar
antony@ppcg5.local committed
7889
#endif /* UNIV_LOG_ARCHIVE */
antony@ppcg5.local's avatar
antony@ppcg5.local committed
7890 7891 7892 7893 7894 7895 7896 7897 7898
  MYSQL_SYSVAR(log_buffer_size),
  MYSQL_SYSVAR(log_file_size),
  MYSQL_SYSVAR(log_files_in_group),
  MYSQL_SYSVAR(log_group_home_dir),
  MYSQL_SYSVAR(max_dirty_pages_pct),
  MYSQL_SYSVAR(max_purge_lag),
  MYSQL_SYSVAR(mirrored_log_groups),
  MYSQL_SYSVAR(open_files),
  MYSQL_SYSVAR(rollback_on_timeout),
7899
  MYSQL_SYSVAR(stats_on_metadata),
antony@ppcg5.local's avatar
antony@ppcg5.local committed
7900 7901 7902 7903 7904 7905 7906 7907 7908
  MYSQL_SYSVAR(status_file),
  MYSQL_SYSVAR(support_xa),
  MYSQL_SYSVAR(sync_spin_loops),
  MYSQL_SYSVAR(table_locks),
  MYSQL_SYSVAR(thread_concurrency),
  MYSQL_SYSVAR(thread_sleep_delay),
  NULL
};

acurtis@xiphis.org's avatar
acurtis@xiphis.org committed
7909 7910 7911
/* Plugin declaration for the InnoDB storage engine. */
mysql_declare_plugin(innobase)
{
  MYSQL_STORAGE_ENGINE_PLUGIN,          /* plugin type                  */
  &innobase_storage_engine,             /* storage engine descriptor    */
  innobase_hton_name,                   /* plugin name                  */
  "Innobase OY",                        /* author                       */
  "Supports transactions, row-level locking, and foreign keys",
  PLUGIN_LICENSE_GPL,                   /* license                      */
  innobase_init,                        /* Plugin Init                  */
  NULL,                                 /* Plugin Deinit                */
  0x0100,                               /* 1.0                          */
  innodb_status_variables_export,       /* status variables             */
  innobase_system_variables,            /* system variables             */
  NULL                                  /* reserved                     */
}
mysql_declare_plugin_end;

7926
#endif