rtas.c 57.8 KB
Newer Older
1
// SPDX-License-Identifier: GPL-2.0-or-later
Linus Torvalds's avatar
Linus Torvalds committed
2 3 4 5 6 7 8 9
/*
 *
 * Procedures for interfacing to the RTAS on CHRP machines.
 *
 * Peter Bergner, IBM	March 2001.
 * Copyright (C) 2001 IBM.
 */

10 11
#define pr_fmt(fmt)	"rtas: " fmt

12
#include <linux/bsearch.h>
13
#include <linux/capability.h>
14
#include <linux/delay.h>
15 16
#include <linux/export.h>
#include <linux/init.h>
17
#include <linux/kconfig.h>
18
#include <linux/kernel.h>
19
#include <linux/lockdep.h>
Yinghai Lu's avatar
Yinghai Lu committed
20
#include <linux/memblock.h>
21
#include <linux/mutex.h>
22 23
#include <linux/of.h>
#include <linux/of_fdt.h>
24
#include <linux/reboot.h>
25
#include <linux/sched.h>
26
#include <linux/security.h>
27 28 29
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/stdarg.h>
30
#include <linux/syscalls.h>
31 32
#include <linux/types.h>
#include <linux/uaccess.h>
33
#include <linux/xarray.h>
Linus Torvalds's avatar
Linus Torvalds committed
34

35 36
#include <asm/delay.h>
#include <asm/firmware.h>
37
#include <asm/interrupt.h>
Linus Torvalds's avatar
Linus Torvalds committed
38
#include <asm/machdep.h>
39
#include <asm/mmu.h>
Linus Torvalds's avatar
Linus Torvalds committed
40
#include <asm/page.h>
41
#include <asm/rtas-work-area.h>
42
#include <asm/rtas.h>
43
#include <asm/time.h>
44
#include <asm/trace.h>
45
#include <asm/udbg.h>
Linus Torvalds's avatar
Linus Torvalds committed
46

47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72
/*
 * struct rtas_filter - argument constraints for one RTAS function.
 *
 * buf_idx1/size_idx1 and buf_idx2/size_idx2 are positions within the
 * args buffer; a value of -1 marks the slot as unused.
 *
 * fixed_size is the buffer size assumed per the spec for functions
 * that take a buffer but no size parameter (e.g. ibm,errinjct). It is
 * 0 when unused.
 */
struct rtas_filter {
	const int buf_idx1;	/* args index of first buffer, or -1 */
	const int size_idx1;	/* args index of first buffer's size, or -1 */
	const int buf_idx2;	/* args index of second buffer, or -1 */
	const int size_idx2;	/* args index of second buffer's size, or -1 */
	const int fixed_size;	/* assumed buffer size, or 0 */
};

/**
 * struct rtas_function - Descriptor for RTAS functions.
 *
 * @token: Value of @name if it exists under the /rtas node.
 * @banned_for_syscall_on_le: Set when calling via sys_rtas is generally
 *                            allowed but specifically restricted on
 *                            ppc64le. Such functions are believed to have
 *                            no users on ppc64le, and we want to keep it
 *                            that way. Setting this when @filter is NULL
 *                            does not make sense.
 * @name: Function name.
 * @filter: If non-NULL, invoking this function via the rtas syscall is
 *          generally allowed, and @filter describes constraints on the
 *          arguments. See also @banned_for_syscall_on_le.
 * @lock: Pointer to an optional dedicated per-function mutex. Set this
 *        for functions that need several calls in sequence to complete
 *        one operation, where interleaved sequences would disrupt each
 *        other. Users of such a function must hold the associated lock
 *        for the whole call sequence. When setting this member, add an
 *        explanatory comment to the function table entry.
 */
struct rtas_function {
	s32 token;
	const bool banned_for_syscall_on_le:1;
	const char * const name;
	const struct rtas_filter *filter;
	struct mutex *lock;
};

90 91 92 93 94 95 96 97 98 99 100
/*
 * Per-function locks for sequence-based RTAS functions.
 *
 * Each mutex below is referenced from the .lock member of the matching
 * rtas_function_table entry, where the reason for the lock is given.
 */
static DEFINE_MUTEX(rtas_ibm_activate_firmware_lock);
static DEFINE_MUTEX(rtas_ibm_get_dynamic_sensor_state_lock);
static DEFINE_MUTEX(rtas_ibm_get_indices_lock);
static DEFINE_MUTEX(rtas_ibm_lpar_perftools_lock);
static DEFINE_MUTEX(rtas_ibm_physical_attestation_lock);
static DEFINE_MUTEX(rtas_ibm_set_dynamic_indicator_lock);
/*
 * Unlike the locks above this one has external linkage.
 * NOTE(review): presumably it is taken by code outside this file - confirm.
 */
DEFINE_MUTEX(rtas_ibm_get_vpd_lock);

101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147
static struct rtas_function rtas_function_table[] __ro_after_init = {
	[RTAS_FNIDX__CHECK_EXCEPTION] = {
		.name = "check-exception",
	},
	[RTAS_FNIDX__DISPLAY_CHARACTER] = {
		.name = "display-character",
		.filter = &(const struct rtas_filter) {
			.buf_idx1 = -1, .size_idx1 = -1,
			.buf_idx2 = -1, .size_idx2 = -1,
		},
	},
	[RTAS_FNIDX__EVENT_SCAN] = {
		.name = "event-scan",
	},
	[RTAS_FNIDX__FREEZE_TIME_BASE] = {
		.name = "freeze-time-base",
	},
	[RTAS_FNIDX__GET_POWER_LEVEL] = {
		.name = "get-power-level",
		.filter = &(const struct rtas_filter) {
			.buf_idx1 = -1, .size_idx1 = -1,
			.buf_idx2 = -1, .size_idx2 = -1,
		},
	},
	[RTAS_FNIDX__GET_SENSOR_STATE] = {
		.name = "get-sensor-state",
		.filter = &(const struct rtas_filter) {
			.buf_idx1 = -1, .size_idx1 = -1,
			.buf_idx2 = -1, .size_idx2 = -1,
		},
	},
	[RTAS_FNIDX__GET_TERM_CHAR] = {
		.name = "get-term-char",
	},
	[RTAS_FNIDX__GET_TIME_OF_DAY] = {
		.name = "get-time-of-day",
		.filter = &(const struct rtas_filter) {
			.buf_idx1 = -1, .size_idx1 = -1,
			.buf_idx2 = -1, .size_idx2 = -1,
		},
	},
	[RTAS_FNIDX__IBM_ACTIVATE_FIRMWARE] = {
		.name = "ibm,activate-firmware",
		.filter = &(const struct rtas_filter) {
			.buf_idx1 = -1, .size_idx1 = -1,
			.buf_idx2 = -1, .size_idx2 = -1,
		},
148 149 150 151 152 153 154
		/*
		 * PAPR+ as of v2.13 doesn't explicitly impose any
		 * restriction, but this typically requires multiple
		 * calls before success, and there's no reason to
		 * allow sequences to interleave.
		 */
		.lock = &rtas_ibm_activate_firmware_lock,
155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225
	},
	[RTAS_FNIDX__IBM_CBE_START_PTCAL] = {
		.name = "ibm,cbe-start-ptcal",
	},
	[RTAS_FNIDX__IBM_CBE_STOP_PTCAL] = {
		.name = "ibm,cbe-stop-ptcal",
	},
	[RTAS_FNIDX__IBM_CHANGE_MSI] = {
		.name = "ibm,change-msi",
	},
	[RTAS_FNIDX__IBM_CLOSE_ERRINJCT] = {
		.name = "ibm,close-errinjct",
		.filter = &(const struct rtas_filter) {
			.buf_idx1 = -1, .size_idx1 = -1,
			.buf_idx2 = -1, .size_idx2 = -1,
		},
	},
	[RTAS_FNIDX__IBM_CONFIGURE_BRIDGE] = {
		.name = "ibm,configure-bridge",
	},
	[RTAS_FNIDX__IBM_CONFIGURE_CONNECTOR] = {
		.name = "ibm,configure-connector",
		.filter = &(const struct rtas_filter) {
			.buf_idx1 = 0, .size_idx1 = -1,
			.buf_idx2 = 1, .size_idx2 = -1,
			.fixed_size = 4096,
		},
	},
	[RTAS_FNIDX__IBM_CONFIGURE_KERNEL_DUMP] = {
		.name = "ibm,configure-kernel-dump",
	},
	[RTAS_FNIDX__IBM_CONFIGURE_PE] = {
		.name = "ibm,configure-pe",
	},
	[RTAS_FNIDX__IBM_CREATE_PE_DMA_WINDOW] = {
		.name = "ibm,create-pe-dma-window",
	},
	[RTAS_FNIDX__IBM_DISPLAY_MESSAGE] = {
		.name = "ibm,display-message",
		.filter = &(const struct rtas_filter) {
			.buf_idx1 = 0, .size_idx1 = -1,
			.buf_idx2 = -1, .size_idx2 = -1,
		},
	},
	[RTAS_FNIDX__IBM_ERRINJCT] = {
		.name = "ibm,errinjct",
		.filter = &(const struct rtas_filter) {
			.buf_idx1 = 2, .size_idx1 = -1,
			.buf_idx2 = -1, .size_idx2 = -1,
			.fixed_size = 1024,
		},
	},
	[RTAS_FNIDX__IBM_EXTI2C] = {
		.name = "ibm,exti2c",
	},
	[RTAS_FNIDX__IBM_GET_CONFIG_ADDR_INFO] = {
		.name = "ibm,get-config-addr-info",
	},
	[RTAS_FNIDX__IBM_GET_CONFIG_ADDR_INFO2] = {
		.name = "ibm,get-config-addr-info2",
		.filter = &(const struct rtas_filter) {
			.buf_idx1 = -1, .size_idx1 = -1,
			.buf_idx2 = -1, .size_idx2 = -1,
		},
	},
	[RTAS_FNIDX__IBM_GET_DYNAMIC_SENSOR_STATE] = {
		.name = "ibm,get-dynamic-sensor-state",
		.filter = &(const struct rtas_filter) {
			.buf_idx1 = 1, .size_idx1 = -1,
			.buf_idx2 = -1, .size_idx2 = -1,
		},
226 227 228 229 230 231 232
		/*
		 * PAPR+ v2.13 R1–7.3.19–3 is explicit that the OS
		 * must not call ibm,get-dynamic-sensor-state with
		 * different inputs until a non-retry status has been
		 * returned.
		 */
		.lock = &rtas_ibm_get_dynamic_sensor_state_lock,
233 234 235 236 237 238 239
	},
	[RTAS_FNIDX__IBM_GET_INDICES] = {
		.name = "ibm,get-indices",
		.filter = &(const struct rtas_filter) {
			.buf_idx1 = 2, .size_idx1 = 3,
			.buf_idx2 = -1, .size_idx2 = -1,
		},
240 241 242 243 244 245
		/*
		 * PAPR+ v2.13 R1–7.3.17–2 says that the OS must not
		 * interleave ibm,get-indices call sequences with
		 * different inputs.
		 */
		.lock = &rtas_ibm_get_indices_lock,
246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262
	},
	[RTAS_FNIDX__IBM_GET_RIO_TOPOLOGY] = {
		.name = "ibm,get-rio-topology",
	},
	[RTAS_FNIDX__IBM_GET_SYSTEM_PARAMETER] = {
		.name = "ibm,get-system-parameter",
		.filter = &(const struct rtas_filter) {
			.buf_idx1 = 1, .size_idx1 = 2,
			.buf_idx2 = -1, .size_idx2 = -1,
		},
	},
	[RTAS_FNIDX__IBM_GET_VPD] = {
		.name = "ibm,get-vpd",
		.filter = &(const struct rtas_filter) {
			.buf_idx1 = 0, .size_idx1 = -1,
			.buf_idx2 = 1, .size_idx2 = 2,
		},
263 264 265 266 267
		/*
		 * PAPR+ v2.13 R1–7.3.20–4 indicates that sequences
		 * should not be allowed to interleave.
		 */
		.lock = &rtas_ibm_get_vpd_lock,
268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286
	},
	[RTAS_FNIDX__IBM_GET_XIVE] = {
		.name = "ibm,get-xive",
	},
	[RTAS_FNIDX__IBM_INT_OFF] = {
		.name = "ibm,int-off",
	},
	[RTAS_FNIDX__IBM_INT_ON] = {
		.name = "ibm,int-on",
	},
	[RTAS_FNIDX__IBM_IO_QUIESCE_ACK] = {
		.name = "ibm,io-quiesce-ack",
	},
	[RTAS_FNIDX__IBM_LPAR_PERFTOOLS] = {
		.name = "ibm,lpar-perftools",
		.filter = &(const struct rtas_filter) {
			.buf_idx1 = 2, .size_idx1 = 3,
			.buf_idx2 = -1, .size_idx2 = -1,
		},
287 288 289 290 291
		/*
		 * PAPR+ v2.13 R1–7.3.26–6 says the OS should allow
		 * only one call sequence in progress at a time.
		 */
		.lock = &rtas_ibm_lpar_perftools_lock,
292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329
	},
	[RTAS_FNIDX__IBM_MANAGE_FLASH_IMAGE] = {
		.name = "ibm,manage-flash-image",
	},
	[RTAS_FNIDX__IBM_MANAGE_STORAGE_PRESERVATION] = {
		.name = "ibm,manage-storage-preservation",
	},
	[RTAS_FNIDX__IBM_NMI_INTERLOCK] = {
		.name = "ibm,nmi-interlock",
	},
	[RTAS_FNIDX__IBM_NMI_REGISTER] = {
		.name = "ibm,nmi-register",
	},
	[RTAS_FNIDX__IBM_OPEN_ERRINJCT] = {
		.name = "ibm,open-errinjct",
		.filter = &(const struct rtas_filter) {
			.buf_idx1 = -1, .size_idx1 = -1,
			.buf_idx2 = -1, .size_idx2 = -1,
		},
	},
	[RTAS_FNIDX__IBM_OPEN_SRIOV_ALLOW_UNFREEZE] = {
		.name = "ibm,open-sriov-allow-unfreeze",
	},
	[RTAS_FNIDX__IBM_OPEN_SRIOV_MAP_PE_NUMBER] = {
		.name = "ibm,open-sriov-map-pe-number",
	},
	[RTAS_FNIDX__IBM_OS_TERM] = {
		.name = "ibm,os-term",
	},
	[RTAS_FNIDX__IBM_PARTNER_CONTROL] = {
		.name = "ibm,partner-control",
	},
	[RTAS_FNIDX__IBM_PHYSICAL_ATTESTATION] = {
		.name = "ibm,physical-attestation",
		.filter = &(const struct rtas_filter) {
			.buf_idx1 = 0, .size_idx1 = 1,
			.buf_idx2 = -1, .size_idx2 = -1,
		},
330 331 332 333 334 335 336 337
		/*
		 * This follows a sequence-based pattern similar to
		 * ibm,get-vpd et al. Since PAPR+ restricts
		 * interleaving call sequences for other functions of
		 * this style, assume the restriction applies here,
		 * even though it's not explicit in the spec.
		 */
		.lock = &rtas_ibm_physical_attestation_lock,
338 339 340 341 342 343 344
	},
	[RTAS_FNIDX__IBM_PLATFORM_DUMP] = {
		.name = "ibm,platform-dump",
		.filter = &(const struct rtas_filter) {
			.buf_idx1 = 4, .size_idx1 = 5,
			.buf_idx2 = -1, .size_idx2 = -1,
		},
345 346 347 348 349 350 351
		/*
		 * PAPR+ v2.13 7.3.3.4.1 indicates that concurrent
		 * sequences of ibm,platform-dump are allowed if they
		 * are operating on different dump tags. So leave the
		 * lock pointer unset for now. This may need
		 * reconsideration if kernel-internal users appear.
		 */
352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393
	},
	[RTAS_FNIDX__IBM_POWER_OFF_UPS] = {
		.name = "ibm,power-off-ups",
	},
	[RTAS_FNIDX__IBM_QUERY_INTERRUPT_SOURCE_NUMBER] = {
		.name = "ibm,query-interrupt-source-number",
	},
	[RTAS_FNIDX__IBM_QUERY_PE_DMA_WINDOW] = {
		.name = "ibm,query-pe-dma-window",
	},
	[RTAS_FNIDX__IBM_READ_PCI_CONFIG] = {
		.name = "ibm,read-pci-config",
	},
	[RTAS_FNIDX__IBM_READ_SLOT_RESET_STATE] = {
		.name = "ibm,read-slot-reset-state",
		.filter = &(const struct rtas_filter) {
			.buf_idx1 = -1, .size_idx1 = -1,
			.buf_idx2 = -1, .size_idx2 = -1,
		},
	},
	[RTAS_FNIDX__IBM_READ_SLOT_RESET_STATE2] = {
		.name = "ibm,read-slot-reset-state2",
	},
	[RTAS_FNIDX__IBM_REMOVE_PE_DMA_WINDOW] = {
		.name = "ibm,remove-pe-dma-window",
	},
	[RTAS_FNIDX__IBM_RESET_PE_DMA_WINDOWS] = {
		.name = "ibm,reset-pe-dma-windows",
	},
	[RTAS_FNIDX__IBM_SCAN_LOG_DUMP] = {
		.name = "ibm,scan-log-dump",
		.filter = &(const struct rtas_filter) {
			.buf_idx1 = 0, .size_idx1 = 1,
			.buf_idx2 = -1, .size_idx2 = -1,
		},
	},
	[RTAS_FNIDX__IBM_SET_DYNAMIC_INDICATOR] = {
		.name = "ibm,set-dynamic-indicator",
		.filter = &(const struct rtas_filter) {
			.buf_idx1 = 2, .size_idx1 = -1,
			.buf_idx2 = -1, .size_idx2 = -1,
		},
394 395 396 397 398 399
		/*
		 * PAPR+ v2.13 R1–7.3.18–3 says the OS must not call
		 * this function with different inputs until a
		 * non-retry status has been returned.
		 */
		.lock = &rtas_ibm_set_dynamic_indicator_lock,
400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527
	},
	[RTAS_FNIDX__IBM_SET_EEH_OPTION] = {
		.name = "ibm,set-eeh-option",
		.filter = &(const struct rtas_filter) {
			.buf_idx1 = -1, .size_idx1 = -1,
			.buf_idx2 = -1, .size_idx2 = -1,
		},
	},
	[RTAS_FNIDX__IBM_SET_SLOT_RESET] = {
		.name = "ibm,set-slot-reset",
	},
	[RTAS_FNIDX__IBM_SET_SYSTEM_PARAMETER] = {
		.name = "ibm,set-system-parameter",
		.filter = &(const struct rtas_filter) {
			.buf_idx1 = 1, .size_idx1 = -1,
			.buf_idx2 = -1, .size_idx2 = -1,
		},
	},
	[RTAS_FNIDX__IBM_SET_XIVE] = {
		.name = "ibm,set-xive",
	},
	[RTAS_FNIDX__IBM_SLOT_ERROR_DETAIL] = {
		.name = "ibm,slot-error-detail",
	},
	[RTAS_FNIDX__IBM_SUSPEND_ME] = {
		.name = "ibm,suspend-me",
		.banned_for_syscall_on_le = true,
		.filter = &(const struct rtas_filter) {
			.buf_idx1 = -1, .size_idx1 = -1,
			.buf_idx2 = -1, .size_idx2 = -1,
		},
	},
	[RTAS_FNIDX__IBM_TUNE_DMA_PARMS] = {
		.name = "ibm,tune-dma-parms",
	},
	[RTAS_FNIDX__IBM_UPDATE_FLASH_64_AND_REBOOT] = {
		.name = "ibm,update-flash-64-and-reboot",
	},
	[RTAS_FNIDX__IBM_UPDATE_NODES] = {
		.name = "ibm,update-nodes",
		.banned_for_syscall_on_le = true,
		.filter = &(const struct rtas_filter) {
			.buf_idx1 = 0, .size_idx1 = -1,
			.buf_idx2 = -1, .size_idx2 = -1,
			.fixed_size = 4096,
		},
	},
	[RTAS_FNIDX__IBM_UPDATE_PROPERTIES] = {
		.name = "ibm,update-properties",
		.banned_for_syscall_on_le = true,
		.filter = &(const struct rtas_filter) {
			.buf_idx1 = 0, .size_idx1 = -1,
			.buf_idx2 = -1, .size_idx2 = -1,
			.fixed_size = 4096,
		},
	},
	[RTAS_FNIDX__IBM_VALIDATE_FLASH_IMAGE] = {
		.name = "ibm,validate-flash-image",
	},
	[RTAS_FNIDX__IBM_WRITE_PCI_CONFIG] = {
		.name = "ibm,write-pci-config",
	},
	[RTAS_FNIDX__NVRAM_FETCH] = {
		.name = "nvram-fetch",
	},
	[RTAS_FNIDX__NVRAM_STORE] = {
		.name = "nvram-store",
	},
	[RTAS_FNIDX__POWER_OFF] = {
		.name = "power-off",
	},
	[RTAS_FNIDX__PUT_TERM_CHAR] = {
		.name = "put-term-char",
	},
	[RTAS_FNIDX__QUERY_CPU_STOPPED_STATE] = {
		.name = "query-cpu-stopped-state",
	},
	[RTAS_FNIDX__READ_PCI_CONFIG] = {
		.name = "read-pci-config",
	},
	[RTAS_FNIDX__RTAS_LAST_ERROR] = {
		.name = "rtas-last-error",
	},
	[RTAS_FNIDX__SET_INDICATOR] = {
		.name = "set-indicator",
		.filter = &(const struct rtas_filter) {
			.buf_idx1 = -1, .size_idx1 = -1,
			.buf_idx2 = -1, .size_idx2 = -1,
		},
	},
	[RTAS_FNIDX__SET_POWER_LEVEL] = {
		.name = "set-power-level",
		.filter = &(const struct rtas_filter) {
			.buf_idx1 = -1, .size_idx1 = -1,
			.buf_idx2 = -1, .size_idx2 = -1,
		},
	},
	[RTAS_FNIDX__SET_TIME_FOR_POWER_ON] = {
		.name = "set-time-for-power-on",
		.filter = &(const struct rtas_filter) {
			.buf_idx1 = -1, .size_idx1 = -1,
			.buf_idx2 = -1, .size_idx2 = -1,
		},
	},
	[RTAS_FNIDX__SET_TIME_OF_DAY] = {
		.name = "set-time-of-day",
		.filter = &(const struct rtas_filter) {
			.buf_idx1 = -1, .size_idx1 = -1,
			.buf_idx2 = -1, .size_idx2 = -1,
		},
	},
	[RTAS_FNIDX__START_CPU] = {
		.name = "start-cpu",
	},
	[RTAS_FNIDX__STOP_SELF] = {
		.name = "stop-self",
	},
	[RTAS_FNIDX__SYSTEM_REBOOT] = {
		.name = "system-reboot",
	},
	[RTAS_FNIDX__THAW_TIME_BASE] = {
		.name = "thaw-time-base",
	},
	[RTAS_FNIDX__WRITE_PCI_CONFIG] = {
		.name = "write-pci-config",
	},
};

528 529 530 531 532
/*
 * Walk @funcp over every element of rtas_function_table, from the first
 * entry to the last, in array order.
 */
#define for_each_rtas_function(funcp)                                          \
	for (funcp = rtas_function_table;                                      \
	     funcp < rtas_function_table + ARRAY_SIZE(rtas_function_table);    \
	     ++funcp)

533 534 535 536 537 538 539 540 541 542
/*
 * Nearly all RTAS calls need to be serialized. All uses of the
 * default rtas_args block must hold rtas_lock.
 *
 * Exceptions to the RTAS serialization requirement (e.g. stop-self)
 * must use a separate rtas_args structure.
 */
static DEFINE_RAW_SPINLOCK(rtas_lock);
static struct rtas_args rtas_args;

543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568
/**
 * rtas_function_token() - RTAS function token lookup.
 * @handle: Function handle, e.g. RTAS_FN_EVENT_SCAN.
 *
 * An out-of-range handle index triggers a one-time warning.
 *
 * Context: Any context.
 * Return: the token value for the function if implemented by this platform,
 *         otherwise RTAS_UNKNOWN_SERVICE.
 */
s32 rtas_function_token(const rtas_fn_handle_t handle)
{
	const size_t index = handle.index;

	if (WARN_ONCE(index >= ARRAY_SIZE(rtas_function_table),
		      "invalid function index %zu", index))
		return RTAS_UNKNOWN_SERVICE;

	/*
	 * Various drivers attempt token lookups on non-RTAS
	 * platforms, where there is no RTAS device node.
	 */
	return rtas.dev ? rtas_function_table[index].token
			: RTAS_UNKNOWN_SERVICE;
}
EXPORT_SYMBOL_GPL(rtas_function_token);

569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603
/*
 * Comparator for rtas_function descriptors: orders two entries by
 * their name strings using strcmp().
 */
static int rtas_function_cmp(const void *a, const void *b)
{
	const char *lhs = ((const struct rtas_function *)a)->name;
	const char *rhs = ((const struct rtas_function *)b)->name;

	return strcmp(lhs, rhs);
}

/*
 * Boot-time initialization of the function table needs the lookup to
 * return a non-const-qualified object. Use rtas_name_to_function()
 * in all other contexts.
 */
static struct rtas_function *__rtas_name_to_function(const char *name)
{
	const struct rtas_function key = {
		.name = name,
	};
	struct rtas_function *found;

	found = bsearch(&key, rtas_function_table, ARRAY_SIZE(rtas_function_table),
			sizeof(rtas_function_table[0]), rtas_function_cmp);

	return found;
}

/*
 * Const-qualified front end to __rtas_name_to_function(): same lookup,
 * but the caller gets a read-only descriptor. Returns whatever the
 * underlying lookup returns for @name.
 */
static const struct rtas_function *rtas_name_to_function(const char *name)
{
	return __rtas_name_to_function(name);
}

static DEFINE_XARRAY(rtas_token_to_function_xarray);

/*
 * Populate the token -> descriptor reverse map. Every table entry whose
 * token is not RTAS_UNKNOWN_SERVICE is stored in
 * rtas_token_to_function_xarray, indexed by its token.
 *
 * Returns 0 on success, or the first error reported by the xarray store,
 * in which case the remaining entries are not added.
 */
static int __init rtas_token_to_function_xarray_init(void)
{
	const struct rtas_function *entry;

	for_each_rtas_function(entry) {
		int rc;

		if (entry->token == RTAS_UNKNOWN_SERVICE)
			continue;

		rc = xa_err(xa_store(&rtas_token_to_function_xarray,
				     entry->token, (void *)entry, GFP_KERNEL));
		if (rc)
			return rc;
	}

	return 0;
}
arch_initcall(rtas_token_to_function_xarray_init);

623 624 625 626 627 628 629 630 631 632 633 634 635 636 637
/*
 * For use by sys_rtas(), where the token value is provided by user
 * space and we don't want to warn on failed lookups.
 *
 * Returns the result of loading @token from
 * rtas_token_to_function_xarray; callers in this file treat a NULL
 * result as "no descriptor found".
 */
static const struct rtas_function *rtas_token_to_function_untrusted(s32 token)
{
	return xa_load(&rtas_token_to_function_xarray, token);
}

/*
 * Reverse lookup for deriving the function descriptor from a
 * known-good token value in contexts where the former is not already
 * available. @token must be valid, e.g. derived from the result of a
 * prior lookup against the function table.
 */
638 639 640 641 642 643 644
static const struct rtas_function *rtas_token_to_function(s32 token)
{
	const struct rtas_function *func;

	if (WARN_ONCE(token < 0, "invalid token %d", token))
		return NULL;

645
	func = rtas_token_to_function_untrusted(token);
646 647 648 649 650 651 652 653 654 655 656 657
	if (func)
		return func;
	/*
	 * Fall back to linear scan in case the reverse mapping hasn't
	 * been initialized yet.
	 */
	if (xa_empty(&rtas_token_to_function_xarray)) {
		for_each_rtas_function(func) {
			if (func->token == token)
				return func;
		}
	}
658

659 660
	WARN_ONCE(true, "unexpected failed lookup for token %d", token);
	return NULL;
661 662
}

663 664 665
/* This is here deliberately so it's only used in this file */
void enter_rtas(unsigned long);

666
/*
 * Untraced entry into RTAS: hand the physical address of @args to
 * enter_rtas(), then invalidate the SRR state since RTAS uses the SRRs.
 */
static void __do_enter_rtas(struct rtas_args *args)
{
	const unsigned long args_pa = __pa(args);

	enter_rtas(args_pa);
	srr_regs_clobbered(); /* rtas uses SRRs, invalidate */
}
671

672 673
static void __do_enter_rtas_trace(struct rtas_args *args)
{
674
	const struct rtas_function *func = rtas_token_to_function(be32_to_cpu(args->token));
675

676
	/*
677 678
	 * If there is a per-function lock, it must be held by the
	 * caller.
679
	 */
680 681
	if (func->lock)
		lockdep_assert_held(func->lock);
682

683 684
	if (args == &rtas_args)
		lockdep_assert_held(&rtas_lock);
685

686
	trace_rtas_input(args, func->name);
687 688 689 690 691
	trace_rtas_ll_entry(args);

	__do_enter_rtas(args);

	trace_rtas_ll_exit(args);
692
	trace_rtas_output(args, func->name);
693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715
}

static void do_enter_rtas(struct rtas_args *args)
{
	const unsigned long msr = mfmsr();
	/*
	 * Situations where we want to skip any active tracepoints for
	 * safety reasons:
	 *
	 * 1. The last code executed on an offline CPU as it stops,
	 *    i.e. we're about to call stop-self. The tracepoints'
	 *    function name lookup uses xarray, which uses RCU, which
	 *    isn't valid to call on an offline CPU.  Any events
	 *    emitted on an offline CPU will be discarded anyway.
	 *
	 * 2. In real mode, as when invoking ibm,nmi-interlock from
	 *    the pseries MCE handler. We cannot count on trace
	 *    buffers or the entries in rtas_token_to_function_xarray
	 *    to be contained in the RMO.
	 */
	const unsigned long mask = MSR_IR | MSR_DR;
	const bool can_trace = likely(cpu_online(raw_smp_processor_id()) &&
				      (msr & mask) == mask);
716 717 718 719 720 721
	/*
	 * Make sure MSR[RI] is currently enabled as it will be forced later
	 * in enter_rtas.
	 */
	BUG_ON(!(msr & MSR_RI));

722 723 724 725
	BUG_ON(!irqs_disabled());

	hard_irq_disable(); /* Ensure MSR[EE] is disabled on PPC64 */

726 727 728 729
	if (can_trace)
		__do_enter_rtas_trace(args);
	else
		__do_enter_rtas(args);
730 731
}

732 733
struct rtas_t rtas;

Linus Torvalds's avatar
Linus Torvalds committed
734
DEFINE_SPINLOCK(rtas_data_buf_lock);
735
EXPORT_SYMBOL_GPL(rtas_data_buf_lock);
736

737
char rtas_data_buf[RTAS_DATA_BUF_SIZE] __aligned(SZ_4K);
738
EXPORT_SYMBOL_GPL(rtas_data_buf);
739

Linus Torvalds's avatar
Linus Torvalds committed
740 741
unsigned long rtas_rmo_buf;

742 743 744 745 746
/*
 * If non-NULL, this gets called when the kernel terminates.
 * This is done like this so rtas_flash can be a module.
 */
void (*rtas_flash_term_hook)(int);
747
EXPORT_SYMBOL_GPL(rtas_flash_term_hook);
748

749 750 751 752 753
/*
 * call_rtas_display_status and call_rtas_display_status_delay
 * are designed only for very early low-level debugging, which
 * is why the token is hard-coded to 10.
 */
754
static void call_rtas_display_status(unsigned char c)
Linus Torvalds's avatar
Linus Torvalds committed
755
{
756
	unsigned long flags;
Linus Torvalds's avatar
Linus Torvalds committed
757 758 759 760

	if (!rtas.base)
		return;

761
	raw_spin_lock_irqsave(&rtas_lock, flags);
762
	rtas_call_unlocked(&rtas_args, 10, 1, 1, NULL, c);
763
	raw_spin_unlock_irqrestore(&rtas_lock, flags);
Linus Torvalds's avatar
Linus Torvalds committed
764 765
}

766
/*
 * Character output for a 16-column display, with delays so the text
 * stays readable.
 *
 * A newline pads the rest of the current line with spaces, waits, and
 * is remembered as pending. The CR/LF pair is only emitted once the
 * next printable character arrives. Characters beyond the 16th on a
 * line are dropped until the next newline.
 */
static void call_rtas_display_status_delay(char c)
{
	static int pending_newline = 0;  /* did last write end with unprinted newline? */
	static int width = 16;           /* columns still free on the current line */

	if (c == '\n') {
		while (width-- > 0)
			call_rtas_display_status(' ');
		width = 16;
		mdelay(500);
		pending_newline = 1;
		return;
	}

	if (pending_newline) {
		call_rtas_display_status('\r');
		call_rtas_display_status('\n');
	}
	pending_newline = 0;

	/*
	 * Only consume a column while one is left. Testing "width--"
	 * for non-zero would let the counter go negative, and output
	 * would then resume past the end of the line.
	 */
	if (width > 0) {
		width--;
		call_rtas_display_status(c);
		udelay(10000);
	}
}

790
/*
 * Route udbg character output to the RTAS display by installing
 * call_rtas_display_status_delay() as the udbg_putc hook.
 */
void __init udbg_init_rtas_panel(void)
{
	udbg_putc = call_rtas_display_status_delay;
}

795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854
#ifdef CONFIG_UDBG_RTAS_CONSOLE

/* If you think you're dying before early_init_dt_scan_rtas() does its
 * work, you can hard code the token values for your firmware here and
 * hardcode rtas.base/entry etc.
 */
static unsigned int rtas_putchar_token = RTAS_UNKNOWN_SERVICE;
static unsigned int rtas_getchar_token = RTAS_UNKNOWN_SERVICE;

/*
 * Write one character through the RTAS function identified by
 * rtas_putchar_token. A '\n' is preceded by a '\r'. The call is
 * attempted up to 16 times, pausing 1000us after each failed attempt.
 * Does nothing when rtas.base is not set.
 */
static void udbg_rtascon_putc(char c)
{
	int attempt = 0;

	if (!rtas.base)
		return;

	/* Add CRs before LFs */
	if (c == '\n')
		udbg_rtascon_putc('\r');

	/* if there is more than one character to be displayed, wait a bit */
	while (attempt++ < 16) {
		if (!rtas_call(rtas_putchar_token, 1, 1, NULL, c))
			return;
		udelay(1000);
	}
}

/*
 * Non-blocking read of one character through the RTAS function
 * identified by rtas_getchar_token. Returns the character, or -1 when
 * rtas.base is not set or the RTAS call reports a non-zero status.
 */
static int udbg_rtascon_getc_poll(void)
{
	int ch;

	if (!rtas.base || rtas_call(rtas_getchar_token, 0, 2, &ch))
		return -1;

	return ch;
}

/*
 * Blocking read: spin on udbg_rtascon_getc_poll() until it yields
 * something other than -1, then return that value.
 */
static int udbg_rtascon_getc(void)
{
	int ch;

	do {
		ch = udbg_rtascon_getc_poll();
	} while (ch == -1);

	return ch;
}


/*
 * Install the RTAS console helpers as the udbg output, blocking-input
 * and polling-input hooks.
 */
void __init udbg_init_rtas_console(void)
{
	udbg_putc = udbg_rtascon_putc;
	udbg_getc = udbg_rtascon_getc;
	udbg_getc_poll = udbg_rtascon_getc_poll;
}
#endif /* CONFIG_UDBG_RTAS_CONSOLE */

855
void rtas_progress(char *s, unsigned short hex)
856 857
{
	struct device_node *root;
858
	int width;
859
	const __be32 *p;
860 861
	char *os;
	static int display_character, set_indicator;
862
	static int display_width, display_lines, form_feed;
863
	static const int *row_width;
864
	static DEFINE_SPINLOCK(progress_lock);
865
	static int current_line;
866 867 868 869 870
	static int pending_newline = 0;  /* did last write end with unprinted newline? */

	if (!rtas.base)
		return;

871 872
	if (display_width == 0) {
		display_width = 0x10;
873
		if ((root = of_find_node_by_path("/rtas"))) {
874
			if ((p = of_get_property(root,
875
					"ibm,display-line-length", NULL)))
876
				display_width = be32_to_cpu(*p);
877
			if ((p = of_get_property(root,
878
					"ibm,form-feed", NULL)))
879
				form_feed = be32_to_cpu(*p);
880
			if ((p = of_get_property(root,
881
					"ibm,display-number-of-lines", NULL)))
882
				display_lines = be32_to_cpu(*p);
883
			row_width = of_get_property(root,
884
					"ibm,display-truncation-length", NULL);
885
			of_node_put(root);
886
		}
887 888
		display_character = rtas_function_token(RTAS_FN_DISPLAY_CHARACTER);
		set_indicator = rtas_function_token(RTAS_FN_SET_INDICATOR);
889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904
	}

	if (display_character == RTAS_UNKNOWN_SERVICE) {
		/* use hex display if available */
		if (set_indicator != RTAS_UNKNOWN_SERVICE)
			rtas_call(set_indicator, 3, 1, NULL, 6, 0, hex);
		return;
	}

	spin_lock(&progress_lock);

	/*
	 * Last write ended with newline, but we didn't print it since
	 * it would just clear the bottom line of output. Print it now
	 * instead.
	 *
905 906 907
	 * If no newline is pending and form feed is supported, clear the
	 * display with a form feed; otherwise, print a CR to start output
	 * at the beginning of the line.
908 909 910 911 912 913
	 */
	if (pending_newline) {
		rtas_call(display_character, 1, 1, NULL, '\r');
		rtas_call(display_character, 1, 1, NULL, '\n');
		pending_newline = 0;
	} else {
914 915 916 917 918 919
		current_line = 0;
		if (form_feed)
			rtas_call(display_character, 1, 1, NULL,
				  (char)form_feed);
		else
			rtas_call(display_character, 1, 1, NULL, '\r');
920
	}
921

922 923 924 925
	if (row_width)
		width = row_width[current_line];
	else
		width = display_width;
926 927 928 929 930 931 932 933 934
	os = s;
	while (*os) {
		if (*os == '\n' || *os == '\r') {
			/* If newline is the last character, save it
			 * until next call to avoid bumping up the
			 * display output.
			 */
			if (*os == '\n' && !os[1]) {
				pending_newline = 1;
935 936 937
				current_line++;
				if (current_line > display_lines-1)
					current_line = display_lines-1;
938 939 940
				spin_unlock(&progress_lock);
				return;
			}
941

942
			/* RTAS wants CR-LF, not just LF */
943

944 945 946 947 948 949 950 951 952
			if (*os == '\n') {
				rtas_call(display_character, 1, 1, NULL, '\r');
				rtas_call(display_character, 1, 1, NULL, '\n');
			} else {
				/* CR might be used to re-draw a line, so we'll
				 * leave it alone and not add LF.
				 */
				rtas_call(display_character, 1, 1, NULL, *os);
			}
953

954 955 956 957
			if (row_width)
				width = row_width[current_line];
			else
				width = display_width;
958 959 960 961
		} else {
			width--;
			rtas_call(display_character, 1, 1, NULL, *os);
		}
962

963
		os++;
964

965 966 967 968 969
		/* if we overwrite the screen length */
		if (width <= 0)
			while ((*os != 0) && (*os != '\n') && (*os != '\r'))
				os++;
	}
970

971 972
	spin_unlock(&progress_lock);
}
973
EXPORT_SYMBOL_GPL(rtas_progress);		/* needed by rtas_flash module */
974

975
int rtas_token(const char *service)
Linus Torvalds's avatar
Linus Torvalds committed
976
{
977
	const struct rtas_function *func;
978
	const __be32 *tokp;
979

980
	if (rtas.dev == NULL)
Linus Torvalds's avatar
Linus Torvalds committed
981
		return RTAS_UNKNOWN_SERVICE;
982 983 984 985 986 987 988 989 990 991 992 993 994 995

	func = rtas_name_to_function(service);
	if (func)
		return func->token;
	/*
	 * The caller is looking up a name that is not known to be an
	 * RTAS function. Either it's a function that needs to be
	 * added to the table, or they're misusing rtas_token() to
	 * access non-function properties of the /rtas node. Warn and
	 * fall back to the legacy behavior.
	 */
	WARN_ONCE(1, "unknown function `%s`, should it be added to rtas_function_table?\n",
		  service);

996
	tokp = of_get_property(rtas.dev, service, NULL);
997
	return tokp ? be32_to_cpu(*tokp) : RTAS_UNKNOWN_SERVICE;
Linus Torvalds's avatar
Linus Torvalds committed
998
}
999
EXPORT_SYMBOL_GPL(rtas_token);
Linus Torvalds's avatar
Linus Torvalds committed
1000

1001
#ifdef CONFIG_RTAS_ERROR_LOGGING
1002 1003 1004

/*
 * Error log buffer size reported by rtas_get_error_log_max(). Starts out
 * as RTAS_ERROR_LOG_MAX and is overwritten by init_error_log_max(), which
 * never stores a value larger than RTAS_ERROR_LOG_MAX.
 */
static u32 rtas_error_log_max __ro_after_init = RTAS_ERROR_LOG_MAX;

Linus Torvalds's avatar
Linus Torvalds committed
1005 1006 1007 1008 1009 1010 1011 1012 1013 1014
/**
 * rtas_get_error_log_max() - Size of the RTAS error log buffer.
 *
 * Return the firmware-specified size of the error log buffer
 * for all rtas calls that require an error buffer argument.
 * This includes 'check-exception' and 'rtas-last-error'.
 *
 * Return: The current value of rtas_error_log_max.
 */
int rtas_get_error_log_max(void)
{
	return rtas_error_log_max;
}

1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034
/*
 * Initialize rtas_error_log_max from the "rtas-error-log-max" property
 * of rtas.dev. A missing property or a value above RTAS_ERROR_LOG_MAX
 * is reported with a warning and replaced by RTAS_ERROR_LOG_MAX.
 */
static void __init init_error_log_max(void)
{
	static const char propname[] __initconst = "rtas-error-log-max";
	u32 fw_max;

	if (of_property_read_u32(rtas.dev, propname, &fw_max)) {
		pr_warn("%s not found, using default of %u\n",
			propname, RTAS_ERROR_LOG_MAX);
		fw_max = RTAS_ERROR_LOG_MAX;
	} else if (fw_max > RTAS_ERROR_LOG_MAX) {
		pr_warn("%s = %u, clamping max error log size to %u\n",
			propname, fw_max, RTAS_ERROR_LOG_MAX);
		fw_max = RTAS_ERROR_LOG_MAX;
	}

	rtas_error_log_max = fw_max;
}

Linus Torvalds's avatar
Linus Torvalds committed
1035

1036
static char rtas_err_buf[RTAS_ERROR_LOG_MAX];
1037

Linus Torvalds's avatar
Linus Torvalds committed
1038 1039 1040 1041
/** Return a copy of the detailed error text associated with the
 *  most recent failed call to rtas.  Because the error text
 *  might go stale if there are any other intervening rtas calls,
 *  this routine must be called atomically with whatever produced
1042
 *  the error (i.e. with rtas_lock still held from the previous call).
Linus Torvalds's avatar
Linus Torvalds committed
1043
 */
1044
static char *__fetch_rtas_last_error(char *altbuf)
Linus Torvalds's avatar
Linus Torvalds committed
1045
{
1046
	const s32 token = rtas_function_token(RTAS_FN_RTAS_LAST_ERROR);
Linus Torvalds's avatar
Linus Torvalds committed
1047 1048
	struct rtas_args err_args, save_args;
	u32 bufsz;
1049 1050
	char *buf = NULL;

1051 1052
	lockdep_assert_held(&rtas_lock);

1053
	if (token == -1)
1054
		return NULL;
Linus Torvalds's avatar
Linus Torvalds committed
1055 1056 1057

	bufsz = rtas_get_error_log_max();

1058
	err_args.token = cpu_to_be32(token);
1059 1060 1061 1062
	err_args.nargs = cpu_to_be32(2);
	err_args.nret = cpu_to_be32(1);
	err_args.args[0] = cpu_to_be32(__pa(rtas_err_buf));
	err_args.args[1] = cpu_to_be32(bufsz);
Linus Torvalds's avatar
Linus Torvalds committed
1063 1064
	err_args.args[2] = 0;

1065 1066
	save_args = rtas_args;
	rtas_args = err_args;
Linus Torvalds's avatar
Linus Torvalds committed
1067

1068
	do_enter_rtas(&rtas_args);
Linus Torvalds's avatar
Linus Torvalds committed
1069

1070 1071
	err_args = rtas_args;
	rtas_args = save_args;
Linus Torvalds's avatar
Linus Torvalds committed
1072

1073 1074 1075 1076 1077 1078
	/* Log the error in the unlikely case that there was one. */
	if (unlikely(err_args.args[2] == 0)) {
		if (altbuf) {
			buf = altbuf;
		} else {
			buf = rtas_err_buf;
1079
			if (slab_is_available())
1080 1081 1082
				buf = kmalloc(RTAS_ERROR_LOG_MAX, GFP_ATOMIC);
		}
		if (buf)
1083
			memmove(buf, rtas_err_buf, RTAS_ERROR_LOG_MAX);
1084 1085 1086
	}

	return buf;
Linus Torvalds's avatar
Linus Torvalds committed
1087 1088
}

1089 1090 1091 1092 1093
#define get_errorlog_buffer()	kmalloc(RTAS_ERROR_LOG_MAX, GFP_KERNEL)

#else /* CONFIG_RTAS_ERROR_LOGGING */
#define __fetch_rtas_last_error(x)	NULL
#define get_errorlog_buffer()		NULL
1094
static void __init init_error_log_max(void) {}
1095 1096
#endif

1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114

/*
 * Fill in @args for a call to the RTAS function identified by @token and
 * enter RTAS. @nargs inputs are pulled from @list as 32-bit values and
 * stored big-endian; the @nret return slots immediately follow the
 * inputs and are zeroed before the call. No locking is done here.
 */
static void
va_rtas_call_unlocked(struct rtas_args *args, int token, int nargs, int nret,
		      va_list list)
{
	int in, out;

	args->token = cpu_to_be32(token);
	args->nargs = cpu_to_be32(nargs);
	args->nret  = cpu_to_be32(nret);
	args->rets  = args->args + nargs;

	for (in = 0; in < nargs; in++)
		args->args[in] = cpu_to_be32(va_arg(list, __u32));

	for (out = 0; out < nret; out++)
		args->rets[out] = 0;

	do_enter_rtas(args);
}

1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134
/**
 * rtas_call_unlocked() - Invoke an RTAS firmware function without synchronization.
 * @args: RTAS parameter block to be used for the call, must obey RTAS addressing
 *        constraints.
 * @token: Identifies the function being invoked.
 * @nargs: Number of input parameters. Does not include token.
 * @nret: Number of output parameters, including the call status.
 * @....: List of @nargs input parameters.
 *
 * Invokes the RTAS function indicated by @token, which the caller
 * should obtain via rtas_function_token(). Results are left in @args
 * for the caller to read; nothing is returned.
 *
 * This function is similar to rtas_call(), but must be used with a
 * limited set of RTAS calls specifically exempted from the general
 * requirement that only one RTAS call may be in progress at any
 * time. Examples include stop-self and ibm,nmi-interlock.
 */
void rtas_call_unlocked(struct rtas_args *args, int token, int nargs, int nret, ...)
{
	va_list ap;

	/* The variadic inputs begin directly after @nret. */
	va_start(ap, nret);
	va_rtas_call_unlocked(args, token, nargs, nret, ap);
	va_end(ap);
}

1144 1145 1146 1147 1148
/*
 * True if @token identifies ibm,open-errinjct or ibm,errinjct, the
 * functions rtas_call() gates behind the error-injection lockdown check.
 */
static bool token_is_restricted_errinjct(s32 token)
{
	if (token == rtas_function_token(RTAS_FN_IBM_OPEN_ERRINJCT))
		return true;

	return token == rtas_function_token(RTAS_FN_IBM_ERRINJCT);
}
1149

1150 1151 1152 1153 1154 1155 1156 1157 1158
/**
 * rtas_call() - Invoke an RTAS firmware function.
 * @token: Identifies the function being invoked.
 * @nargs: Number of input parameters. Does not include token.
 * @nret: Number of output parameters, including the call status.
 * @outputs: Array of @nret output words.
 * @....: List of @nargs input parameters.
 *
 * Invokes the RTAS function indicated by @token, which the caller
1159
 * should obtain via rtas_function_token().
1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207
 *
 * The @nargs and @nret arguments must match the number of input and
 * output parameters specified for the RTAS function.
 *
 * rtas_call() returns RTAS status codes, not conventional Linux errno
 * values. Callers must translate any failure to an appropriate errno
 * in syscall context. Most callers of RTAS functions that can return
 * -2 or 990x should use rtas_busy_delay() to correctly handle those
 * statuses before calling again.
 *
 * The return value descriptions are adapted from 7.2.8 [RTAS] Return
 * Codes of the PAPR and CHRP specifications.
 *
 * Context: Process context preferably, interrupt context if
 *          necessary.  Acquires an internal spinlock and may perform
 *          GFP_ATOMIC slab allocation in error path. Unsafe for NMI
 *          context.
 * Return:
 * *                          0 - RTAS function call succeeded.
 * *                         -1 - RTAS function encountered a hardware or
 *                                platform error, or the token is invalid,
 *                                or the function is restricted by kernel policy.
 * *                         -2 - Specs say "A necessary hardware device was busy,
 *                                and the requested function could not be
 *                                performed. The operation should be retried at
 *                                a later time." This is misleading, at least with
 *                                respect to current RTAS implementations. What it
 *                                usually means in practice is that the function
 *                                could not be completed while meeting RTAS's
 *                                deadline for returning control to the OS (250us
 *                                for PAPR/PowerVM, typically), but the call may be
 *                                immediately reattempted to resume work on it.
 * *                         -3 - Parameter error.
 * *                         -7 - Unexpected state change.
 * *                9000...9899 - Vendor-specific success codes.
 * *                9900...9905 - Advisory extended delay. Caller should try
 *                                again after ~10^x ms has elapsed, where x is
 *                                the last digit of the status [0-5]. Again going
 *                                beyond the PAPR text, 990x on PowerVM indicates
 *                                contention for RTAS-internal resources. Other
 *                                RTAS call sequences in progress should be
 *                                allowed to complete before reattempting the
 *                                call.
 * *                      -9000 - Multi-level isolation error.
 * *              -9999...-9004 - Vendor-specific error codes.
 * * Additional negative values - Function-specific error.
 * * Additional positive values - Function-specific success.
 */
Linus Torvalds's avatar
Linus Torvalds committed
1208 1209
int rtas_call(int token, int nargs, int nret, int *outputs, ...)
{
1210
	struct pin_cookie cookie;
Linus Torvalds's avatar
Linus Torvalds committed
1211
	va_list list;
1212
	int i;
1213
	unsigned long flags;
1214
	struct rtas_args *args;
1215
	char *buff_copy = NULL;
Linus Torvalds's avatar
Linus Torvalds committed
1216 1217
	int ret;

1218
	if (!rtas.entry || token == RTAS_UNKNOWN_SERVICE)
Linus Torvalds's avatar
Linus Torvalds committed
1219 1220
		return -1;

1221
	if (token_is_restricted_errinjct(token)) {
1222 1223 1224 1225 1226 1227 1228 1229 1230
		/*
		 * It would be nicer to not discard the error value
		 * from security_locked_down(), but callers expect an
		 * RTAS status, not an errno.
		 */
		if (security_locked_down(LOCKDOWN_RTAS_ERROR_INJECTION))
			return -1;
	}

1231 1232 1233 1234 1235
	if ((mfmsr() & (MSR_IR|MSR_DR)) != (MSR_IR|MSR_DR)) {
		WARN_ON_ONCE(1);
		return -1;
	}

1236
	raw_spin_lock_irqsave(&rtas_lock, flags);
1237 1238
	cookie = lockdep_pin_lock(&rtas_lock);

1239
	/* We use the global rtas args buffer */
1240
	args = &rtas_args;
Linus Torvalds's avatar
Linus Torvalds committed
1241 1242

	va_start(list, outputs);
1243
	va_rtas_call_unlocked(args, token, nargs, nret, list);
Linus Torvalds's avatar
Linus Torvalds committed
1244 1245 1246 1247
	va_end(list);

	/* A -1 return code indicates that the last command couldn't
	   be completed due to a hardware error. */
1248
	if (be32_to_cpu(args->rets[0]) == -1)
1249
		buff_copy = __fetch_rtas_last_error(NULL);
Linus Torvalds's avatar
Linus Torvalds committed
1250 1251 1252

	if (nret > 1 && outputs != NULL)
		for (i = 0; i < nret-1; ++i)
1253 1254
			outputs[i] = be32_to_cpu(args->rets[i + 1]);
	ret = (nret > 0) ? be32_to_cpu(args->rets[0]) : 0;
Linus Torvalds's avatar
Linus Torvalds committed
1255

1256
	lockdep_unpin_lock(&rtas_lock, cookie);
1257
	raw_spin_unlock_irqrestore(&rtas_lock, flags);
Linus Torvalds's avatar
Linus Torvalds committed
1258 1259 1260

	if (buff_copy) {
		log_error(buff_copy, ERR_TYPE_RTAS_LOG, 0);
1261
		if (slab_is_available())
Linus Torvalds's avatar
Linus Torvalds committed
1262 1263 1264 1265
			kfree(buff_copy);
	}
	return ret;
}
1266
EXPORT_SYMBOL_GPL(rtas_call);
Linus Torvalds's avatar
Linus Torvalds committed
1267

1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286
/**
 * rtas_busy_delay_time() - From an RTAS status value, calculate the
 *                          suggested delay time in milliseconds.
 *
 * @status: a value returned from rtas_call() or similar APIs which return
 *          the status of a RTAS function call.
 *
 * Context: Any context.
 *
 * Return:
 * * 100000 - If @status is 9905.
 * * 10000  - If @status is 9904.
 * * 1000   - If @status is 9903.
 * * 100    - If @status is 9902.
 * * 10     - If @status is 9901.
 * * 1      - If @status is either 9900 or -2. This is "wrong" for -2, but
 *            some callers depend on this behavior, and the worst outcome
 *            is that they will delay for longer than necessary.
 * * 0      - If @status is not a busy or extended delay value.
Linus Torvalds's avatar
Linus Torvalds committed
1287
 */
1288
unsigned int rtas_busy_delay_time(int status)
Linus Torvalds's avatar
Linus Torvalds committed
1289
{
1290 1291 1292 1293 1294
	int order;
	unsigned int ms = 0;

	if (status == RTAS_BUSY) {
		ms = 1;
1295 1296 1297
	} else if (status >= RTAS_EXTENDED_DELAY_MIN &&
		   status <= RTAS_EXTENDED_DELAY_MAX) {
		order = status - RTAS_EXTENDED_DELAY_MIN;
1298 1299 1300
		for (ms = 1; order > 0; order--)
			ms *= 10;
	}
Linus Torvalds's avatar
Linus Torvalds committed
1301

1302 1303
	return ms;
}
Linus Torvalds's avatar
Linus Torvalds committed
1304

1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345
/*
 * Early boot fallback for rtas_busy_delay().
 *
 * Returns true when the caller should retry: for RTAS_BUSY, and for
 * extended delay statuses until more than 1000 of them have been seen
 * back to back. Any other status resets the counter and returns false.
 */
static bool __init rtas_busy_delay_early(int status)
{
	static size_t successive_ext_delays __initdata;

	if (status < RTAS_EXTENDED_DELAY_MIN ||
	    status > RTAS_EXTENDED_DELAY_MAX) {
		successive_ext_delays = 0;
		return status == RTAS_BUSY;
	}

	/*
	 * In the unlikely case that we receive an extended
	 * delay status in early boot, the OS is probably not
	 * the cause, and there's nothing we can do to clear
	 * the condition. Best we can do is delay for a bit
	 * and hope it's transient. Lie to the caller if it
	 * seems like we're stuck in a retry loop.
	 */
	mdelay(1);
	successive_ext_delays += 1;
	if (successive_ext_delays <= 1000)
		return true;

	pr_err("too many extended delays, giving up\n");
	dump_stack();
	successive_ext_delays = 0;

	return false;
}

1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363
/**
 * rtas_busy_delay() - helper for RTAS busy and extended delay statuses
 *
 * @status: a value returned from rtas_call() or similar APIs which return
 *          the status of a RTAS function call.
 *
 * Context: Process context. May sleep or schedule.
 *
 * Return:
 * * true  - @status is RTAS_BUSY or an extended delay hint. The
 *           caller may assume that the CPU has been yielded if necessary,
 *           and that an appropriate delay for @status has elapsed.
 *           Generally the caller should reattempt the RTAS call which
 *           yielded @status.
 *
 * * false - @status is not @RTAS_BUSY nor an extended delay hint. The
 *           caller is responsible for handling @status.
 */
1364
bool __ref rtas_busy_delay(int status)
1365 1366
{
	unsigned int ms;
1367 1368
	bool ret;

1369 1370 1371 1372 1373 1374
	/*
	 * Can't do timed sleeps before timekeeping is up.
	 */
	if (system_state < SYSTEM_SCHEDULING)
		return rtas_busy_delay_early(status);

1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420
	switch (status) {
	case RTAS_EXTENDED_DELAY_MIN...RTAS_EXTENDED_DELAY_MAX:
		ret = true;
		ms = rtas_busy_delay_time(status);
		/*
		 * The extended delay hint can be as high as 100 seconds.
		 * Surely any function returning such a status is either
		 * buggy or isn't going to be significantly slowed by us
		 * polling at 1HZ. Clamp the sleep time to one second.
		 */
		ms = clamp(ms, 1U, 1000U);
		/*
		 * The delay hint is an order-of-magnitude suggestion, not
		 * a minimum. It is fine, possibly even advantageous, for
		 * us to pause for less time than hinted. For small values,
		 * use usleep_range() to ensure we don't sleep much longer
		 * than actually needed.
		 *
		 * See Documentation/timers/timers-howto.rst for
		 * explanation of the threshold used here. In effect we use
		 * usleep_range() for 9900 and 9901, msleep() for
		 * 9902-9905.
		 */
		if (ms <= 20)
			usleep_range(ms * 100, ms * 1000);
		else
			msleep(ms);
		break;
	case RTAS_BUSY:
		ret = true;
		/*
		 * We should call again immediately if there's no other
		 * work to do.
		 */
		cond_resched();
		break;
	default:
		ret = false;
		/*
		 * Not a busy or extended delay status; the caller should
		 * handle @status itself. Ensure we warn on misuses in
		 * atomic context regardless.
		 */
		might_sleep();
		break;
	}
Linus Torvalds's avatar
Linus Torvalds committed
1421

1422
	return ret;
Linus Torvalds's avatar
Linus Torvalds committed
1423
}
1424
EXPORT_SYMBOL_GPL(rtas_busy_delay);
Linus Torvalds's avatar
Linus Torvalds committed
1425

1426
int rtas_error_rc(int rtas_rc)
Linus Torvalds's avatar
Linus Torvalds committed
1427 1428 1429 1430
{
	int rc;

	switch (rtas_rc) {
1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449
	case RTAS_HARDWARE_ERROR:	/* Hardware Error */
		rc = -EIO;
		break;
	case RTAS_INVALID_PARAMETER:	/* Bad indicator/domain/etc */
		rc = -EINVAL;
		break;
	case -9000:			/* Isolation error */
		rc = -EFAULT;
		break;
	case -9001:			/* Outstanding TCE/PTE */
		rc = -EEXIST;
		break;
	case -9002:			/* No usable slot */
		rc = -ENODEV;
		break;
	default:
		pr_err("%s: unexpected error %d\n", __func__, rtas_rc);
		rc = -ERANGE;
		break;
Linus Torvalds's avatar
Linus Torvalds committed
1450 1451 1452
	}
	return rc;
}
1453
EXPORT_SYMBOL_GPL(rtas_error_rc);
Linus Torvalds's avatar
Linus Torvalds committed
1454 1455 1456

int rtas_get_power_level(int powerdomain, int *level)
{
1457
	int token = rtas_function_token(RTAS_FN_GET_POWER_LEVEL);
Linus Torvalds's avatar
Linus Torvalds committed
1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469
	int rc;

	if (token == RTAS_UNKNOWN_SERVICE)
		return -ENOENT;

	while ((rc = rtas_call(token, 1, 2, level, powerdomain)) == RTAS_BUSY)
		udelay(1);

	if (rc < 0)
		return rtas_error_rc(rc);
	return rc;
}
1470
EXPORT_SYMBOL_GPL(rtas_get_power_level);
Linus Torvalds's avatar
Linus Torvalds committed
1471 1472 1473

int rtas_set_power_level(int powerdomain, int level, int *setlevel)
{
1474
	int token = rtas_function_token(RTAS_FN_SET_POWER_LEVEL);
Linus Torvalds's avatar
Linus Torvalds committed
1475 1476 1477 1478 1479
	int rc;

	if (token == RTAS_UNKNOWN_SERVICE)
		return -ENOENT;

1480
	do {
Linus Torvalds's avatar
Linus Torvalds committed
1481
		rc = rtas_call(token, 2, 2, setlevel, powerdomain, level);
1482
	} while (rtas_busy_delay(rc));
Linus Torvalds's avatar
Linus Torvalds committed
1483 1484 1485 1486 1487

	if (rc < 0)
		return rtas_error_rc(rc);
	return rc;
}
1488
EXPORT_SYMBOL_GPL(rtas_set_power_level);
Linus Torvalds's avatar
Linus Torvalds committed
1489 1490 1491

int rtas_get_sensor(int sensor, int index, int *state)
{
1492
	int token = rtas_function_token(RTAS_FN_GET_SENSOR_STATE);
Linus Torvalds's avatar
Linus Torvalds committed
1493 1494 1495 1496 1497
	int rc;

	if (token == RTAS_UNKNOWN_SERVICE)
		return -ENOENT;

1498
	do {
Linus Torvalds's avatar
Linus Torvalds committed
1499
		rc = rtas_call(token, 2, 2, state, sensor, index);
1500
	} while (rtas_busy_delay(rc));
Linus Torvalds's avatar
Linus Torvalds committed
1501 1502 1503 1504 1505

	if (rc < 0)
		return rtas_error_rc(rc);
	return rc;
}
1506
EXPORT_SYMBOL_GPL(rtas_get_sensor);
Linus Torvalds's avatar
Linus Torvalds committed
1507

1508 1509
int rtas_get_sensor_fast(int sensor, int index, int *state)
{
1510
	int token = rtas_function_token(RTAS_FN_GET_SENSOR_STATE);
1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524
	int rc;

	if (token == RTAS_UNKNOWN_SERVICE)
		return -ENOENT;

	rc = rtas_call(token, 2, 2, state, sensor, index);
	WARN_ON(rc == RTAS_BUSY || (rc >= RTAS_EXTENDED_DELAY_MIN &&
				    rc <= RTAS_EXTENDED_DELAY_MAX));

	if (rc < 0)
		return rtas_error_rc(rc);
	return rc;
}

1525 1526 1527 1528
bool rtas_indicator_present(int token, int *maxindex)
{
	int proplen, count, i;
	const struct indicator_elem {
1529 1530
		__be32 token;
		__be32 maxindex;
1531 1532 1533 1534 1535 1536 1537 1538 1539
	} *indicators;

	indicators = of_get_property(rtas.dev, "rtas-indicators", &proplen);
	if (!indicators)
		return false;

	count = proplen / sizeof(struct indicator_elem);

	for (i = 0; i < count; i++) {
1540
		if (__be32_to_cpu(indicators[i].token) != token)
1541 1542
			continue;
		if (maxindex)
1543
			*maxindex = __be32_to_cpu(indicators[i].maxindex);
1544 1545 1546 1547 1548 1549
		return true;
	}

	return false;
}

Linus Torvalds's avatar
Linus Torvalds committed
1550 1551
int rtas_set_indicator(int indicator, int index, int new_value)
{
1552
	int token = rtas_function_token(RTAS_FN_SET_INDICATOR);
Linus Torvalds's avatar
Linus Torvalds committed
1553 1554 1555 1556 1557
	int rc;

	if (token == RTAS_UNKNOWN_SERVICE)
		return -ENOENT;

1558
	do {
Linus Torvalds's avatar
Linus Torvalds committed
1559
		rc = rtas_call(token, 3, 1, NULL, indicator, index, new_value);
1560
	} while (rtas_busy_delay(rc));
Linus Torvalds's avatar
Linus Torvalds committed
1561 1562 1563 1564 1565

	if (rc < 0)
		return rtas_error_rc(rc);
	return rc;
}
1566
EXPORT_SYMBOL_GPL(rtas_set_indicator);
Linus Torvalds's avatar
Linus Torvalds committed
1567

1568 1569 1570 1571 1572
/*
 * Ignoring RTAS extended delay
 */
int rtas_set_indicator_fast(int indicator, int index, int new_value)
{
1573
	int token = rtas_function_token(RTAS_FN_SET_INDICATOR);
1574 1575 1576 1577 1578 1579 1580
	int rc;

	if (token == RTAS_UNKNOWN_SERVICE)
		return -ENOENT;

	rc = rtas_call(token, 3, 1, NULL, indicator, index, new_value);

1581 1582
	WARN_ON(rc == RTAS_BUSY || (rc >= RTAS_EXTENDED_DELAY_MIN &&
				    rc <= RTAS_EXTENDED_DELAY_MAX));
1583 1584 1585 1586 1587 1588 1589

	if (rc < 0)
		return rtas_error_rc(rc);

	return rc;
}

1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615
/**
 * rtas_ibm_suspend_me() - Call ibm,suspend-me to suspend the LPAR.
 *
 * @fw_status: RTAS call status will be placed here if not NULL.
 *
 * rtas_ibm_suspend_me() should be called only on a CPU which has
 * received H_CONTINUE from the H_JOIN hcall. All other active CPUs
 * should be waiting to return from H_JOIN.
 *
 * rtas_ibm_suspend_me() may suspend execution of the OS
 * indefinitely. Callers should take appropriate measures upon return, such as
 * resetting watchdog facilities.
 *
 * Callers may choose to retry this call if @fw_status is
 * %RTAS_THREADS_ACTIVE.
 *
 * Return:
 * 0          - The partition has resumed from suspend, possibly after
 *              migration to a different host.
 * -ECANCELED - The operation was aborted.
 * -EAGAIN    - There were other CPUs not in H_JOIN at the time of the call.
 * -EBUSY     - Some other condition prevented the suspend from succeeding.
 * -EIO       - Hardware/platform error.
 */
int rtas_ibm_suspend_me(int *fw_status)
{
1616
	int token = rtas_function_token(RTAS_FN_IBM_SUSPEND_ME);
1617 1618 1619
	int fwrc;
	int ret;

1620
	fwrc = rtas_call(token, 0, 1, NULL);
1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647

	switch (fwrc) {
	case 0:
		ret = 0;
		break;
	case RTAS_SUSPEND_ABORTED:
		ret = -ECANCELED;
		break;
	case RTAS_THREADS_ACTIVE:
		ret = -EAGAIN;
		break;
	case RTAS_NOT_SUSPENDABLE:
	case RTAS_OUTSTANDING_COPROC:
		ret = -EBUSY;
		break;
	case -1:
	default:
		ret = -EIO;
		break;
	}

	if (fw_status)
		*fw_status = fwrc;

	return ret;
}

1648
/**
 * rtas_restart() - Reboot via the RTAS system-reboot function.
 * @cmd: Not used by this function.
 *
 * Runs rtas_flash_term_hook (if set) with SYS_RESTART, calls
 * system-reboot, and if that call returns, logs its status and spins
 * forever.
 */
void __noreturn rtas_restart(char *cmd)
{
	int status;

	if (rtas_flash_term_hook)
		rtas_flash_term_hook(SYS_RESTART);

	status = rtas_call(rtas_function_token(RTAS_FN_SYSTEM_REBOOT), 0, 1, NULL);
	pr_emerg("system-reboot returned %d\n", status);

	for (;;)
		;
}

1657
void rtas_power_off(void)
Linus Torvalds's avatar
Linus Torvalds committed
1658
{
1659 1660
	if (rtas_flash_term_hook)
		rtas_flash_term_hook(SYS_POWER_OFF);
Linus Torvalds's avatar
Linus Torvalds committed
1661
	/* allow power on only with power button press */
1662
	pr_emerg("power-off returned %d\n",
1663
		 rtas_call(rtas_function_token(RTAS_FN_POWER_OFF), 2, 1, NULL, -1, -1));
Linus Torvalds's avatar
Linus Torvalds committed
1664 1665 1666
	for (;;);
}

1667
/**
 * rtas_halt() - Halt by calling the RTAS power-off function.
 *
 * Runs rtas_flash_term_hook (if set) with SYS_HALT, calls power-off,
 * and if that call returns, logs its status and spins forever.
 */
void __noreturn rtas_halt(void)
{
	int status;

	if (rtas_flash_term_hook)
		rtas_flash_term_hook(SYS_HALT);

	/* allow power on only with power button press */
	status = rtas_call(rtas_function_token(RTAS_FN_POWER_OFF), 2, 1, NULL, -1, -1);
	pr_emerg("power-off returned %d\n", status);

	for (;;)
		;
}

/* Must be in the RMO region, so we place it here */
static char rtas_os_term_buf[2048];
1679
static bool ibm_extended_os_term;
Linus Torvalds's avatar
Linus Torvalds committed
1680

1681
void rtas_os_term(char *str)
1682
{
1683
	s32 token = rtas_function_token(RTAS_FN_IBM_OS_TERM);
1684
	static struct rtas_args args;
1685
	int status;
1686

1687 1688 1689 1690 1691 1692
	/*
	 * Firmware with the ibm,extended-os-term property is guaranteed
	 * to always return from an ibm,os-term call. Earlier versions without
	 * this property may terminate the partition which we want to avoid
	 * since it interferes with panic_timeout.
	 */
1693 1694

	if (token == RTAS_UNKNOWN_SERVICE || !ibm_extended_os_term)
Linus Torvalds's avatar
Linus Torvalds committed
1695 1696
		return;

1697 1698
	snprintf(rtas_os_term_buf, 2048, "OS panic: %s", str);

1699 1700 1701 1702 1703
	/*
	 * Keep calling as long as RTAS returns a "try again" status,
	 * but don't use rtas_busy_delay(), which potentially
	 * schedules.
	 */
Linus Torvalds's avatar
Linus Torvalds committed
1704
	do {
1705 1706
		rtas_call_unlocked(&args, token, 1, 1, NULL, __pa(rtas_os_term_buf));
		status = be32_to_cpu(args.rets[0]);
1707
	} while (rtas_busy_delay_time(status));
Linus Torvalds's avatar
Linus Torvalds committed
1708

1709
	if (status != 0)
1710
		pr_emerg("ibm,os-term call failed %d\n", status);
Linus Torvalds's avatar
Linus Torvalds committed
1711 1712
}

1713 1714 1715
/**
 * rtas_activate_firmware() - Activate a new version of firmware.
 *
1716 1717
 * Context: This function may sleep.
 *
1718 1719 1720 1721 1722 1723 1724 1725
 * Activate a new version of partition firmware. The OS must call this
 * after resuming from a partition hibernation or migration in order
 * to maintain the ability to perform live firmware updates. It's not
 * catastrophic for this method to be absent or to fail; just log the
 * condition in that case.
 */
void rtas_activate_firmware(void)
{
1726
	int token = rtas_function_token(RTAS_FN_IBM_ACTIVATE_FIRMWARE);
1727 1728 1729 1730 1731 1732 1733
	int fwrc;

	if (token == RTAS_UNKNOWN_SERVICE) {
		pr_notice("ibm,activate-firmware method unavailable\n");
		return;
	}

1734 1735
	mutex_lock(&rtas_ibm_activate_firmware_lock);

1736 1737 1738 1739
	do {
		fwrc = rtas_call(token, 0, 1, NULL);
	} while (rtas_busy_delay(fwrc));

1740 1741
	mutex_unlock(&rtas_ibm_activate_firmware_lock);

1742 1743 1744 1745
	if (fwrc)
		pr_err("ibm,activate-firmware failed (%i)\n", fwrc);
}

1746
/**
1747 1748
 * get_pseries_errorlog() - Find a specific pseries error log in an RTAS
 *                          extended event log.
1749 1750 1751
 * @log: RTAS error/event log
 * @section_id: two character section identifier
 *
1752
 * Return: A pointer to the specified errorlog or NULL if not found.
1753
 */
1754 1755
noinstr struct pseries_errorlog *get_pseries_errorlog(struct rtas_error_log *log,
						      uint16_t section_id)
1756 1757 1758 1759 1760
{
	struct rtas_ext_event_log_v6 *ext_log =
		(struct rtas_ext_event_log_v6 *)log->buffer;
	struct pseries_errorlog *sect;
	unsigned char *p, *log_end;
1761 1762 1763
	uint32_t ext_log_length = rtas_error_extended_log_length(log);
	uint8_t log_format = rtas_ext_event_log_format(ext_log);
	uint32_t company_id = rtas_ext_event_company_id(ext_log);
1764 1765

	/* Check that we understand the format */
1766 1767 1768
	if (ext_log_length < sizeof(struct rtas_ext_event_log_v6) ||
	    log_format != RTAS_V6EXT_LOG_FORMAT_EVENT_LOG ||
	    company_id != RTAS_V6EXT_COMPANY_ID_IBM)
1769 1770
		return NULL;

1771
	log_end = log->buffer + ext_log_length;
1772 1773 1774 1775
	p = ext_log->vendor_log;

	while (p < log_end) {
		sect = (struct pseries_errorlog *)p;
1776
		if (pseries_errorlog_id(sect) == section_id)
1777
			return sect;
1778
		p += pseries_errorlog_length(sect);
1779 1780 1781 1782 1783
	}

	return NULL;
}

1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796
/*
 * The sys_rtas syscall, as originally designed, allows root to pass
 * arbitrary physical addresses to RTAS calls. A number of RTAS calls
 * can be abused to write to arbitrary memory and do other things that
 * are potentially harmful to system integrity, and thus should only
 * be used inside the kernel and not exposed to userspace.
 *
 * All known legitimate users of the sys_rtas syscall will only ever
 * pass addresses that fall within the RMO buffer, and use a known
 * subset of RTAS calls.
 *
 * Accordingly, we filter RTAS requests to check that the call is
 * permitted, and that provided pointers fall within the RMO buffer.
 * If a function is allowed to be invoked via the syscall, then its
 * entry in the rtas_functions table points to a rtas_filter that
 * describes its constraints, with the indexes of the parameters which
 * are expected to contain addresses and sizes of buffers allocated
 * inside the RMO buffer.
 */

/*
 * True if the inclusive range [@base, @end] is well-formed (@base does
 * not exceed @end) and both ends lie within the RTAS_USER_REGION_SIZE
 * bytes starting at rtas_rmo_buf.
 */
static bool in_rmo_buf(u32 base, u32 end)
{
	if (base > end)
		return false;

	if (base < rtas_rmo_buf ||
	    base >= (rtas_rmo_buf + RTAS_USER_REGION_SIZE))
		return false;

	if (end < rtas_rmo_buf ||
	    end >= (rtas_rmo_buf + RTAS_USER_REGION_SIZE))
		return false;

	return true;
}

1813
static bool block_rtas_call(const struct rtas_function *func, int nargs,
1814 1815
			    struct rtas_args *args)
{
1816
	const struct rtas_filter *f;
1817 1818 1819 1820
	const bool is_platform_dump =
		func == &rtas_function_table[RTAS_FNIDX__IBM_PLATFORM_DUMP];
	const bool is_config_conn =
		func == &rtas_function_table[RTAS_FNIDX__IBM_CONFIGURE_CONNECTOR];
1821
	u32 base, size, end;
1822

1823
	/*
1824
	 * Only functions with filters attached are allowed.
1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842
	 */
	f = func->filter;
	if (!f)
		goto err;
	/*
	 * And some functions aren't allowed on LE.
	 */
	if (IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN) && func->banned_for_syscall_on_le)
		goto err;

	if (f->buf_idx1 != -1) {
		base = be32_to_cpu(args->args[f->buf_idx1]);
		if (f->size_idx1 != -1)
			size = be32_to_cpu(args->args[f->size_idx1]);
		else if (f->fixed_size)
			size = f->fixed_size;
		else
			size = 1;
1843

1844
		end = base + size - 1;
1845

1846 1847 1848 1849
		/*
		 * Special case for ibm,platform-dump - NULL buffer
		 * address is used to indicate end of dump processing
		 */
1850
		if (is_platform_dump && base == 0)
1851
			return false;
1852

1853 1854 1855
		if (!in_rmo_buf(base, end))
			goto err;
	}
1856

1857 1858 1859 1860 1861 1862 1863 1864 1865
	if (f->buf_idx2 != -1) {
		base = be32_to_cpu(args->args[f->buf_idx2]);
		if (f->size_idx2 != -1)
			size = be32_to_cpu(args->args[f->size_idx2]);
		else if (f->fixed_size)
			size = f->fixed_size;
		else
			size = 1;
		end = base + size - 1;
1866

1867 1868 1869 1870
		/*
		 * Special case for ibm,configure-connector where the
		 * address can be 0
		 */
1871
		if (is_config_conn && base == 0)
1872
			return false;
1873

1874 1875
		if (!in_rmo_buf(base, end))
			goto err;
1876 1877
	}

1878
	return false;
1879 1880
err:
	pr_err_ratelimited("sys_rtas: RTAS call blocked - exploit attempt?\n");
1881 1882
	pr_err_ratelimited("sys_rtas: %s nargs=%d (called by %s)\n",
			   func->name, nargs, current->comm);
1883 1884 1885
	return true;
}

1886
/* We assume to be passed big endian arguments */
1887
SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs)
Linus Torvalds's avatar
Linus Torvalds committed
1888
{
1889
	const struct rtas_function *func;
1890
	struct pin_cookie cookie;
Linus Torvalds's avatar
Linus Torvalds committed
1891 1892
	struct rtas_args args;
	unsigned long flags;
1893
	char *buff_copy, *errbuf = NULL;
1894
	int nargs, nret, token;
Linus Torvalds's avatar
Linus Torvalds committed
1895 1896 1897 1898

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

1899 1900 1901
	if (!rtas.entry)
		return -EINVAL;

Linus Torvalds's avatar
Linus Torvalds committed
1902 1903 1904
	if (copy_from_user(&args, uargs, 3 * sizeof(u32)) != 0)
		return -EFAULT;

1905 1906 1907 1908
	nargs = be32_to_cpu(args.nargs);
	nret  = be32_to_cpu(args.nret);
	token = be32_to_cpu(args.token);

1909
	if (nargs >= ARRAY_SIZE(args.args)
1910 1911
	    || nret > ARRAY_SIZE(args.args)
	    || nargs + nret > ARRAY_SIZE(args.args))
Linus Torvalds's avatar
Linus Torvalds committed
1912 1913 1914 1915 1916 1917 1918
		return -EINVAL;

	/* Copy in args. */
	if (copy_from_user(args.args, uargs->args,
			   nargs * sizeof(rtas_arg_t)) != 0)
		return -EFAULT;

1919 1920 1921 1922 1923 1924
	/*
	 * If this token doesn't correspond to a function the kernel
	 * understands, you're not allowed to call it.
	 */
	func = rtas_token_to_function_untrusted(token);
	if (!func)
1925 1926
		return -EINVAL;

1927
	args.rets = &args.args[nargs];
1928
	memset(args.rets, 0, nret * sizeof(rtas_arg_t));
1929

1930
	if (block_rtas_call(func, nargs, &args))
1931 1932
		return -EINVAL;

1933
	if (token_is_restricted_errinjct(token)) {
1934 1935 1936 1937 1938 1939 1940
		int err;

		err = security_locked_down(LOCKDOWN_RTAS_ERROR_INJECTION);
		if (err)
			return err;
	}

1941
	/* Need to handle ibm,suspend_me call specially */
1942
	if (token == rtas_function_token(RTAS_FN_IBM_SUSPEND_ME)) {
1943 1944

		/*
1945 1946
		 * rtas_ibm_suspend_me assumes the streamid handle is in cpu
		 * endian, or at least the hcall within it requires it.
1947
		 */
1948
		int rc = 0;
1949 1950
		u64 handle = ((u64)be32_to_cpu(args.args[0]) << 32)
		              | be32_to_cpu(args.args[1]);
1951
		rc = rtas_syscall_dispatch_ibm_suspend_me(handle);
1952 1953 1954 1955 1956
		if (rc == -EAGAIN)
			args.rets[0] = cpu_to_be32(RTAS_NOT_SUSPENDABLE);
		else if (rc == -EIO)
			args.rets[0] = cpu_to_be32(-1);
		else if (rc)
1957 1958 1959 1960
			return rc;
		goto copy_return;
	}

1961
	buff_copy = get_errorlog_buffer();
Linus Torvalds's avatar
Linus Torvalds committed
1962

1963 1964 1965 1966 1967 1968 1969 1970 1971
	/*
	 * If this function has a mutex assigned to it, we must
	 * acquire it to avoid interleaving with any kernel-based uses
	 * of the same function. Kernel-based sequences acquire the
	 * appropriate mutex explicitly.
	 */
	if (func->lock)
		mutex_lock(func->lock);

1972
	raw_spin_lock_irqsave(&rtas_lock, flags);
1973
	cookie = lockdep_pin_lock(&rtas_lock);
Linus Torvalds's avatar
Linus Torvalds committed
1974

1975
	rtas_args = args;
1976
	do_enter_rtas(&rtas_args);
1977
	args = rtas_args;
Linus Torvalds's avatar
Linus Torvalds committed
1978 1979 1980

	/* A -1 return code indicates that the last command couldn't
	   be completed due to a hardware error. */
1981
	if (be32_to_cpu(args.rets[0]) == -1)
1982
		errbuf = __fetch_rtas_last_error(buff_copy);
Linus Torvalds's avatar
Linus Torvalds committed
1983

1984
	lockdep_unpin_lock(&rtas_lock, cookie);
1985
	raw_spin_unlock_irqrestore(&rtas_lock, flags);
Linus Torvalds's avatar
Linus Torvalds committed
1986

1987 1988 1989
	if (func->lock)
		mutex_unlock(func->lock);

Linus Torvalds's avatar
Linus Torvalds committed
1990
	if (buff_copy) {
1991 1992
		if (errbuf)
			log_error(errbuf, ERR_TYPE_RTAS_LOG, 0);
Linus Torvalds's avatar
Linus Torvalds committed
1993 1994 1995
		kfree(buff_copy);
	}

1996
 copy_return:
Linus Torvalds's avatar
Linus Torvalds committed
1997 1998 1999
	/* Copy out args. */
	if (copy_to_user(uargs->args + nargs,
			 args.args + nargs,
2000
			 nret * sizeof(rtas_arg_t)) != 0)
Linus Torvalds's avatar
Linus Torvalds committed
2001 2002 2003 2004 2005
		return -EFAULT;

	return 0;
}

2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053
/*
 * Prepare rtas_function_table for use.
 *
 * Every entry's token is first reset to RTAS_UNKNOWN_SERVICE, and the
 * table is checked for strictly ascending function names; duplicates and
 * ordering mistakes are reported with pr_err(). Then each 32-bit property
 * of the RTAS device node whose name matches a table entry supplies that
 * entry's token.
 */
static void __init rtas_function_table_init(void)
{
	struct property *pp;
	size_t idx;

	for (idx = 0; idx < ARRAY_SIZE(rtas_function_table); idx++) {
		struct rtas_function *entry = &rtas_function_table[idx];

		entry->token = RTAS_UNKNOWN_SERVICE;

		/*
		 * Ensure table is sorted correctly for binary search
		 * on function names. The first entry has no predecessor
		 * to compare against.
		 */
		if (idx > 0) {
			const struct rtas_function *prev = entry - 1;
			int order = strcmp(prev->name, entry->name);

			if (order == 0)
				pr_err("'%s' has duplicate function table entries\n",
				       entry->name);
			else if (order > 0)
				pr_err("function table unsorted: '%s' wrongly precedes '%s'\n",
				       prev->name, entry->name);
		}
	}

	for_each_property_of_node(rtas.dev, pp) {
		struct rtas_function *match;

		/* Tokens are single 32-bit cells; anything else is not one. */
		if (pp->length != sizeof(u32))
			continue;

		match = __rtas_name_to_function(pp->name);
		if (match) {
			match->token = be32_to_cpup((__be32 *)pp->value);

			pr_debug("function %s has token %u\n", match->name, match->token);
		}
	}
}

Linus Torvalds's avatar
Linus Torvalds committed
2054
/*
 * Call early during boot, before mem init, to retrieve the RTAS
 * information from the device-tree and allocate the RMO buffer for userland
 * accesses.
 *
 * Returns quietly, leaving rtas.dev NULL, when there is no "rtas" node or
 * the node lacks "linux,rtas-base" or "rtas-size". Panics if the RMO
 * buffer cannot be allocated.
 */
void __init rtas_initialize(void)
{
	unsigned long rtas_region = RTAS_INSTANTIATE_MAX;
	u32 base, size, entry;
	int no_base, no_size, no_entry;

	/* Get RTAS dev node and fill up our "rtas" structure with infos
	 * about it.
	 */
	rtas.dev = of_find_node_by_name(NULL, "rtas");
	if (!rtas.dev)
		return;

	no_base = of_property_read_u32(rtas.dev, "linux,rtas-base", &base);
	no_size = of_property_read_u32(rtas.dev, "rtas-size", &size);
	if (no_base || no_size) {
		of_node_put(rtas.dev);
		rtas.dev = NULL;
		return;
	}

	rtas.base = base;
	rtas.size = size;
	/* Without an explicit entry point, fall back to the base address. */
	no_entry = of_property_read_u32(rtas.dev, "linux,rtas-entry", &entry);
	rtas.entry = no_entry ? rtas.base : entry;

	init_error_log_max();

	/* Must be called before any function token lookups */
	rtas_function_table_init();

	/*
	 * Discover this now to avoid a device tree lookup in the
	 * panic path.
	 */
	ibm_extended_os_term = of_property_read_bool(rtas.dev, "ibm,extended-os-term");

	/* RTAS was found: allocate the RMO buffer for it, below rtas_region. */
#ifdef CONFIG_PPC64
	if (firmware_has_feature(FW_FEATURE_LPAR))
		rtas_region = min(ppc64_rma_size, RTAS_INSTANTIATE_MAX);
#endif
	rtas_rmo_buf = memblock_phys_alloc_range(RTAS_USER_REGION_SIZE, PAGE_SIZE,
						 0, rtas_region);
	if (!rtas_rmo_buf)
		/*
		 * Report the size that was actually requested; PAGE_SIZE
		 * is only the alignment. rtas_region is an unsigned long,
		 * so print it as one rather than through %pa.
		 */
		panic("ERROR: RTAS: Failed to allocate %lx bytes below %lx\n",
		      (unsigned long)RTAS_USER_REGION_SIZE, rtas_region);

	rtas_work_area_reserve_arena(rtas_region);
}
2111 2112 2113 2114

/*
 * Flattened device tree scan callback that picks up the RTAS location
 * from the depth-1 "rtas" node.
 *
 * Returns 0 for any other node so the scan continues, and 1 once the
 * "rtas" node has been processed so the scan stops.
 *
 * Property cells in the flattened tree are big endian, so each value is
 * converted with be32_to_cpup() before being stored.
 */
int __init early_init_dt_scan_rtas(unsigned long node,
		const char *uname, int depth, void *data)
{
	const __be32 *basep, *entryp, *sizep;

	if (depth != 1 || strcmp(uname, "rtas") != 0)
		return 0;

	basep  = of_get_flat_dt_prop(node, "linux,rtas-base", NULL);
	entryp = of_get_flat_dt_prop(node, "linux,rtas-entry", NULL);
	sizep  = of_get_flat_dt_prop(node, "rtas-size", NULL);

#ifdef CONFIG_PPC64
	/* need this feature to decide the crashkernel offset */
	if (of_get_flat_dt_prop(node, "ibm,hypertas-functions", NULL))
		powerpc_firmware_features |= FW_FEATURE_LPAR;
#endif

	/* Only record the location when all three properties are present. */
	if (basep && entryp && sizep) {
		rtas.base = be32_to_cpup(basep);
		rtas.entry = be32_to_cpup(entryp);
		rtas.size = be32_to_cpup(sizep);
	}

#ifdef CONFIG_UDBG_RTAS_CONSOLE
	basep = of_get_flat_dt_prop(node, "put-term-char", NULL);
	if (basep)
		rtas_putchar_token = be32_to_cpup(basep);

	basep = of_get_flat_dt_prop(node, "get-term-char", NULL);
	if (basep)
		rtas_getchar_token = be32_to_cpup(basep);

	if (rtas_putchar_token != RTAS_UNKNOWN_SERVICE &&
	    rtas_getchar_token != RTAS_UNKNOWN_SERVICE)
		udbg_init_rtas_console();

#endif

	/* break now */
	return 1;
}
2154

2155
static DEFINE_RAW_SPINLOCK(timebase_lock);
2156 2157
static u64 timebase = 0;

2158
void rtas_give_timebase(void)
2159 2160 2161
{
	unsigned long flags;

2162
	raw_spin_lock_irqsave(&timebase_lock, flags);
2163
	hard_irq_disable();
2164
	rtas_call(rtas_function_token(RTAS_FN_FREEZE_TIME_BASE), 0, 1, NULL);
2165
	timebase = get_tb();
2166
	raw_spin_unlock(&timebase_lock);
2167 2168 2169

	while (timebase)
		barrier();
2170
	rtas_call(rtas_function_token(RTAS_FN_THAW_TIME_BASE), 0, 1, NULL);
2171 2172 2173
	local_irq_restore(flags);
}

2174
void rtas_take_timebase(void)
2175 2176 2177
{
	while (!timebase)
		barrier();
2178
	raw_spin_lock(&timebase_lock);
2179 2180
	set_tb(timebase >> 32, timebase & 0xffffffff);
	timebase = 0;
2181
	raw_spin_unlock(&timebase_lock);
2182
}