Commit 21b4aa5d authored by Jens Axboe's avatar Jens Axboe

io_uring: add a few test tools

This adds two test programs in tools/io_uring/ that demonstrate both
the raw io_uring API (and all features) through a small benchmark
app, io_uring-bench, and the liburing exposed API in a simplified
cp(1) implementation through io_uring-cp.
Signed-off-by: default avatarJens Axboe <axboe@kernel.dk>
parent 31b51510
# SPDX-License-Identifier: GPL-2.0
# Makefile for io_uring test tools
CFLAGS += -Wall -Wextra -g -D_GNU_SOURCE
LDLIBS += -lpthread
all: io_uring-cp io_uring-bench
%: %.c
$(CC) $(CFLAGS) -o $@ $^
io_uring-bench: syscall.o io_uring-bench.o
$(CC) $(CFLAGS) $(LDLIBS) -o $@ $^
io_uring-cp: setup.o syscall.o queue.o
clean:
$(RM) io_uring-cp io_uring-bench *.o
.PHONY: all clean
This directory includes a few programs that demonstrate how to use io_uring
in an application. The examples are:
io_uring-cp
A very basic io_uring implementation of cp(1). It takes two
arguments, copies the first argument to the second. This example
is part of liburing, and hence uses the simplified liburing API
for setting up an io_uring instance, submitting IO, completing IO,
etc. The support functions in queue.c and setup.c are straight
out of liburing.
io_uring-bench
Benchmark program that does random reads on a number of files. This
app demonstrates the various features of io_uring, like fixed files,
fixed buffers, and polled IO. There are options in the program to
control which features to use. Arguments is the file (or files) that
io_uring-bench should operate on. This uses the raw io_uring
interface.
liburing can be cloned with git here:
git://git.kernel.dk/liburing
and contains a number of unit tests as well for testing io_uring. It also
comes with man pages for the three system calls.
Fio includes an io_uring engine, you can clone fio here:
git://git.kernel.dk/fio
#ifndef LIBURING_BARRIER_H
#define LIBURING_BARRIER_H
#if defined(__x86_64) || defined(__i386__)
#define read_barrier() __asm__ __volatile__("":::"memory")
#define write_barrier() __asm__ __volatile__("":::"memory")
#else
/*
* Add arch appropriate definitions. Be safe and use full barriers for
* archs we don't have support for.
*/
#define read_barrier() __sync_synchronize()
#define write_barrier() __sync_synchronize()
#endif
#endif
This diff is collapsed.
// SPDX-License-Identifier: GPL-2.0
/*
* Simple test program that demonstrates a file copy through io_uring. This
* uses the API exposed by liburing.
*
* Copyright (C) 2018-2019 Jens Axboe
*/
#include <stdio.h>
#include <fcntl.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <assert.h>
#include <errno.h>
#include <inttypes.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include "liburing.h"
#define QD 64
#define BS (32*1024)
static int infd, outfd;
struct io_data {
int read;
off_t first_offset, offset;
size_t first_len;
struct iovec iov;
};
static int setup_context(unsigned entries, struct io_uring *ring)
{
int ret;
ret = io_uring_queue_init(entries, ring, 0);
if (ret < 0) {
fprintf(stderr, "queue_init: %s\n", strerror(-ret));
return -1;
}
return 0;
}
static int get_file_size(int fd, off_t *size)
{
struct stat st;
if (fstat(fd, &st) < 0)
return -1;
if (S_ISREG(st.st_mode)) {
*size = st.st_size;
return 0;
} else if (S_ISBLK(st.st_mode)) {
unsigned long long bytes;
if (ioctl(fd, BLKGETSIZE64, &bytes) != 0)
return -1;
*size = bytes;
return 0;
}
return -1;
}
static void queue_prepped(struct io_uring *ring, struct io_data *data)
{
struct io_uring_sqe *sqe;
sqe = io_uring_get_sqe(ring);
assert(sqe);
if (data->read)
io_uring_prep_readv(sqe, infd, &data->iov, 1, data->offset);
else
io_uring_prep_writev(sqe, outfd, &data->iov, 1, data->offset);
io_uring_sqe_set_data(sqe, data);
}
static int queue_read(struct io_uring *ring, off_t size, off_t offset)
{
struct io_uring_sqe *sqe;
struct io_data *data;
sqe = io_uring_get_sqe(ring);
if (!sqe)
return 1;
data = malloc(size + sizeof(*data));
data->read = 1;
data->offset = data->first_offset = offset;
data->iov.iov_base = data + 1;
data->iov.iov_len = size;
data->first_len = size;
io_uring_prep_readv(sqe, infd, &data->iov, 1, offset);
io_uring_sqe_set_data(sqe, data);
return 0;
}
static void queue_write(struct io_uring *ring, struct io_data *data)
{
data->read = 0;
data->offset = data->first_offset;
data->iov.iov_base = data + 1;
data->iov.iov_len = data->first_len;
queue_prepped(ring, data);
io_uring_submit(ring);
}
static int copy_file(struct io_uring *ring, off_t insize)
{
unsigned long reads, writes;
struct io_uring_cqe *cqe;
off_t write_left, offset;
int ret;
write_left = insize;
writes = reads = offset = 0;
while (insize || write_left) {
unsigned long had_reads;
int got_comp;
/*
* Queue up as many reads as we can
*/
had_reads = reads;
while (insize) {
off_t this_size = insize;
if (reads + writes >= QD)
break;
if (this_size > BS)
this_size = BS;
else if (!this_size)
break;
if (queue_read(ring, this_size, offset))
break;
insize -= this_size;
offset += this_size;
reads++;
}
if (had_reads != reads) {
ret = io_uring_submit(ring);
if (ret < 0) {
fprintf(stderr, "io_uring_submit: %s\n", strerror(-ret));
break;
}
}
/*
* Queue is full at this point. Find at least one completion.
*/
got_comp = 0;
while (write_left) {
struct io_data *data;
if (!got_comp) {
ret = io_uring_wait_completion(ring, &cqe);
got_comp = 1;
} else
ret = io_uring_get_completion(ring, &cqe);
if (ret < 0) {
fprintf(stderr, "io_uring_get_completion: %s\n",
strerror(-ret));
return 1;
}
if (!cqe)
break;
data = (struct io_data *) (uintptr_t) cqe->user_data;
if (cqe->res < 0) {
if (cqe->res == -EAGAIN) {
queue_prepped(ring, data);
continue;
}
fprintf(stderr, "cqe failed: %s\n",
strerror(-cqe->res));
return 1;
} else if ((size_t) cqe->res != data->iov.iov_len) {
/* Short read/write, adjust and requeue */
data->iov.iov_base += cqe->res;
data->iov.iov_len -= cqe->res;
data->offset += cqe->res;
queue_prepped(ring, data);
continue;
}
/*
* All done. if write, nothing else to do. if read,
* queue up corresponding write.
*/
if (data->read) {
queue_write(ring, data);
write_left -= data->first_len;
reads--;
writes++;
} else {
free(data);
writes--;
}
}
}
return 0;
}
int main(int argc, char *argv[])
{
struct io_uring ring;
off_t insize;
int ret;
if (argc < 3) {
printf("%s: infile outfile\n", argv[0]);
return 1;
}
infd = open(argv[1], O_RDONLY);
if (infd < 0) {
perror("open infile");
return 1;
}
outfd = open(argv[2], O_WRONLY | O_CREAT | O_TRUNC, 0644);
if (outfd < 0) {
perror("open outfile");
return 1;
}
if (setup_context(QD, &ring))
return 1;
if (get_file_size(infd, &insize))
return 1;
ret = copy_file(&ring, insize);
close(infd);
close(outfd);
io_uring_queue_exit(&ring);
return ret;
}
#ifndef LIB_URING_H
#define LIB_URING_H
#include <sys/uio.h>
#include <signal.h>
#include <string.h>
#include "../../include/uapi/linux/io_uring.h"
/*
* Library interface to io_uring
*/
struct io_uring_sq {
unsigned *khead;
unsigned *ktail;
unsigned *kring_mask;
unsigned *kring_entries;
unsigned *kflags;
unsigned *kdropped;
unsigned *array;
struct io_uring_sqe *sqes;
unsigned sqe_head;
unsigned sqe_tail;
size_t ring_sz;
};
struct io_uring_cq {
unsigned *khead;
unsigned *ktail;
unsigned *kring_mask;
unsigned *kring_entries;
unsigned *koverflow;
struct io_uring_cqe *cqes;
size_t ring_sz;
};
struct io_uring {
struct io_uring_sq sq;
struct io_uring_cq cq;
int ring_fd;
};
/*
* System calls
*/
extern int io_uring_setup(unsigned entries, struct io_uring_params *p);
extern int io_uring_enter(unsigned fd, unsigned to_submit,
unsigned min_complete, unsigned flags, sigset_t *sig);
extern int io_uring_register(int fd, unsigned int opcode, void *arg,
unsigned int nr_args);
/*
* Library interface
*/
extern int io_uring_queue_init(unsigned entries, struct io_uring *ring,
unsigned flags);
extern int io_uring_queue_mmap(int fd, struct io_uring_params *p,
struct io_uring *ring);
extern void io_uring_queue_exit(struct io_uring *ring);
extern int io_uring_get_completion(struct io_uring *ring,
struct io_uring_cqe **cqe_ptr);
extern int io_uring_wait_completion(struct io_uring *ring,
struct io_uring_cqe **cqe_ptr);
extern int io_uring_submit(struct io_uring *ring);
extern struct io_uring_sqe *io_uring_get_sqe(struct io_uring *ring);
/*
* Command prep helpers
*/
static inline void io_uring_sqe_set_data(struct io_uring_sqe *sqe, void *data)
{
sqe->user_data = (unsigned long) data;
}
static inline void io_uring_prep_rw(int op, struct io_uring_sqe *sqe, int fd,
void *addr, unsigned len, off_t offset)
{
memset(sqe, 0, sizeof(*sqe));
sqe->opcode = op;
sqe->fd = fd;
sqe->off = offset;
sqe->addr = (unsigned long) addr;
sqe->len = len;
}
static inline void io_uring_prep_readv(struct io_uring_sqe *sqe, int fd,
struct iovec *iovecs, unsigned nr_vecs,
off_t offset)
{
io_uring_prep_rw(IORING_OP_READV, sqe, fd, iovecs, nr_vecs, offset);
}
static inline void io_uring_prep_read_fixed(struct io_uring_sqe *sqe, int fd,
void *buf, unsigned nbytes,
off_t offset)
{
io_uring_prep_rw(IORING_OP_READ_FIXED, sqe, fd, buf, nbytes, offset);
}
static inline void io_uring_prep_writev(struct io_uring_sqe *sqe, int fd,
struct iovec *iovecs, unsigned nr_vecs,
off_t offset)
{
io_uring_prep_rw(IORING_OP_WRITEV, sqe, fd, iovecs, nr_vecs, offset);
}
static inline void io_uring_prep_write_fixed(struct io_uring_sqe *sqe, int fd,
void *buf, unsigned nbytes,
off_t offset)
{
io_uring_prep_rw(IORING_OP_WRITE_FIXED, sqe, fd, buf, nbytes, offset);
}
static inline void io_uring_prep_poll_add(struct io_uring_sqe *sqe, int fd,
short poll_mask)
{
memset(sqe, 0, sizeof(*sqe));
sqe->opcode = IORING_OP_POLL_ADD;
sqe->fd = fd;
sqe->poll_events = poll_mask;
}
static inline void io_uring_prep_poll_remove(struct io_uring_sqe *sqe,
void *user_data)
{
memset(sqe, 0, sizeof(*sqe));
sqe->opcode = IORING_OP_POLL_REMOVE;
sqe->addr = (unsigned long) user_data;
}
static inline void io_uring_prep_fsync(struct io_uring_sqe *sqe, int fd,
int datasync)
{
memset(sqe, 0, sizeof(*sqe));
sqe->opcode = IORING_OP_FSYNC;
sqe->fd = fd;
if (datasync)
sqe->fsync_flags = IORING_FSYNC_DATASYNC;
}
#endif
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <unistd.h>
#include <errno.h>
#include <string.h>
#include "liburing.h"
#include "barrier.h"
static int __io_uring_get_completion(struct io_uring *ring,
struct io_uring_cqe **cqe_ptr, int wait)
{
struct io_uring_cq *cq = &ring->cq;
const unsigned mask = *cq->kring_mask;
unsigned head;
int ret;
*cqe_ptr = NULL;
head = *cq->khead;
do {
/*
* It's necessary to use a read_barrier() before reading
* the CQ tail, since the kernel updates it locklessly. The
* kernel has the matching store barrier for the update. The
* kernel also ensures that previous stores to CQEs are ordered
* with the tail update.
*/
read_barrier();
if (head != *cq->ktail) {
*cqe_ptr = &cq->cqes[head & mask];
break;
}
if (!wait)
break;
ret = io_uring_enter(ring->ring_fd, 0, 1,
IORING_ENTER_GETEVENTS, NULL);
if (ret < 0)
return -errno;
} while (1);
if (*cqe_ptr) {
*cq->khead = head + 1;
/*
* Ensure that the kernel sees our new head, the kernel has
* the matching read barrier.
*/
write_barrier();
}
return 0;
}
/*
* Return an IO completion, if one is readily available
*/
int io_uring_get_completion(struct io_uring *ring,
struct io_uring_cqe **cqe_ptr)
{
return __io_uring_get_completion(ring, cqe_ptr, 0);
}
/*
* Return an IO completion, waiting for it if necessary
*/
int io_uring_wait_completion(struct io_uring *ring,
struct io_uring_cqe **cqe_ptr)
{
return __io_uring_get_completion(ring, cqe_ptr, 1);
}
/*
* Submit sqes acquired from io_uring_get_sqe() to the kernel.
*
* Returns number of sqes submitted
*/
int io_uring_submit(struct io_uring *ring)
{
struct io_uring_sq *sq = &ring->sq;
const unsigned mask = *sq->kring_mask;
unsigned ktail, ktail_next, submitted;
int ret;
/*
* If we have pending IO in the kring, submit it first. We need a
* read barrier here to match the kernels store barrier when updating
* the SQ head.
*/
read_barrier();
if (*sq->khead != *sq->ktail) {
submitted = *sq->kring_entries;
goto submit;
}
if (sq->sqe_head == sq->sqe_tail)
return 0;
/*
* Fill in sqes that we have queued up, adding them to the kernel ring
*/
submitted = 0;
ktail = ktail_next = *sq->ktail;
while (sq->sqe_head < sq->sqe_tail) {
ktail_next++;
read_barrier();
sq->array[ktail & mask] = sq->sqe_head & mask;
ktail = ktail_next;
sq->sqe_head++;
submitted++;
}
if (!submitted)
return 0;
if (*sq->ktail != ktail) {
/*
* First write barrier ensures that the SQE stores are updated
* with the tail update. This is needed so that the kernel
* will never see a tail update without the preceeding sQE
* stores being done.
*/
write_barrier();
*sq->ktail = ktail;
/*
* The kernel has the matching read barrier for reading the
* SQ tail.
*/
write_barrier();
}
submit:
ret = io_uring_enter(ring->ring_fd, submitted, 0,
IORING_ENTER_GETEVENTS, NULL);
if (ret < 0)
return -errno;
return 0;
}
/*
* Return an sqe to fill. Application must later call io_uring_submit()
* when it's ready to tell the kernel about it. The caller may call this
* function multiple times before calling io_uring_submit().
*
* Returns a vacant sqe, or NULL if we're full.
*/
struct io_uring_sqe *io_uring_get_sqe(struct io_uring *ring)
{
struct io_uring_sq *sq = &ring->sq;
unsigned next = sq->sqe_tail + 1;
struct io_uring_sqe *sqe;
/*
* All sqes are used
*/
if (next - sq->sqe_head > *sq->kring_entries)
return NULL;
sqe = &sq->sqes[sq->sqe_tail & *sq->kring_mask];
sq->sqe_tail = next;
return sqe;
}
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <unistd.h>
#include <errno.h>
#include <string.h>
#include "liburing.h"
static int io_uring_mmap(int fd, struct io_uring_params *p,
struct io_uring_sq *sq, struct io_uring_cq *cq)
{
size_t size;
void *ptr;
int ret;
sq->ring_sz = p->sq_off.array + p->sq_entries * sizeof(unsigned);
ptr = mmap(0, sq->ring_sz, PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_SQ_RING);
if (ptr == MAP_FAILED)
return -errno;
sq->khead = ptr + p->sq_off.head;
sq->ktail = ptr + p->sq_off.tail;
sq->kring_mask = ptr + p->sq_off.ring_mask;
sq->kring_entries = ptr + p->sq_off.ring_entries;
sq->kflags = ptr + p->sq_off.flags;
sq->kdropped = ptr + p->sq_off.dropped;
sq->array = ptr + p->sq_off.array;
size = p->sq_entries * sizeof(struct io_uring_sqe),
sq->sqes = mmap(0, size, PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_POPULATE, fd,
IORING_OFF_SQES);
if (sq->sqes == MAP_FAILED) {
ret = -errno;
err:
munmap(sq->khead, sq->ring_sz);
return ret;
}
cq->ring_sz = p->cq_off.cqes + p->cq_entries * sizeof(struct io_uring_cqe);
ptr = mmap(0, cq->ring_sz, PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_CQ_RING);
if (ptr == MAP_FAILED) {
ret = -errno;
munmap(sq->sqes, p->sq_entries * sizeof(struct io_uring_sqe));
goto err;
}
cq->khead = ptr + p->cq_off.head;
cq->ktail = ptr + p->cq_off.tail;
cq->kring_mask = ptr + p->cq_off.ring_mask;
cq->kring_entries = ptr + p->cq_off.ring_entries;
cq->koverflow = ptr + p->cq_off.overflow;
cq->cqes = ptr + p->cq_off.cqes;
return 0;
}
/*
* For users that want to specify sq_thread_cpu or sq_thread_idle, this
* interface is a convenient helper for mmap()ing the rings.
* Returns -1 on error, or zero on success. On success, 'ring'
* contains the necessary information to read/write to the rings.
*/
int io_uring_queue_mmap(int fd, struct io_uring_params *p, struct io_uring *ring)
{
int ret;
memset(ring, 0, sizeof(*ring));
ret = io_uring_mmap(fd, p, &ring->sq, &ring->cq);
if (!ret)
ring->ring_fd = fd;
return ret;
}
/*
* Returns -1 on error, or zero on success. On success, 'ring'
* contains the necessary information to read/write to the rings.
*/
int io_uring_queue_init(unsigned entries, struct io_uring *ring, unsigned flags)
{
struct io_uring_params p;
int fd;
memset(&p, 0, sizeof(p));
p.flags = flags;
fd = io_uring_setup(entries, &p);
if (fd < 0)
return fd;
return io_uring_queue_mmap(fd, &p, ring);
}
void io_uring_queue_exit(struct io_uring *ring)
{
struct io_uring_sq *sq = &ring->sq;
struct io_uring_cq *cq = &ring->cq;
munmap(sq->sqes, *sq->kring_entries * sizeof(struct io_uring_sqe));
munmap(sq->khead, sq->ring_sz);
munmap(cq->khead, cq->ring_sz);
close(ring->ring_fd);
}
/*
* Will go away once libc support is there
*/
#include <unistd.h>
#include <sys/syscall.h>
#include <sys/uio.h>
#include <signal.h>
#include "liburing.h"
#if defined(__x86_64) || defined(__i386__)
#ifndef __NR_sys_io_uring_setup
#define __NR_sys_io_uring_setup 425
#endif
#ifndef __NR_sys_io_uring_enter
#define __NR_sys_io_uring_enter 426
#endif
#ifndef __NR_sys_io_uring_register
#define __NR_sys_io_uring_register 427
#endif
#else
#error "Arch not supported yet"
#endif
int io_uring_register(int fd, unsigned int opcode, void *arg,
unsigned int nr_args)
{
return syscall(__NR_sys_io_uring_register, fd, opcode, arg, nr_args);
}
int io_uring_setup(unsigned entries, struct io_uring_params *p)
{
return syscall(__NR_sys_io_uring_setup, entries, p);
}
int io_uring_enter(unsigned fd, unsigned to_submit, unsigned min_complete,
unsigned flags, sigset_t *sig)
{
return syscall(__NR_sys_io_uring_enter, fd, to_submit, min_complete,
flags, sig, _NSIG / 8);
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment