/*
 *  Deadline i/o scheduler.
 *
 *  Copyright (C) 2002 Jens Axboe <axboe@kernel.dk>
 */
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/elevator.h>
#include <linux/bio.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/compiler.h>
#include <linux/rbtree.h>

/*
 * See Documentation/block/deadline-iosched.txt
 */
/*
 * Default values for the tunables exposed via sysfs (all overridable
 * per queue at run time through deadline_attrs below).
 */
static const int read_expire = HZ / 2;  /* max time before a read is submitted. */
static const int write_expire = 5 * HZ; /* ditto for writes, these limits are SOFT! */
static const int writes_starved = 2;    /* max times reads can starve a write */
static const int fifo_batch = 16;       /* # of sequential requests treated as one
					   by the above parameters. For throughput. */

struct deadline_data {
	/*
	 * run time data
	 */

	/*
	 * requests (deadline_rq s) are present on both sort_list and fifo_list
	 */
	struct rb_root sort_list[2];	
	struct list_head fifo_list[2];
	
	/*
	 * next in sort order. read, write or both are NULL
	 */
40
	struct request *next_rq[2];
Linus Torvalds's avatar
Linus Torvalds committed
41 42 43 44 45 46 47 48 49 50 51 52 53
	unsigned int batching;		/* number of sequential requests made */
	sector_t last_sector;		/* head position */
	unsigned int starved;		/* times reads have starved writes */

	/*
	 * settings that change how the i/o scheduler behaves
	 */
	int fifo_expire[2];
	int fifo_batch;
	int writes_starved;
	int front_merges;
};

static void deadline_move_request(struct deadline_data *, struct request *);

/* rbtree root (READ or WRITE) that @rq sorts into */
#define RQ_RB_ROOT(dd, rq)	(&(dd)->sort_list[rq_data_dir((rq))])

/*
 * Insert @rq into the sector-sorted rbtree for its data direction.  If a
 * request with the same sector already exists (an alias), dispatch the
 * alias to make room and retry the insert.
 */
static void
deadline_add_rq_rb(struct deadline_data *dd, struct request *rq)
{
	struct rb_root *root = RQ_RB_ROOT(dd, rq);
	struct request *__alias;

retry:
	__alias = elv_rb_add(root, rq);
	if (unlikely(__alias)) {
		/* moving the alias also updates dd->next_rq; loop in case
		 * several aliases exist */
		deadline_move_request(dd, __alias);
		goto retry;
	}
}

static inline void
73
deadline_del_rq_rb(struct deadline_data *dd, struct request *rq)
Linus Torvalds's avatar
Linus Torvalds committed
74
{
75
	const int data_dir = rq_data_dir(rq);
Linus Torvalds's avatar
Linus Torvalds committed
76

77
	if (dd->next_rq[data_dir] == rq) {
78
		struct rb_node *rbnext = rb_next(&rq->rb_node);
Linus Torvalds's avatar
Linus Torvalds committed
79

80
		dd->next_rq[data_dir] = NULL;
Linus Torvalds's avatar
Linus Torvalds committed
81
		if (rbnext)
82
			dd->next_rq[data_dir] = rb_entry_rq(rbnext);
Linus Torvalds's avatar
Linus Torvalds committed
83 84
	}

85
	elv_rb_del(RQ_RB_ROOT(dd, rq), rq);
Linus Torvalds's avatar
Linus Torvalds committed
86 87 88
}

/*
89
 * add rq to rbtree and fifo
Linus Torvalds's avatar
Linus Torvalds committed
90
 */
91
static void
Linus Torvalds's avatar
Linus Torvalds committed
92 93 94
deadline_add_request(struct request_queue *q, struct request *rq)
{
	struct deadline_data *dd = q->elevator->elevator_data;
95
	const int data_dir = rq_data_dir(rq);
Linus Torvalds's avatar
Linus Torvalds committed
96

97
	deadline_add_rq_rb(dd, rq);
98

Linus Torvalds's avatar
Linus Torvalds committed
99 100 101
	/*
	 * set expire time (only used for reads) and add to fifo list
	 */
102 103
	rq_set_fifo_time(rq, jiffies + dd->fifo_expire[data_dir]);
	list_add_tail(&rq->queuelist, &dd->fifo_list[data_dir]);
Linus Torvalds's avatar
Linus Torvalds committed
104 105 106
}

/*
107
 * remove rq from rbtree and fifo.
Linus Torvalds's avatar
Linus Torvalds committed
108
 */
109
static void deadline_remove_request(struct request_queue *q, struct request *rq)
Linus Torvalds's avatar
Linus Torvalds committed
110
{
111
	struct deadline_data *dd = q->elevator->elevator_data;
Linus Torvalds's avatar
Linus Torvalds committed
112

113 114
	rq_fifo_clear(rq);
	deadline_del_rq_rb(dd, rq);
Linus Torvalds's avatar
Linus Torvalds committed
115 116 117
}

static int
118
deadline_merge(struct request_queue *q, struct request **req, struct bio *bio)
Linus Torvalds's avatar
Linus Torvalds committed
119 120 121 122 123 124 125 126 127
{
	struct deadline_data *dd = q->elevator->elevator_data;
	struct request *__rq;
	int ret;

	/*
	 * check for front merge
	 */
	if (dd->front_merges) {
128
		sector_t sector = bio->bi_sector + bio_sectors(bio);
Linus Torvalds's avatar
Linus Torvalds committed
129

130
		__rq = elv_rb_find(&dd->sort_list[bio_data_dir(bio)], sector);
Linus Torvalds's avatar
Linus Torvalds committed
131
		if (__rq) {
132
			BUG_ON(sector != __rq->sector);
Linus Torvalds's avatar
Linus Torvalds committed
133 134 135 136 137 138 139 140 141 142 143 144 145 146

			if (elv_rq_merge_ok(__rq, bio)) {
				ret = ELEVATOR_FRONT_MERGE;
				goto out;
			}
		}
	}

	return ELEVATOR_NO_MERGE;
out:
	*req = __rq;
	return ret;
}

147 148
static void deadline_merged_request(struct request_queue *q,
				    struct request *req, int type)
Linus Torvalds's avatar
Linus Torvalds committed
149 150 151 152 153 154
{
	struct deadline_data *dd = q->elevator->elevator_data;

	/*
	 * if the merge was a front merge, we need to reposition request
	 */
155 156
	if (type == ELEVATOR_FRONT_MERGE) {
		elv_rb_del(RQ_RB_ROOT(dd, req), req);
157
		deadline_add_rq_rb(dd, req);
Linus Torvalds's avatar
Linus Torvalds committed
158 159 160 161
	}
}

static void
162
deadline_merged_requests(struct request_queue *q, struct request *req,
Linus Torvalds's avatar
Linus Torvalds committed
163 164 165
			 struct request *next)
{
	/*
166 167
	 * if next expires before rq, assign its expire time to rq
	 * and move into next position (next will be deleted) in fifo
Linus Torvalds's avatar
Linus Torvalds committed
168
	 */
169 170 171 172
	if (!list_empty(&req->queuelist) && !list_empty(&next->queuelist)) {
		if (time_before(rq_fifo_time(next), rq_fifo_time(req))) {
			list_move(&req->queuelist, &next->queuelist);
			rq_set_fifo_time(req, rq_fifo_time(next));
Linus Torvalds's avatar
Linus Torvalds committed
173 174 175 176 177 178 179 180 181 182 183 184 185
		}
	}

	/*
	 * kill knowledge of next, this one is a goner
	 */
	deadline_remove_request(q, next);
}

/*
 * move request from sort list to dispatch queue.
 */
static inline void
186
deadline_move_to_dispatch(struct deadline_data *dd, struct request *rq)
Linus Torvalds's avatar
Linus Torvalds committed
187
{
188
	struct request_queue *q = rq->q;
Linus Torvalds's avatar
Linus Torvalds committed
189

190 191
	deadline_remove_request(q, rq);
	elv_dispatch_add_tail(q, rq);
Linus Torvalds's avatar
Linus Torvalds committed
192 193 194 195 196 197
}

/*
 * move an entry to dispatch queue
 */
static void
198
deadline_move_request(struct deadline_data *dd, struct request *rq)
Linus Torvalds's avatar
Linus Torvalds committed
199
{
200 201
	const int data_dir = rq_data_dir(rq);
	struct rb_node *rbnext = rb_next(&rq->rb_node);
Linus Torvalds's avatar
Linus Torvalds committed
202

203 204
	dd->next_rq[READ] = NULL;
	dd->next_rq[WRITE] = NULL;
Linus Torvalds's avatar
Linus Torvalds committed
205 206

	if (rbnext)
207
		dd->next_rq[data_dir] = rb_entry_rq(rbnext);
Linus Torvalds's avatar
Linus Torvalds committed
208
	
209
	dd->last_sector = rq->sector + rq->nr_sectors;
Linus Torvalds's avatar
Linus Torvalds committed
210 211 212 213 214

	/*
	 * take it off the sort and fifo list, move
	 * to dispatch queue
	 */
215
	deadline_move_to_dispatch(dd, rq);
Linus Torvalds's avatar
Linus Torvalds committed
216 217 218 219 220 221 222 223
}

/*
 * deadline_check_fifo returns 0 if there are no expired reads on the fifo,
 * 1 otherwise. Requires !list_empty(&dd->fifo_list[data_dir])
 */
static inline int deadline_check_fifo(struct deadline_data *dd, int ddir)
{
224
	struct request *rq = rq_entry_fifo(dd->fifo_list[ddir].next);
Linus Torvalds's avatar
Linus Torvalds committed
225 226

	/*
227
	 * rq is expired!
Linus Torvalds's avatar
Linus Torvalds committed
228
	 */
229
	if (time_after(jiffies, rq_fifo_time(rq)))
Linus Torvalds's avatar
Linus Torvalds committed
230 231 232 233 234 235 236 237 238
		return 1;

	return 0;
}

/*
 * deadline_dispatch_requests selects the best request according to
 * read/write expire, fifo_batch, etc
 */
239
static int deadline_dispatch_requests(struct request_queue *q, int force)
Linus Torvalds's avatar
Linus Torvalds committed
240
{
241
	struct deadline_data *dd = q->elevator->elevator_data;
Linus Torvalds's avatar
Linus Torvalds committed
242 243
	const int reads = !list_empty(&dd->fifo_list[READ]);
	const int writes = !list_empty(&dd->fifo_list[WRITE]);
244
	struct request *rq;
245
	int data_dir;
Linus Torvalds's avatar
Linus Torvalds committed
246 247 248 249

	/*
	 * batches are currently reads XOR writes
	 */
250 251
	if (dd->next_rq[WRITE])
		rq = dd->next_rq[WRITE];
252
	else
253
		rq = dd->next_rq[READ];
Linus Torvalds's avatar
Linus Torvalds committed
254

255
	if (rq) {
Linus Torvalds's avatar
Linus Torvalds committed
256 257
		/* we have a "next request" */
		
258
		if (dd->last_sector != rq->sector)
Linus Torvalds's avatar
Linus Torvalds committed
259 260 261 262 263 264 265 266 267 268 269 270 271 272
			/* end the batch on a non sequential request */
			dd->batching += dd->fifo_batch;
		
		if (dd->batching < dd->fifo_batch)
			/* we are still entitled to batch */
			goto dispatch_request;
	}

	/*
	 * at this point we are not running a batch. select the appropriate
	 * data direction (read / write)
	 */

	if (reads) {
273
		BUG_ON(RB_EMPTY_ROOT(&dd->sort_list[READ]));
Linus Torvalds's avatar
Linus Torvalds committed
274 275 276 277 278 279 280 281 282 283 284 285 286 287 288

		if (writes && (dd->starved++ >= dd->writes_starved))
			goto dispatch_writes;

		data_dir = READ;

		goto dispatch_find_request;
	}

	/*
	 * there are either no reads or writes have been starved
	 */

	if (writes) {
dispatch_writes:
289
		BUG_ON(RB_EMPTY_ROOT(&dd->sort_list[WRITE]));
Linus Torvalds's avatar
Linus Torvalds committed
290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306

		dd->starved = 0;

		data_dir = WRITE;

		goto dispatch_find_request;
	}

	return 0;

dispatch_find_request:
	/*
	 * we are not running a batch, find best request for selected data_dir
	 */
	if (deadline_check_fifo(dd, data_dir)) {
		/* An expired request exists - satisfy it */
		dd->batching = 0;
307
		rq = rq_entry_fifo(dd->fifo_list[data_dir].next);
Linus Torvalds's avatar
Linus Torvalds committed
308
		
309
	} else if (dd->next_rq[data_dir]) {
Linus Torvalds's avatar
Linus Torvalds committed
310 311 312 313
		/*
		 * The last req was the same dir and we have a next request in
		 * sort order. No expired requests so continue on from here.
		 */
314
		rq = dd->next_rq[data_dir];
Linus Torvalds's avatar
Linus Torvalds committed
315
	} else {
316
		struct rb_node *node;
Linus Torvalds's avatar
Linus Torvalds committed
317 318 319 320 321 322
		/*
		 * The last req was the other direction or we have run out of
		 * higher-sectored requests. Go back to the lowest sectored
		 * request (1 way elevator) and start a new batch.
		 */
		dd->batching = 0;
323 324 325
		node = rb_first(&dd->sort_list[data_dir]);
		if (node)
			rq = rb_entry_rq(node);
Linus Torvalds's avatar
Linus Torvalds committed
326 327 328 329
	}

dispatch_request:
	/*
330
	 * rq is the selected appropriate request.
Linus Torvalds's avatar
Linus Torvalds committed
331 332
	 */
	dd->batching++;
333
	deadline_move_request(dd, rq);
Linus Torvalds's avatar
Linus Torvalds committed
334 335 336 337

	return 1;
}

338
static int deadline_queue_empty(struct request_queue *q)
Linus Torvalds's avatar
Linus Torvalds committed
339 340 341
{
	struct deadline_data *dd = q->elevator->elevator_data;

342 343
	return list_empty(&dd->fifo_list[WRITE])
		&& list_empty(&dd->fifo_list[READ]);
Linus Torvalds's avatar
Linus Torvalds committed
344 345 346 347 348 349 350 351 352 353 354 355 356
}

/*
 * Elevator exit hook: free the per-queue private data.  All requests
 * must already have been dispatched, hence the BUG_ON checks.
 */
static void deadline_exit_queue(elevator_t *e)
{
	struct deadline_data *dd = e->elevator_data;

	BUG_ON(!list_empty(&dd->fifo_list[READ]));
	BUG_ON(!list_empty(&dd->fifo_list[WRITE]));

	kfree(dd);
}

/*
357
 * initialize elevator private data (deadline_data).
Linus Torvalds's avatar
Linus Torvalds committed
358
 */
359
static void *deadline_init_queue(struct request_queue *q)
Linus Torvalds's avatar
Linus Torvalds committed
360 361 362
{
	struct deadline_data *dd;

363
	dd = kmalloc_node(sizeof(*dd), GFP_KERNEL | __GFP_ZERO, q->node);
Linus Torvalds's avatar
Linus Torvalds committed
364
	if (!dd)
Jens Axboe's avatar
Jens Axboe committed
365
		return NULL;
Linus Torvalds's avatar
Linus Torvalds committed
366 367 368 369 370 371 372 373 374 375

	INIT_LIST_HEAD(&dd->fifo_list[READ]);
	INIT_LIST_HEAD(&dd->fifo_list[WRITE]);
	dd->sort_list[READ] = RB_ROOT;
	dd->sort_list[WRITE] = RB_ROOT;
	dd->fifo_expire[READ] = read_expire;
	dd->fifo_expire[WRITE] = write_expire;
	dd->writes_starved = writes_starved;
	dd->front_merges = 1;
	dd->fifo_batch = fifo_batch;
Jens Axboe's avatar
Jens Axboe committed
376
	return dd;
Linus Torvalds's avatar
Linus Torvalds committed
377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398
}

/*
 * sysfs parts below
 */

/* Format an integer tunable into @page for sysfs; returns bytes written. */
static ssize_t
deadline_var_show(int var, char *page)
{
	ssize_t len = sprintf(page, "%d\n", var);

	return len;
}

/* Parse a decimal integer from a sysfs write into *@var; consumes @count. */
static ssize_t
deadline_var_store(int *var, const char *page, size_t count)
{
	char *cursor = (char *) page;

	*var = simple_strtol(cursor, &cursor, 10);
	return count;
}

/*
 * Generate a sysfs show handler per tunable.  __CONV converts internal
 * jiffies values to milliseconds for user-visible output.
 */
#define SHOW_FUNCTION(__FUNC, __VAR, __CONV)				\
static ssize_t __FUNC(elevator_t *e, char *page)			\
{									\
	struct deadline_data *dd = e->elevator_data;			\
	int __data = __VAR;						\
	if (__CONV)							\
		__data = jiffies_to_msecs(__data);			\
	return deadline_var_show(__data, (page));			\
}
SHOW_FUNCTION(deadline_read_expire_show, dd->fifo_expire[READ], 1);
SHOW_FUNCTION(deadline_write_expire_show, dd->fifo_expire[WRITE], 1);
SHOW_FUNCTION(deadline_writes_starved_show, dd->writes_starved, 0);
SHOW_FUNCTION(deadline_front_merges_show, dd->front_merges, 0);
SHOW_FUNCTION(deadline_fifo_batch_show, dd->fifo_batch, 0);
#undef SHOW_FUNCTION

/*
 * Generate a sysfs store handler per tunable: parse, clamp to [MIN, MAX],
 * and store (converting user-supplied msecs to jiffies when __CONV is set).
 */
#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV)			\
static ssize_t __FUNC(elevator_t *e, const char *page, size_t count)	\
{									\
	struct deadline_data *dd = e->elevator_data;			\
	int __data;							\
	int ret = deadline_var_store(&__data, (page), count);		\
	if (__data < (MIN))						\
		__data = (MIN);						\
	else if (__data > (MAX))					\
		__data = (MAX);						\
	if (__CONV)							\
		*(__PTR) = msecs_to_jiffies(__data);			\
	else								\
		*(__PTR) = __data;					\
	return ret;							\
}
STORE_FUNCTION(deadline_read_expire_store, &dd->fifo_expire[READ], 0, INT_MAX, 1);
STORE_FUNCTION(deadline_write_expire_store, &dd->fifo_expire[WRITE], 0, INT_MAX, 1);
STORE_FUNCTION(deadline_writes_starved_store, &dd->writes_starved, INT_MIN, INT_MAX, 0);
STORE_FUNCTION(deadline_front_merges_store, &dd->front_merges, 0, 1, 0);
STORE_FUNCTION(deadline_fifo_batch_store, &dd->fifo_batch, 0, INT_MAX, 0);
#undef STORE_FUNCTION

437 438 439 440 441 442 443 444 445 446 447
#define DD_ATTR(name) \
	__ATTR(name, S_IRUGO|S_IWUSR, deadline_##name##_show, \
				      deadline_##name##_store)

static struct elv_fs_entry deadline_attrs[] = {
	DD_ATTR(read_expire),
	DD_ATTR(write_expire),
	DD_ATTR(writes_starved),
	DD_ATTR(front_merges),
	DD_ATTR(fifo_batch),
	__ATTR_NULL
Linus Torvalds's avatar
Linus Torvalds committed
448 449 450 451 452 453 454
};

static struct elevator_type iosched_deadline = {
	.ops = {
		.elevator_merge_fn = 		deadline_merge,
		.elevator_merged_fn =		deadline_merged_request,
		.elevator_merge_req_fn =	deadline_merged_requests,
455 456
		.elevator_dispatch_fn =		deadline_dispatch_requests,
		.elevator_add_req_fn =		deadline_add_request,
Linus Torvalds's avatar
Linus Torvalds committed
457
		.elevator_queue_empty_fn =	deadline_queue_empty,
458 459
		.elevator_former_req_fn =	elv_rb_former_request,
		.elevator_latter_req_fn =	elv_rb_latter_request,
Linus Torvalds's avatar
Linus Torvalds committed
460 461 462 463
		.elevator_init_fn =		deadline_init_queue,
		.elevator_exit_fn =		deadline_exit_queue,
	},

464
	.elevator_attrs = deadline_attrs,
Linus Torvalds's avatar
Linus Torvalds committed
465 466 467 468 469 470
	.elevator_name = "deadline",
	.elevator_owner = THIS_MODULE,
};

/* module init: register the deadline scheduler with the elevator core */
static int __init deadline_init(void)
{
	return elv_register(&iosched_deadline);
}

/* module exit: unregister the scheduler from the elevator core */
static void __exit deadline_exit(void)
{
	elv_unregister(&iosched_deadline);
}

/* module entry/exit points and metadata */
module_init(deadline_init);
module_exit(deadline_exit);

MODULE_AUTHOR("Jens Axboe");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("deadline IO scheduler");