[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH][4/4][IOMGR] Turn-based I/O request scheduler


  • To: <xen-devel@xxxxxxxxxxxxxxxxxxx>
  • From: "Satoshi Uchida" <s-uchida@xxxxxxxxxxxxx>
  • Date: Mon, 30 Jul 2007 17:51:23 +0900
  • Delivery-date: Mon, 30 Jul 2007 01:50:43 -0700
  • List-id: Xen developer discussion <xen-devel.lists.xensource.com>
  • Thread-index: AcfShs5zYOlRLLgkTcC7bATyj29lKA==

This patch provides a turn-based I/O request scheduler.

The turn-based I/O scheduler lets virtual block device threads control I/O 
performance based on the number of requests per "turn".

 --------------------------------------------------
 Satoshi UCHIDA 
    NEC Corporation, Japan


# HG changeset patch
# User s-uchida@xxxxxxxxxxxxx
# Date 1184302442 -32400
# Node ID 216f7a2a56c03aecc08a40ed0f687874f56c89c5
# Parent  aee77a9230c1b19873e60761d080af517bbfb189
[IOMGR] Add the turn-based I/O scheduler.
  This module controls I/O requests based on the number of requests per "turn".

  Signed-off-by  Satoshi UCHIDA <s-uchida@xxxxxxxxxxxxx>

diff -r aee77a9230c1 -r 216f7a2a56c0 drivers/xen/Kconfig
--- a/drivers/xen/Kconfig       Fri Jun 15 13:33:47 2007 -0600
+++ b/drivers/xen/Kconfig       Fri Jul 13 13:45:30 2007 +0900
@@ -74,6 +74,14 @@ config XEN_BLKDEV_TAP
          The Backend I/O request manager framework provides interface
          which makes backend driver to control I/O requests by I/O 
          control modules
+
+config XEN_IOSCHED_TURN
+       tristate "Turn-based I/O request scheduler"
+       depends on XEN_IOMGR
+       default m
+       help
+         The turn-based I/O scheduler controls I/O requests based on the number
+         of requests "per turn".
 
 config XEN_NETDEV_BACKEND
        tristate "Network-device backend driver"
diff -r aee77a9230c1 -r 216f7a2a56c0 drivers/xen/Makefile
--- a/drivers/xen/Makefile      Fri Jun 15 13:33:47 2007 -0600
+++ b/drivers/xen/Makefile      Fri Jul 13 13:45:30 2007 +0900
@@ -11,6 +11,7 @@ obj-y += util.o
 obj-$(CONFIG_XEN_BLKDEV_BACKEND)       += blkback/
 obj-$(CONFIG_XEN_BLKDEV_TAP)           += blktap/
 obj-$(CONFIG_XEN_IOMGR)                        += iomgr/
+obj-$(CONFIG_XEN_IOSCHED_TURN)         += iomgr/
 obj-$(CONFIG_XEN_NETDEV_BACKEND)       += netback/
 obj-$(CONFIG_XEN_TPMDEV_BACKEND)       += tpmback/
 obj-$(CONFIG_XEN_BLKDEV_FRONTEND)      += blkfront/
diff -r aee77a9230c1 -r 216f7a2a56c0 drivers/xen/iomgr/Makefile
--- a/drivers/xen/iomgr/Makefile        Fri Jul 13 13:52:39 2007 +0900
+++ b/drivers/xen/iomgr/Makefile        Fri Jul 13 13:52:39 2007 +0900
@@ -1,3 +1,4 @@
 
 xeniomgr-y := iomgr.o
 
+# Append to the object list; ":=" would discard any objects already assigned.
+obj-$(CONFIG_XEN_IOSCHED_TURN) += turn_iosched.o
diff -r aee77a9230c1 -r 216f7a2a56c0 drivers/xen/iomgr/turn_iosched.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/drivers/xen/iomgr/turn_iosched.c  Fri Jul 13 13:54:02 2007 +0900
@@ -0,0 +1,368 @@
+/*****************************************************************************
+ * turn_iosched.c
+ *
+ *  Manages the stream of I/O requests among virtual machines.
+ *  The turn-based I/O scheduler controls the number of requests in a turn.
+ *
+ * Copyright(c) 2007, Satoshi UCHIDA, NEC Corporation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/wait.h>
+#include <linux/list.h>
+#include <asm/atomic.h>
+#include <linux/kthread.h>
+#include <linux/kobject.h>
+#include <linux/sysfs.h>
+#include <xen/iomgr.h>
+#include <xen/xenbus.h>
+
+
+/*
+ * Capacity given to every newly created device record: the number of
+ * requests it may allocate per turn.
+ *
+ * The last argument of module_param() is a sysfs file mode and therefore has
+ * to be written in octal (0644 = rw-r--r--).  The decimal literal 644 equals
+ * octal 01204, which is not a sensible permission set.
+ */
+static int default_max_cap=64;
+module_param(default_max_cap, int, 0644);
+
+/* Threads that have used up their per-turn capacity sleep on this queue. */
+static DECLARE_WAIT_QUEUE_HEAD(noncap_wq);
+
+/* Head of the list of all turn_data records (linked via turn_data.list). */
+struct hlist_head turn_hash;
+
+
+/*
+ * Turn scheduler parameters.
+ * One record per VBD/TAP device; records are linked into turn_hash.
+ */
+struct turn_data {
+       /* Identification of VBD/TAP device; the key used by find_turn_data(). */
+       struct xenbus_device *dev;
+
+       /*
+        * Capacity updating flag: set to 1 by turn_wake_up(), cleared by
+        * turn_waiting_request() after req_cap was reloaded from max_cap.
+        */
+       int update_f;
+
+       /*
+        * Counting parameters.
+        * req_cap: remaining capacity, decremented by turn_alloc_request().
+        * max_cap: value req_cap is reloaded with; writable through sysfs.
+        */
+       atomic_t            req_cap;
+       atomic_t            max_cap;
+
+       /* Linkage into turn_hash. */
+       struct hlist_node   list;
+};
+
+
+/*
+ * Look up the turn_data record that belongs to a VBD/TAP device.
+ *
+ * Walks turn_hash and compares each record's dev pointer with @dev.
+ * Returns the matching record, or NULL if no record matches.
+ */
+struct turn_data *find_turn_data(struct xenbus_device *dev)
+{
+       struct hlist_node *pos;
+
+       hlist_for_each(pos, &turn_hash) {
+               struct turn_data *cand;
+
+               cand = hlist_entry(pos, struct turn_data, list);
+               if (cand->dev == dev)
+                       return cand;
+       }
+
+       return NULL;
+}
+
+
+/* Sysfs Interface */
+
+/*
+ * sysfs "show" handler for the req_cap attribute.
+ *
+ * Prints the current value of the device's req_cap counter followed by a
+ * newline.  Returns the number of bytes written to @buf, or -ENODEV when no
+ * turn_data record exists for the device.
+ */
+static ssize_t show_req_cap(struct device *_dev,
+                           struct device_attribute *attr,
+                           char *buf)
+{
+       struct xenbus_device *dev = to_xenbus_device(_dev);
+       struct turn_data *data = find_turn_data(dev);
+
+       /*
+        * turn_dev_stop() unlinks the record before it removes the sysfs
+        * group, so a read can arrive while the lookup already fails.
+        */
+       if (!data)
+               return -ENODEV;
+
+       return sprintf(buf, "%d\n", atomic_read(&data->req_cap));
+}
+
+DEVICE_ATTR(req_cap, S_IRUGO , show_req_cap, NULL);
+
+/*
+ * sysfs "show" handler for the max_cap attribute.
+ *
+ * Prints the device's max_cap value followed by a newline.  Returns the
+ * number of bytes written to @buf, or -ENODEV when no turn_data record
+ * exists for the device.
+ */
+static ssize_t show_max_cap(struct device *_dev,
+                           struct device_attribute *attr,
+                           char *buf)
+{
+       struct xenbus_device *dev = to_xenbus_device(_dev);
+       struct turn_data *data = find_turn_data(dev);
+
+       /*
+        * turn_dev_stop() unlinks the record before it removes the sysfs
+        * group, so a read can arrive while the lookup already fails.
+        */
+       if (!data)
+               return -ENODEV;
+
+       return sprintf(buf, "%d\n", atomic_read(&data->max_cap));
+}
+
+/*
+ * sysfs "store" handler for the max_cap attribute.
+ *
+ * Parses @buf as an unsigned decimal number and stores it as the device's
+ * new max_cap.
+ *
+ * Returns @count on success so that the whole write is consumed,
+ * -ENODEV when no turn_data record exists for the device, and
+ * -EINVAL when @buf does not start with a digit or the value does not fit
+ * into the int-sized counter.
+ */
+static ssize_t store_max_cap(struct device *_dev,
+                            struct device_attribute *attr,
+                            const char *buf,
+                            size_t count)
+{
+       struct xenbus_device *dev = to_xenbus_device(_dev);
+       struct turn_data *data = find_turn_data(dev);
+       char *endp;
+       unsigned long new;
+
+       if (!data)
+               return -ENODEV;
+
+       new = simple_strtoul(buf, &endp, 10);
+       if (endp == buf)
+               return -EINVAL;
+
+       /* A larger value would turn negative when narrowed to int. */
+       if (new > INT_MAX)
+               return -EINVAL;
+
+       atomic_set(&data->max_cap, (int)new);
+
+       return count;
+}
+
+DEVICE_ATTR(max_cap, S_IRUGO | S_IWUSR, show_max_cap, store_max_cap);
+
+/*
+ * NULL-terminated list of the per-device attributes:
+ * req_cap (read-only, no store handler) and max_cap (readable and writable).
+ */
+static struct attribute *turn_attrs[] = {
+       &dev_attr_req_cap.attr,
+       &dev_attr_max_cap.attr,
+       NULL
+};
+
+/*
+ * Attribute group named "iomgr"; added to and removed from a device's
+ * kobject by turn_sysfs_addif() and turn_sysfs_delif().
+ */
+static struct attribute_group turn_group = {
+       .name = "iomgr",
+       .attrs = turn_attrs,
+};
+
+/*
+ * Add the turn_group attribute group to the device's kobject.
+ *
+ * Returns 0 on success or the error code of sysfs_create_group().
+ *
+ * Nothing is undone here on failure: when sysfs_create_group() fails the
+ * group does not exist, so calling sysfs_remove_group() for it (as the
+ * previous error path did) would operate on a group that was never created.
+ */
+int turn_sysfs_addif(struct xenbus_device *dev)
+{
+       return sysfs_create_group(&dev->dev.kobj, &turn_group);
+}
+
+/* Remove the turn_group attribute group from the device's kobject. */
+void turn_sysfs_delif(struct xenbus_device *dev)
+{
+       sysfs_remove_group(&dev->dev.kobj, &turn_group);
+}
+
+
+/*
+ * Create the turn parameters for a new VBD/TAP device.
+ *
+ * Allocates a zeroed record, sets max_cap from default_max_cap and req_cap
+ * to the same value, links the record into turn_hash and adds the sysfs
+ * attribute group to the device.
+ *
+ * Returns the new record, or NULL when the allocation or the sysfs
+ * registration fails; in the latter case the record is unlinked and freed
+ * again so that no half-registered entry stays behind.
+ */
+struct turn_data *create_turn_data(struct xenbus_device *dev)
+{
+       struct turn_data *data;
+
+       /* kzalloc() zeroes the record, which also leaves update_f at 0. */
+       data = kzalloc(sizeof(*data), GFP_KERNEL);
+       if (!data)
+               return NULL;
+
+       data->dev = dev;
+       atomic_set(&data->max_cap, default_max_cap);
+       atomic_set(&data->req_cap, atomic_read(&data->max_cap));
+
+       /* Make the record visible only after it is fully initialised. */
+       hlist_add_head(&data->list, &turn_hash);
+
+       if (turn_sysfs_addif(dev)) {
+               hlist_del(&data->list);
+               kfree(data);
+               return NULL;
+       }
+
+       return data;
+}
+
+
+/*
+ * Begin a new turn: raise the capacity updating flag of every record in
+ * turn_hash, then wake the threads sleeping on noncap_wq.
+ */
+void turn_wake_up(void)
+{
+       struct hlist_node *pos;
+
+       hlist_for_each(pos, &turn_hash)
+               hlist_entry(pos, struct turn_data, list)->update_f = 1;
+
+       wake_up(&noncap_wq);
+}
+
+
+/*
+ *   Wake-up condition for a thread sleeping in turn_waiting_request().
+ *
+ *   The record is looked up again on every evaluation instead of using a
+ *   pointer obtained before sleeping, because turn_dev_stop() and
+ *   turn_exit() may free the record in the meantime.  A missing record
+ *   (which includes an emptied turn_hash) ends the wait as well.
+ */
+static int turn_may_resume(struct xenbus_device *dev)
+{
+       struct turn_data *data = find_turn_data(dev);
+
+       return !data || data->update_f || kthread_should_stop();
+}
+
+
+/*
+ *   Waiting condition for the turn-based I/O scheduler.
+ *
+ *   If a new turn was signalled (update_f set), req_cap is reloaded from
+ *   max_cap right away.  Otherwise, a thread whose req_cap is zero sleeps
+ *   on noncap_wq until turn_may_resume() holds or it is interrupted, and
+ *   reloads req_cap afterwards.  A thread with remaining capacity returns
+ *   without any change.
+ *
+ *   Always returns 0.
+ */
+int turn_waiting_request(struct xenbus_device *dev)
+{
+       struct turn_data *data;
+
+       data = find_turn_data(dev);
+       if (!data)
+               return 0;
+
+       if (!data->update_f) {
+               /* Capacity left in this turn: nothing to wait for. */
+               if (atomic_read(&data->req_cap) != 0)
+                       return 0;
+
+               wait_event_interruptible(noncap_wq, turn_may_resume(dev));
+
+               /* The record may have been freed while this thread slept. */
+               data = find_turn_data(dev);
+               if (!data)
+                       return 0;
+       }
+
+       atomic_set(&data->req_cap, atomic_read(&data->max_cap));
+       data->update_f = 0;
+
+       return 0;
+}
+
+
+/*
+ *  Check whether the VBD/TAP thread still has capacity.
+ *
+ *  A record is created on demand when the device has none yet.
+ *
+ *  Returns 0 while req_cap is non-zero, -IOMGR_ALLOW_NG when it is zero,
+ *  and -ENOMEM when the record cannot be created.
+ */
+int turn_allow_request(struct xenbus_device *dev)
+{
+       struct turn_data *data;
+
+       data = find_turn_data(dev);
+       if (data == NULL) {
+               data = create_turn_data(dev);
+               if (data == NULL) {
+                       /* Message kept on short lines so mailers do not wrap it. */
+                       printk(KERN_ERR "FAILED : TURN_IOSCHED : "
+                              "allocating paramater\n");
+                       return -ENOMEM;
+               }
+       }
+
+       /* A plain read is enough; nothing has to be subtracted to test for 0. */
+       if (atomic_read(&data->req_cap) == 0)
+               return -IOMGR_ALLOW_NG;
+
+       return 0;
+}
+
+
+/*
+ *  A new request was allocated: use up one unit of the device's capacity.
+ *  Devices without a record are left alone.  Always returns 0.
+ */
+int turn_alloc_request(struct xenbus_device *dev) {
+       struct turn_data *owner = find_turn_data(dev);
+
+       if (owner)
+               atomic_dec(&owner->req_cap);
+
+       return 0;
+}
+
+
+/*
+ *  Start the next turn when no request is pending any more.
+ *  This is the case when
+ *    1. all threads have used up their capacity, or
+ *    2. no thread has requests left that could be processed right away.
+ *
+ *  The counter returned by num_pending_req() is only read here;
+ *  atomic_sub_and_test(0, ...) gave the same answer but performed an
+ *  atomic read-modify-write on the counter for every freed request.
+ */
+void turn_free_request(struct xenbus_device *dev)
+{
+       if (atomic_read(num_pending_req()) == 0)
+               turn_wake_up();
+}
+
+
+/*
+ *  Management when VBD/TAP thread starts.
+ *
+ *  Makes sure the device has a turn_data record.  Returns 0 on success and
+ *  -ENOMEM when the record cannot be created.
+ */
+int turn_dev_start(struct xenbus_device *dev)
+{
+       /*
+        * turn_allow_request() creates records on demand, so one may exist
+        * already.  Creating another would leave two entries for the same
+        * device in turn_hash and register the sysfs group a second time.
+        */
+       if (find_turn_data(dev) != NULL)
+               return 0;
+
+       if (create_turn_data(dev) == NULL) {
+               printk(KERN_ERR
+                      "FAILED : TURN_IOSCHED : allocating paramater\n");
+               return -ENOMEM;
+       }
+
+       return 0;
+}
+
+
+/*
+ *  Management when VBD/TAP thread stops: unlink the device's record from
+ *  turn_hash, remove its sysfs group and free it.  Does nothing when the
+ *  device has no record.
+ */
+void turn_dev_stop(struct xenbus_device *dev)
+{
+       struct turn_data *victim = find_turn_data(dev);
+
+       if (victim) {
+               hlist_del(&victim->list);
+               turn_sysfs_delif(victim->dev);
+               kfree(victim);
+       }
+}
+
+
+/*
+ *  Definition of the turn-based I/O scheduler.
+ *
+ *  Table of the hooks implemented in this file; it is passed to
+ *  xen_iomgr_register() in turn_init() and to xen_iomgr_unregister() in
+ *  turn_exit().  No out-of-order abort hook is provided (NULL).
+ */
+static struct iomgr iomgr_turn = {
+       .ops = {
+               .iomgr_waiting_request_fn   = turn_waiting_request,
+               .iomgr_allow_request_fn     = turn_allow_request,
+               .iomgr_alloc_request_fn     = turn_alloc_request,
+               .iomgr_oo_abort_request_fn  = NULL,
+               .iomgr_free_request_fn      = turn_free_request,
+               .iomgr_dev_start_fn         = turn_dev_start,
+               .iomgr_dev_stop_fn          = turn_dev_stop,
+       },
+       .iomgr_name = "TURN",
+};
+
+
+/*
+ * Initializing function.
+ * Empties turn_hash, then registers the scheduler with the I/O manager and
+ * returns the result of xen_iomgr_register().
+ */
+static int __init turn_init(void)
+{      
+       /* The list head must be valid before any hook can be invoked. */
+       INIT_HLIST_HEAD(&turn_hash);
+       return xen_iomgr_register(&iomgr_turn);
+}
+module_init(turn_init);
+
+
+/*
+ * Finishing function.
+ *
+ * Unregisters the scheduler, then releases every record still in turn_hash
+ * and finally wakes the threads sleeping on noncap_wq.
+ *
+ * Each record is unlinked before it is freed so that turn_hash never
+ * contains a pointer to released memory; once the loop is done the list is
+ * empty, so the head needs no re-initialisation.  The iterator cannot yield
+ * NULL, hence no NULL check inside the loop.
+ */
+static void __exit turn_exit(void)
+{
+       struct hlist_node *entry, *next;
+       struct turn_data *data;
+
+       xen_iomgr_unregister(&iomgr_turn);
+
+       hlist_for_each_entry_safe(data, entry, next, &turn_hash, list) {
+               hlist_del(&data->list);
+               turn_sysfs_delif(data->dev);
+               kfree(data);
+       }
+
+       wake_up(&noncap_wq);
+}
+module_exit(turn_exit);
+
+MODULE_AUTHOR("Satoshi UCHIDA");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("Turn-based IO scheduler");


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.