---
drivers/md/dm-iostats.c | 488 ++++++++++++++++++++++++++++++++++++++++++++++++
1 files changed, 488 insertions(+)
Index: linux/drivers/md/dm-iostats.c
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux/drivers/md/dm-iostats.c 2007-06-06 20:40:10.000000000 +0100
@@ -0,0 +1,488 @@
+/*
+ * Copyright (C) 2007 Red Hat GmbH
+ *
+ * Module Author: Heinz Mauelshagen (Mauelshagen@RedHat.com)
+ *
+ * Gather I/O statistics.
+ *
+ * This file is released under the GPL.
+ */
+
+#include "dm.h"
+
+#include <linux/ctype.h>
+
+#define DM_MSG_PREFIX "dm-iostats" /* Presumably the DMERR()/DMINFO() prefix. */
+
+static const char *version = "v1.0"; /* Logged on module init and exit. */
+
+/* 32 bit jiffies: masked on 64 bit platforms, plain jiffies otherwise. */
+#if BITS_PER_LONG <= 32
+#define JIFFIES_32 jiffies
+#else
+#define JIFFIES_32 (jiffies & 0xFFFFFFFF)
+#endif
+
+/* Feature flags: bit numbers (not masks) for set_bit()/test_bit() on flags. */
+enum feature_flags {
+	IOF_LATENCY = 0,	/* IO latency. */
+	IOF_SIZE = 1,		/* IO size sums. */
+	IOF_ERROR = 2,		/* IO errors. */
+};
+
+/* IO statistics context; allocated as a prefix only, so field order matters. */
+struct iostats_c {
+ unsigned long flags; /* Feature bits tested with test_bit(). */
+ struct dm_dev *dev; /* Underlying device the bios are mapped to. */
+
+ atomic_t ios[2]; /* Counter of read/write IOs. */
+
+ /* Only used when IO errors are counted ('error' type). */
+ atomic_t errors[2]; /* Number of IO errors. */
+
+ /*
+ * These fields are only allocated and used
+ * if we are recording the IO latency.
+ */
+ spinlock_t lock; /* Taken around updates of the sums below and size[]. */
+ unsigned long last_jiffies; /* Last jiffies seen (overrun check). */
+ unsigned long long start[2]; /* Sum of start jiffies. */
+ unsigned long long start_inflight[2]; /* Start jiffies sum of in flight IOs. */
+ unsigned long long end[2]; /* Sum of end jiffies. */
+ atomic_t ios_inflight[2]; /* Counter of IOs in flight. */
+
+ /*
+ * This field is only allocated and used
+ * if we are recording the IO size sums.
+ */
+ unsigned long long size[2]; /* Sum of IO sizes. */
+};
+
+/* Zero the start/end jiffies sums and the IO counter of one direction. */
+static void reset_latency(struct iostats_c *ic, int rw)
+{
+	atomic_set(&ic->ios[rw], 0);
+	ic->end[rw] = ic->start[rw] = 0;
+}
+
+/* Reset all counters/sums on init or resume. */
+static void reset_all(struct iostats_c *ic)
+{
+	int rw;
+
+	for (rw = READ; rw <= WRITE; rw++) {
+		/* ios[] is always present: clear it unconditionally. */
+		atomic_set(ic->ios + rw, 0);
+		if (test_bit(IOF_LATENCY, &ic->flags))
+			reset_latency(ic, rw);
+		if (test_bit(IOF_SIZE, &ic->flags))
+			ic->size[rw] = 0;
+		if (test_bit(IOF_ERROR, &ic->flags))
+			atomic_set(ic->errors + rw, 0);
+	}
+}
+
+/*
+ * Construct an IO statistics mapping:
+ *
+ * <dev_path> [<type>...]
+ *
+ * Available types (see STR_* below): latency, size, error
+ */
+#define STR_LATENCY "latency"
+#define STR_SIZE "size"
+#define STR_ERROR "error"
+
+/* Bytes of struct iostats_c in front of @member (allocation size without it). */
+#define OFFSET(member) offsetof(struct iostats_c, member)
+
+/* iostats feature <type> specs: name, its length, flag, allocation size. */
+struct f_type {
+	const char *name;		/* <type> */
+	size_t len;			/* String length of name. */
+	enum feature_flags flag;	/* Feature flag to set. */
+	size_t size;			/* Size of structure to allocate. */
+};
+static const struct f_type f_types[] = {
+	{ STR_LATENCY, sizeof(STR_LATENCY) - 1, IOF_LATENCY, OFFSET(size) },
+	{ STR_SIZE, sizeof(STR_SIZE) - 1, IOF_SIZE, sizeof(struct iostats_c) },
+	{ STR_ERROR, sizeof(STR_ERROR) - 1, IOF_ERROR, OFFSET(lock) },
+};
+
+#define for_each_ft(ft) for (ft = f_types; ft < ARRAY_END(f_types); ft++)
+
+/* Constructor: parse "<dev_path> [<type>...]" and set up the iostats context. */
+static int iostats_ctr(struct dm_target *ti, unsigned argc, char **argv)
+{
+	unsigned i;
+	unsigned long flags = 0;
+	size_t size = OFFSET(errors); /* Smallest possible structure size. */
+	struct iostats_c *ic;
+
+	/* <dev_path> is mandatory: argv[0] gets dereferenced below. */
+	if (argc < 1 || argc > ARRAY_SIZE(f_types) + 1) {
+		ti->error = "dm-iostats: incorrect number of arguments";
+		return -EINVAL;
+	}
+
+	/* Check constructor <type> arguments (prefix match on the name). */
+	for (i = 1; i < argc; i++) {
+		const struct f_type *ft;
+
+		for_each_ft(ft) {
+			if (strncmp(argv[i], ft->name, ft->len))
+				continue;
+
+			set_bit(ft->flag, &flags);
+			if (ft->size > size)
+				size = ft->size;
+
+			break;
+		}
+
+		if (ft == ARRAY_END(f_types)) {
+			ti->error = "dm-iostats: invalid iostats <type>";
+			return -EINVAL;
+		}
+	}
+
+	/* 'size' needs 'latency': size sums are kept under its lock. */
+	if (!test_bit(IOF_LATENCY, &flags) &&
+	    test_bit(IOF_SIZE, &flags)) {
+		ti->error = "dm-iostats: mandatory type 'latency' with 'size'";
+		return -EINVAL;
+	}
+
+	ic = kzalloc(size, GFP_KERNEL);
+	if (!ic) {
+		ti->error = "dm-iostats: cannot allocate iostats context";
+		return -ENOMEM;
+	}
+
+	if (dm_get_device(ti, *argv, ti->begin, ti->len,
+			  dm_table_get_mode(ti->table), &ic->dev)) {
+		ti->error = "dm-iostats: device lookup failed";
+		kfree(ic);
+		return -ENXIO;
+	}
+
+	ic->flags = flags;
+	if (test_bit(IOF_LATENCY, &flags)) {
+		spin_lock_init(&ic->lock);
+		atomic_set(ic->ios_inflight + READ, 0);
+		atomic_set(ic->ios_inflight + WRITE, 0);
+		reset_all(ic);
+	}
+
+	ti->private = ic;
+
+	return 0;
+}
+
+/* Destructor: put the underlying device, then free the iostats context. */
+static void iostats_dtr(struct dm_target *ti)
+{
+	struct iostats_c *ctx;
+
+	ctx = ti->private;
+	dm_put_device(ti, ctx->dev);
+
+	kfree(ctx);
+}
+
+/*
+ * iostats_map() and iostats_end_io() support functions.
+ */
+/* Sum up jiffies, zeroing on overrun; returns 0 iff the new sum is 0. */
+static inline int calc_sum(unsigned long long *sum, unsigned long now)
+{
+	unsigned long long s = *sum + now;
+
+	if (unlikely(s < *sum))
+		s = 0;
+	/* Don't return s itself: truncated to int it can be 0 for s != 0. */
+	return (*sum = s) != 0;
+}
+
+/* Restart the @rw statistics from the IOs which are currently in flight. */
+static void set_inflight_latency(struct iostats_c *ic, int rw)
+{
+	ic->end[rw] = 0;
+	ic->start[rw] = ic->start_inflight[rw];
+	atomic_set(&ic->ios[rw], atomic_read(&ic->ios_inflight[rw]));
+}
+
+static void set_inflight_latencies(struct iostats_c *ic)
+{
+	set_inflight_latency(ic, WRITE);	/* Restart the write... */
+	set_inflight_latency(ic, READ);		/* ...and the read statistics. */
+}
+
+/*
+ * Remember @now as the most recent jiffies value seen.
+ *
+ * If it is smaller than the previous one, jiffies have overrun:
+ * restart the READ and WRITE statistics from the in flight IOs.
+ */
+static inline void check_jiffies(struct iostats_c *ic, unsigned long now)
+{
+	int overrun = now < ic->last_jiffies;
+
+	ic->last_jiffies = now;
+
+	if (unlikely(overrun))
+		set_inflight_latencies(ic);
+}
+
+/*
+ * Read/write statistics mapping (always maps the bio to the underlying device):
+ *
+ * o checks for jiffies or sum variable overrun.
+ * o sums up read and write counts
+ *
+ * In case of 'latency' <type> configured (all done under ic->lock):
+ *
+ * o increments IO in flight counters
+ * o sums up IO start jiffies for better accuracy; see calc_latency()
+ *
+ * In case of 'size' <type> configured (only evaluated with 'latency'):
+ *
+ * o sums up IO sizes
+ */
+static int iostats_map(struct dm_target *ti, struct bio *bio,
+ union map_info *map_context)
+{
+ int rw = bio_data_dir(bio);
+ struct iostats_c *ic = ti->private;
+
+ if (likely(test_bit(IOF_LATENCY, &ic->flags))) {
+ unsigned long flags, now;
+
+ spin_lock_irqsave(&ic->lock, flags);
+
+ now = JIFFIES_32;
+ ic->start_inflight[rw] += now;
+ atomic_inc(ic->ios_inflight + rw);
+ /* Check for jiffies overrun. NOTE(review): the in flight values copied */
+ /* on overrun already include this IO, which is added again below? */
+ check_jiffies(ic, now);
+
+ /*
+ * In case of IO counter or start sum overrun (counter hits 0 or
+ * calc_sum() returns 0) -> set rw latency to in flight one.
+ */
+ if (unlikely(atomic_inc_and_test(ic->ios + rw)) ||
+ !calc_sum(ic->start + rw, now))
+ set_inflight_latency(ic, rw);
+
+ if (likely(test_bit(IOF_SIZE, &ic->flags)))
+ ic->size[rw] += bio->bi_size;
+
+ spin_unlock_irqrestore(&ic->lock, flags);
+
+ /* Preserve for subtraction in iostats_end_io(). */
+ map_context->ll = now;
+ } else /* Without 'latency' only the IO counter is kept. */
+ atomic_inc(ic->ios + rw);
+
+ /* Map to the underlying device. */
+ bio->bi_bdev = ic->dev->bdev;
+
+ return 1; /* NOTE(review): presumably "remapped" for dm core - confirm. */
+}
+
+/*
+ * End IO handler (always returns 0):
+ *
+ * o checks for jiffies or sum variable overrun.
+ *
+ * In case of 'latency' <type> configured (all done under ic->lock):
+ *
+ * o decrements IO in flight counters
+ * o sums up IO end jiffies
+ * o subtracts start jiffies from in flight sums
+ *
+ * In case of 'error' <type> configured:
+ *
+ * o counts any IO errors
+ */
+static int iostats_end_io(struct dm_target *ti, struct bio *bio,
+ int error, union map_info *map_context)
+{
+ int rw = bio_data_dir(bio);
+ struct iostats_c *ic = ti->private;
+
+ if (likely(test_bit(IOF_LATENCY, &ic->flags))) {
+ unsigned long flags, now;
+
+ spin_lock_irqsave(&ic->lock, flags);
+
+ /* Subtract the start time iostats_map() stored; decrement in flight ios. */
+ ic->start_inflight[rw] -= map_context->ll;
+ atomic_dec(ic->ios_inflight + rw);
+ now = JIFFIES_32;
+
+ /* Check for jiffies overrun (restarts statistics from in flight IOs). */
+ check_jiffies(ic, now);
+
+ /*
+ * In case of end sum overrun (calc_sum() returns 0) ->
+ * set rw latency to in flight one.
+ */
+ if (unlikely(!calc_sum(ic->end + rw, now)))
+ set_inflight_latency(ic, rw);
+
+ /* Correct IO sizes sum in case of error (size was added in iostats_map()). */
+ /* FIXME: correct content in bio->bi_size on error ? */
+ if (unlikely(error && test_bit(IOF_SIZE, &ic->flags)))
+ ic->size[rw] -= bio->bi_size;
+
+ spin_unlock_irqrestore(&ic->lock, flags);
+ }
+
+ if (unlikely(error) && test_bit(IOF_ERROR, &ic->flags))
+ atomic_inc(ic->errors + rw);
+
+ return 0;
+}
+
+/*
+ * Calculates the average latency of the completed IOs in milliseconds.
+ *
+ * end - (start - start_inflight) only covers completed IOs, hence it is
+ * divided by their number rather than by the number of all IOs started.
+ */
+static unsigned long calc_latency(struct iostats_c *ic, int rw)
+{
+	unsigned long flags, done;
+	unsigned long long sum;
+
+	/* Quickly grab values in order to do consistent calculation. */
+	spin_lock_irqsave(&ic->lock, flags);
+	done = atomic_read(ic->ios + rw) - atomic_read(ic->ios_inflight + rw);
+	sum = ic->end[rw] - (ic->start[rw] - ic->start_inflight[rw]);
+	spin_unlock_irqrestore(&ic->lock, flags);
+
+	return likely(done) ? jiffies_to_msecs(sum) / done : 0;
+}
+
+/*
+ * Resume hook: resets the statistics, which spares
+ * a complete table reload for that purpose.
+ * Runs lockless, as no IOs get queued before we're resumed.
+ */
+static void iostats_resume(struct dm_target *ti)
+{
+	struct iostats_c *ic = ti->private;
+
+	reset_all(ic);
+}
+
+/*
+ * Status: INFO emits the counters and per <type> values, TABLE the table line.
+ *
+ * In case of in flight ios, the values displayed will be a bit inconsistent
+ * with respect to IO counters, IO latencies, IO size sums and errors being
+ * retrieved non-atomically.
+ */
+static int iostats_status(struct dm_target *ti, status_type_t type,
+ char *result, unsigned maxlen)
+{
+ unsigned sz = 0; /* NOTE(review): presumably used by DMEMIT() - confirm. */
+ char buffer[16];
+ struct iostats_c *ic = ti->private;
+ const struct f_type *ft;
+
+ format_dev_t(buffer, ic->dev->bdev->bd_dev);
+
+ switch (type) {
+ case STATUSTYPE_INFO:
+ DMEMIT("%s r=%u w=%u", buffer,
+ atomic_read(ic->ios + READ),
+ atomic_read(ic->ios + WRITE));
+
+ /* Show latency in units of milliseconds. */
+ if (test_bit(IOF_LATENCY, &ic->flags))
+ DMEMIT(" rl=%lu wl=%lu",
+ calc_latency(ic, READ),
+ calc_latency(ic, WRITE));
+
+ /* Show sizes in units of sectors (the size sums shifted right by 9). */
+ if (test_bit(IOF_SIZE, &ic->flags)) {
+ unsigned long flags;
+ unsigned long long sr, sw;
+
+ spin_lock_irqsave(&ic->lock, flags);
+ sr = ic->size[READ] >> 9;
+ sw = ic->size[WRITE] >> 9;
+ spin_unlock_irqrestore(&ic->lock, flags);
+
+ DMEMIT(" rs=%llu ws=%llu", sr, sw);
+ }
+
+ /* Show number of errors, but only if at least one got counted. */
+ if (test_bit(IOF_ERROR, &ic->flags)) {
+ unsigned re = atomic_read(ic->errors + READ);
+ unsigned we = atomic_read(ic->errors + WRITE);
+
+ if (re || we)
+ DMEMIT(" re=%u we=%u", re, we);
+
+ }
+
+ break;
+
+ case STATUSTYPE_TABLE:
+ DMEMIT("%s", buffer); /* Device number followed by the enabled <type>s. */
+ for_each_ft(ft)
+ if (test_bit(ft->flag, &ic->flags))
+ DMEMIT(" %s", ft->name);
+ }
+
+ return 0;
+}
+
+static struct target_type iostats_target = {
+	.name	 = "iostats",
+	.version = {1, 0, 0},
+	.module	 = THIS_MODULE,
+	.ctr	 = iostats_ctr,		/* Parses <dev_path> [<type>...]. */
+	.dtr	 = iostats_dtr,
+	.map	 = iostats_map,
+	.end_io	 = iostats_end_io,
+	.resume	 = iostats_resume,	/* Resets the statistics. */
+	.status	 = iostats_status,
+};
+
+/* Module init: register the iostats target and log the outcome. */
+static int __init dm_iostats_init(void)
+{
+	int r = dm_register_target(&iostats_target);
+
+	if (!r) {
+		DMINFO("initialized %s", version);
+		return 0;
+	}
+	DMERR("Failed to register target [%d]", r);
+	return r;
+}
+
+static void __exit dm_iostats_exit(void)
+{
+	int err = dm_unregister_target(&iostats_target);
+
+	if (!err)
+		DMINFO("exit %s", version);	/* Unregistered cleanly. */
+	else
+		DMERR("dm-iostats unregister failed %d", err);
+}
+
+/*
+ * Module hooks (init/exit functions defined above) and module metadata.
+ */
+module_init(dm_iostats_init);
+module_exit(dm_iostats_exit);
+
+MODULE_DESCRIPTION(DM_NAME " iostats target");
+MODULE_AUTHOR("Heinz Mauelshagen <hjm@redhat.com>");
+MODULE_LICENSE("GPL");