diff --git a/sys/kern/subr_devstat.c b/sys/kern/subr_devstat.c index 81740ed7cb92..c86d16b966f3 100644 --- a/sys/kern/subr_devstat.c +++ b/sys/kern/subr_devstat.c @@ -245,6 +245,23 @@ devstat_end_transaction_buf(struct devstat *ds, struct buf *bp) DEVSTAT_TAG_ORDERED : DEVSTAT_TAG_SIMPLE, flg); } +void +devstat_end_transaction_bio(struct devstat *ds, struct bio *bp) +{ + devstat_trans_flags flg; + /* Pick the transaction class from bio_cmd: BIO_DELETE is a free, BIO_READ a read, and any other command is counted as a write. */ + if (bp->bio_cmd == BIO_DELETE) + flg = DEVSTAT_FREE; + else if (bp->bio_cmd == BIO_READ) + flg = DEVSTAT_READ; + else + flg = DEVSTAT_WRITE; + /* The byte count passed on is bio_bcount minus bio_resid; BIO_ORDERED selects the ordered tag type, otherwise the simple one. */ + devstat_end_transaction(ds, bp->bio_bcount - bp->bio_resid, + (bp->bio_flags & BIO_ORDERED) ? + DEVSTAT_TAG_ORDERED : DEVSTAT_TAG_SIMPLE, flg); +} + /* * This is the sysctl handler for the devstat package. The data pushed out * on the kern.devstat.all sysctl variable consists of the current devstat diff --git a/sys/kern/subr_disklabel.c b/sys/kern/subr_disklabel.c index fb2064a338c0..551652fdc4cf 100644 --- a/sys/kern/subr_disklabel.c +++ b/sys/kern/subr_disklabel.c @@ -160,6 +160,104 @@ bufqdisksort(bufq, bp) } +void +bioqdisksort(bioq, bp) + struct bio_queue_head *bioq; + struct bio *bp; +{ + struct bio *bq; + struct bio *bn; + struct bio *be; + + be = TAILQ_LAST(&bioq->queue, bio_queue); + /* + * Sort bp into bioq by bio_pblkno. If the queue is empty or we are an + * ordered transaction, then it's easy: just append at the tail. + */ + if ((bq = bioq_first(bioq)) == NULL + || (bp->bio_flags & BIO_ORDERED) != 0) { + bioq_insert_tail(bioq, bp); + return; + } else if (bioq->insert_point != NULL) { + + /* + * A certain portion of the list is + * "locked" to preserve ordering, so + * we can only insert after the insert + * point. + */ + bq = bioq->insert_point; + } else { + + /* + * If we lie before the last removed (currently active) + * request, and are not inserting ourselves into the + * "locked" portion of the list, then we must add ourselves + * to the second request list. 
+ */ + if (bp->bio_pblkno < bioq->last_pblkno) { + /* The second list, when there is one, starts at the switch point. */ + bq = bioq->switch_point; + /* + * If we are starting a new secondary list, + * then it's easy. + */ + if (bq == NULL) { + bioq->switch_point = bp; + bioq_insert_tail(bioq, bp); + return; + } + /* + * If we lie ahead of the current switch point, + * insert us before the switch point and move + * the switch point. + */ + if (bp->bio_pblkno < bq->bio_pblkno) { + bioq->switch_point = bp; + TAILQ_INSERT_BEFORE(bq, bp, bio_queue); + return; + } + } else { /* First list: when a switch point exists, the list ends just before it. */ + if (bioq->switch_point != NULL) + be = TAILQ_PREV(bioq->switch_point, + bio_queue, bio_queue); + /* + * If we lie between last_pblkno and bq (still the queue head here), + * insert before bq. + */ + if (bp->bio_pblkno < bq->bio_pblkno) { + TAILQ_INSERT_BEFORE(bq, bp, bio_queue); + return; + } + } + } + + /* + * Request is at/after our current position in the list. + * Optimize for sequential I/O by seeing if we go at the tail. + */ + if (bp->bio_pblkno > be->bio_pblkno) { + TAILQ_INSERT_AFTER(&bioq->queue, be, bp, bio_queue); + return; + } + + /* Otherwise, insertion sort: walk forward from bq to find our slot. */ + while ((bn = TAILQ_NEXT(bq, bio_queue)) != NULL) { + + /* + * We want to go after the current request if it is the end + * of the first request list, or if the next request is at a + * larger block number than our request. + */ + if (bn == bioq->switch_point + || bp->bio_pblkno < bn->bio_pblkno) + break; + bq = bn; + } + TAILQ_INSERT_AFTER(&bioq->queue, bq, bp, bio_queue); +} + + /* * Attempt to read a disk label from a device using the indicated strategy * routine. The label must be partly set up before this: secpercyl, secsize diff --git a/sys/sys/bio.h b/sys/sys/bio.h index c04f6829df40..5ac55a4e9971 100644 --- a/sys/sys/bio.h +++ b/sys/sys/bio.h @@ -86,6 +86,9 @@ struct bio { u_int bio_cmd; /* I/O operation. */ dev_t bio_dev; /* Device to do I/O on. */ daddr_t bio_blkno; /* Underlying physical block number. */ + off_t bio_offset; /* Offset into file. */ + long bio_bcount; /* Valid bytes in buffer. 
*/ + caddr_t bio_data; /* Memory, superblocks, indirect etc. */ u_int bio_flags; /* BIO_ flags. */ struct buf *_bio_buf; /* Parent buffer. */ int bio_error; /* Errno for BIO_ERROR. */ @@ -121,9 +124,11 @@ struct bio { struct buf { /* XXX: b_io must be the first element of struct buf for now /phk */ struct bio b_io; /* "Builtin" I/O request. */ +#define b_bcount b_io.bio_bcount #define b_blkno b_io.bio_blkno #define b_caller1 b_io.bio_caller1 #define b_caller2 b_io.bio_caller2 +#define b_data b_io.bio_data #define b_dev b_io.bio_dev #define b_driver1 b_io.bio_driver1 #define b_driver2 b_io.bio_driver2 @@ -132,6 +137,7 @@ struct buf { #define b_iodone b_io.bio_done #define b_iodone_chain b_io.bio_done_chain #define b_ioflags b_io.bio_flags +#define b_offset b_io.bio_offset #define b_pblkno b_io.bio_pblkno #define b_resid b_io.bio_resid LIST_ENTRY(buf) b_hash; /* Hash chain. */ @@ -143,12 +149,9 @@ struct buf { unsigned char b_xflags; /* extra flags */ struct lock b_lock; /* Buffer lock */ long b_bufsize; /* Allocated buffer size. */ - long b_bcount; /* Valid bytes in buffer. */ - caddr_t b_data; /* Memory, superblocks, indirect etc. */ caddr_t b_kvabase; /* base kva for buffer */ int b_kvasize; /* size of kva for buffer */ daddr_t b_lblkno; /* Logical block number. */ - off_t b_offset; /* Offset into file */ struct vnode *b_vp; /* Device vnode. */ int b_dirtyoff; /* Offset in buffer of dirty region. */ int b_dirtyend; /* Offset of end of dirty region. */ @@ -220,6 +223,7 @@ struct buf { #define BIO_READ 1 #define BIO_WRITE 2 #define BIO_DELETE 4 +#define BIO_FORMAT 8 #define BIO_ERROR 0x00000001 #define BIO_ORDERED 0x00000002 @@ -385,6 +389,13 @@ struct buf_queue_head { struct buf *switch_point; }; +struct bio_queue_head { /* Sorted request queue; the struct bio counterpart of struct buf_queue_head. */ + TAILQ_HEAD(bio_queue, bio) queue; /* The queued requests. */ + daddr_t last_pblkno; /* bio_pblkno of the request last removed from the head of the queue. */ + struct bio *insert_point; /* Most recently queued BIO_ORDERED request, or NULL; sorted inserts go after it. */ + struct bio *switch_point; /* First request of the second sorted list, or NULL if there is none. */ +}; + /* * This structure describes a clustered I/O. It is stored in the b_saveaddr * field of the buffer on which I/O is done. 
At I/O completion, cluster @@ -401,14 +412,17 @@ struct cluster_save { #ifdef _KERNEL static __inline void bufq_init __P((struct buf_queue_head *head)); - +static __inline void bioq_init __P((struct bio_queue_head *head)); static __inline void bufq_insert_tail __P((struct buf_queue_head *head, struct buf *bp)); - +static __inline void bioq_insert_tail __P((struct bio_queue_head *head, + struct bio *bp)); static __inline void bufq_remove __P((struct buf_queue_head *head, struct buf *bp)); - +static __inline void bioq_remove __P((struct bio_queue_head *head, + struct bio *bp)); static __inline struct buf *bufq_first __P((struct buf_queue_head *head)); +static __inline struct bio *bioq_first __P((struct bio_queue_head *head)); static __inline void bufq_init(struct buf_queue_head *head) @@ -419,6 +433,15 @@ bufq_init(struct buf_queue_head *head) head->switch_point = NULL; } +static __inline void +bioq_init(struct bio_queue_head *head) +{ /* Reset head to an empty queue with no sort state: block number 0, no insert point, no switch point. */ + TAILQ_INIT(&head->queue); + head->last_pblkno = 0; + head->insert_point = NULL; + head->switch_point = NULL; +} + static __inline void bufq_insert_tail(struct buf_queue_head *head, struct buf *bp) { @@ -429,6 +452,16 @@ bufq_insert_tail(struct buf_queue_head *head, struct buf *bp) TAILQ_INSERT_TAIL(&head->queue, bp, b_act); } +static __inline void +bioq_insert_tail(struct bio_queue_head *head, struct bio *bp) +{ /* Append bp at the tail of the queue. */ + if ((bp->bio_flags & BIO_ORDERED) != 0) { /* an ordered request becomes the insert point and discards the switch point */ + head->insert_point = bp; + head->switch_point = NULL; + } + TAILQ_INSERT_TAIL(&head->queue, bp, bio_queue); +} + static __inline void bufq_remove(struct buf_queue_head *head, struct buf *bp) { @@ -445,12 +478,34 @@ bufq_remove(struct buf_queue_head *head, struct buf *bp) head->switch_point = NULL; } +static __inline void +bioq_remove(struct bio_queue_head *head, struct bio *bp) +{ /* Unlink bp from the queue, updating switch_point, insert_point and last_pblkno to match. */ + if (bp == head->switch_point) /* bp started the second list: its successor takes over */ + head->switch_point = TAILQ_NEXT(bp, bio_queue); + if (bp == head->insert_point) { /* bp was the insert point: fall back to its predecessor */ + head->insert_point = TAILQ_PREV(bp, bio_queue, bio_queue); + if (head->insert_point 
== NULL) /* bp had no predecessor: reset the remembered block number */ + head->last_pblkno = 0; + } else if (bp == TAILQ_FIRST(&head->queue)) /* taking the head: remember its block number for bioqdisksort() */ + head->last_pblkno = bp->bio_pblkno; + TAILQ_REMOVE(&head->queue, bp, bio_queue); + if (TAILQ_FIRST(&head->queue) == head->switch_point) /* the switch point is now the queue head (or both are NULL): clear it */ + head->switch_point = NULL; +} + static __inline struct buf * bufq_first(struct buf_queue_head *head) { return (TAILQ_FIRST(&head->queue)); } +static __inline struct bio * +bioq_first(struct bio_queue_head *head) +{ /* Return the request at the head of the queue; bioqdisksort() treats NULL as an empty queue. */ + return (TAILQ_FIRST(&head->queue)); +} + #define BUF_WRITE(bp) VOP_BWRITE((bp)->b_vp, (bp)) #define BUF_STRATEGY(bp) VOP_STRATEGY((bp)->b_vp, (bp)) diff --git a/sys/sys/buf.h b/sys/sys/buf.h index c04f6829df40..5ac55a4e9971 100644 --- a/sys/sys/buf.h +++ b/sys/sys/buf.h @@ -86,6 +86,9 @@ struct bio { u_int bio_cmd; /* I/O operation. */ dev_t bio_dev; /* Device to do I/O on. */ daddr_t bio_blkno; /* Underlying physical block number. */ + off_t bio_offset; /* Offset into file. */ + long bio_bcount; /* Valid bytes in buffer. */ + caddr_t bio_data; /* Memory, superblocks, indirect etc. */ u_int bio_flags; /* BIO_ flags. */ struct buf *_bio_buf; /* Parent buffer. */ int bio_error; /* Errno for BIO_ERROR. */ @@ -121,9 +124,11 @@ struct bio { struct buf { /* XXX: b_io must be the first element of struct buf for now /phk */ struct bio b_io; /* "Builtin" I/O request. */ +#define b_bcount b_io.bio_bcount #define b_blkno b_io.bio_blkno #define b_caller1 b_io.bio_caller1 #define b_caller2 b_io.bio_caller2 +#define b_data b_io.bio_data #define b_dev b_io.bio_dev #define b_driver1 b_io.bio_driver1 #define b_driver2 b_io.bio_driver2 @@ -132,6 +137,7 @@ struct buf { #define b_iodone b_io.bio_done #define b_iodone_chain b_io.bio_done_chain #define b_ioflags b_io.bio_flags +#define b_offset b_io.bio_offset #define b_pblkno b_io.bio_pblkno #define b_resid b_io.bio_resid LIST_ENTRY(buf) b_hash; /* Hash chain. 
*/ @@ -143,12 +149,9 @@ struct buf { unsigned char b_xflags; /* extra flags */ struct lock b_lock; /* Buffer lock */ long b_bufsize; /* Allocated buffer size. */ - long b_bcount; /* Valid bytes in buffer. */ - caddr_t b_data; /* Memory, superblocks, indirect etc. */ caddr_t b_kvabase; /* base kva for buffer */ int b_kvasize; /* size of kva for buffer */ daddr_t b_lblkno; /* Logical block number. */ - off_t b_offset; /* Offset into file */ struct vnode *b_vp; /* Device vnode. */ int b_dirtyoff; /* Offset in buffer of dirty region. */ int b_dirtyend; /* Offset of end of dirty region. */ @@ -220,6 +223,7 @@ struct buf { #define BIO_READ 1 #define BIO_WRITE 2 #define BIO_DELETE 4 +#define BIO_FORMAT 8 #define BIO_ERROR 0x00000001 #define BIO_ORDERED 0x00000002 @@ -385,6 +389,13 @@ struct buf_queue_head { struct buf *switch_point; }; +struct bio_queue_head { /* Sorted request queue; the struct bio counterpart of struct buf_queue_head. */ + TAILQ_HEAD(bio_queue, bio) queue; /* The queued requests. */ + daddr_t last_pblkno; /* bio_pblkno of the request last removed from the head of the queue. */ + struct bio *insert_point; /* Most recently queued BIO_ORDERED request, or NULL; sorted inserts go after it. */ + struct bio *switch_point; /* First request of the second sorted list, or NULL if there is none. */ +}; + /* * This structure describes a clustered I/O. It is stored in the b_saveaddr * field of the buffer on which I/O is done. 
At I/O completion, cluster @@ -401,14 +412,17 @@ struct cluster_save { #ifdef _KERNEL static __inline void bufq_init __P((struct buf_queue_head *head)); - +static __inline void bioq_init __P((struct bio_queue_head *head)); static __inline void bufq_insert_tail __P((struct buf_queue_head *head, struct buf *bp)); - +static __inline void bioq_insert_tail __P((struct bio_queue_head *head, + struct bio *bp)); static __inline void bufq_remove __P((struct buf_queue_head *head, struct buf *bp)); - +static __inline void bioq_remove __P((struct bio_queue_head *head, + struct bio *bp)); static __inline struct buf *bufq_first __P((struct buf_queue_head *head)); +static __inline struct bio *bioq_first __P((struct bio_queue_head *head)); static __inline void bufq_init(struct buf_queue_head *head) @@ -419,6 +433,15 @@ bufq_init(struct buf_queue_head *head) head->switch_point = NULL; } +static __inline void +bioq_init(struct bio_queue_head *head) +{ /* Reset head to an empty queue with no sort state: block number 0, no insert point, no switch point. */ + TAILQ_INIT(&head->queue); + head->last_pblkno = 0; + head->insert_point = NULL; + head->switch_point = NULL; +} + static __inline void bufq_insert_tail(struct buf_queue_head *head, struct buf *bp) { @@ -429,6 +452,16 @@ bufq_insert_tail(struct buf_queue_head *head, struct buf *bp) TAILQ_INSERT_TAIL(&head->queue, bp, b_act); } +static __inline void +bioq_insert_tail(struct bio_queue_head *head, struct bio *bp) +{ /* Append bp at the tail of the queue. */ + if ((bp->bio_flags & BIO_ORDERED) != 0) { /* an ordered request becomes the insert point and discards the switch point */ + head->insert_point = bp; + head->switch_point = NULL; + } + TAILQ_INSERT_TAIL(&head->queue, bp, bio_queue); +} + static __inline void bufq_remove(struct buf_queue_head *head, struct buf *bp) { @@ -445,12 +478,34 @@ bufq_remove(struct buf_queue_head *head, struct buf *bp) head->switch_point = NULL; } +static __inline void +bioq_remove(struct bio_queue_head *head, struct bio *bp) +{ /* Unlink bp from the queue, updating switch_point, insert_point and last_pblkno to match. */ + if (bp == head->switch_point) /* bp started the second list: its successor takes over */ + head->switch_point = TAILQ_NEXT(bp, bio_queue); + if (bp == head->insert_point) { /* bp was the insert point: fall back to its predecessor */ + head->insert_point = TAILQ_PREV(bp, bio_queue, bio_queue); + if (head->insert_point 
== NULL) /* bp had no predecessor: reset the remembered block number */ + head->last_pblkno = 0; + } else if (bp == TAILQ_FIRST(&head->queue)) /* taking the head: remember its block number for bioqdisksort() */ + head->last_pblkno = bp->bio_pblkno; + TAILQ_REMOVE(&head->queue, bp, bio_queue); + if (TAILQ_FIRST(&head->queue) == head->switch_point) /* the switch point is now the queue head (or both are NULL): clear it */ + head->switch_point = NULL; +} + static __inline struct buf * bufq_first(struct buf_queue_head *head) { return (TAILQ_FIRST(&head->queue)); } +static __inline struct bio * +bioq_first(struct bio_queue_head *head) +{ /* Return the request at the head of the queue; bioqdisksort() treats NULL as an empty queue. */ + return (TAILQ_FIRST(&head->queue)); +} + #define BUF_WRITE(bp) VOP_BWRITE((bp)->b_vp, (bp)) #define BUF_STRATEGY(bp) VOP_STRATEGY((bp)->b_vp, (bp)) diff --git a/sys/sys/devicestat.h b/sys/sys/devicestat.h index 172ff75540b1..ec91c18a0436 100644 --- a/sys/sys/devicestat.h +++ b/sys/sys/devicestat.h @@ -218,6 +218,7 @@ void devstat_end_transaction(struct devstat *ds, u_int32_t bytes, devstat_tag_type tag_type, devstat_trans_flags flags); void devstat_end_transaction_buf(struct devstat *ds, struct buf *); +void devstat_end_transaction_bio(struct devstat *ds, struct bio *); /* struct bio counterpart of devstat_end_transaction_buf() */ #endif #endif /* _DEVICESTAT_H */ diff --git a/sys/sys/disklabel.h b/sys/sys/disklabel.h index 5c5d7463b29b..81a5182a1c74 100644 --- a/sys/sys/disklabel.h +++ b/sys/sys/disklabel.h @@ -436,8 +436,10 @@ dkunit(dev_t dev) return (((minor(dev) >> 16) & 0x1e0) | ((minor(dev) >> 3) & 0x1f)); } +struct bio; struct buf; struct buf_queue_head; +struct bio_queue_head; int bounds_check_with_label __P((struct buf *bp, struct disklabel *lp, int wlabel)); @@ -447,6 +449,7 @@ void disksort __P((struct buf *ap, struct buf *bp)); u_int dkcksum __P((struct disklabel *lp)); char *readdisklabel __P((dev_t dev, struct disklabel *lp)); void bufqdisksort __P((struct buf_queue_head *ap, struct buf *bp)); +void bioqdisksort __P((struct bio_queue_head *ap, struct bio *bp)); /* struct bio counterpart of bufqdisksort() */ int setdisklabel __P((struct disklabel *olp, struct disklabel *nlp, u_long openmask)); int writedisklabel __P((dev_t dev, struct disklabel *lp)); diff --git a/sys/sys/diskmbr.h b/sys/sys/diskmbr.h index 
5c5d7463b29b..81a5182a1c74 100644 --- a/sys/sys/diskmbr.h +++ b/sys/sys/diskmbr.h @@ -436,8 +436,10 @@ dkunit(dev_t dev) return (((minor(dev) >> 16) & 0x1e0) | ((minor(dev) >> 3) & 0x1f)); } +struct bio; struct buf; struct buf_queue_head; +struct bio_queue_head; int bounds_check_with_label __P((struct buf *bp, struct disklabel *lp, int wlabel)); @@ -447,6 +449,7 @@ void disksort __P((struct buf *ap, struct buf *bp)); u_int dkcksum __P((struct disklabel *lp)); char *readdisklabel __P((dev_t dev, struct disklabel *lp)); void bufqdisksort __P((struct buf_queue_head *ap, struct buf *bp)); +void bioqdisksort __P((struct bio_queue_head *ap, struct bio *bp)); /* struct bio counterpart of bufqdisksort() */ int setdisklabel __P((struct disklabel *olp, struct disklabel *nlp, u_long openmask)); int writedisklabel __P((dev_t dev, struct disklabel *lp)); diff --git a/sys/sys/diskpc98.h b/sys/sys/diskpc98.h index 5c5d7463b29b..81a5182a1c74 100644 --- a/sys/sys/diskpc98.h +++ b/sys/sys/diskpc98.h @@ -436,8 +436,10 @@ dkunit(dev_t dev) return (((minor(dev) >> 16) & 0x1e0) | ((minor(dev) >> 3) & 0x1f)); } +struct bio; struct buf; struct buf_queue_head; +struct bio_queue_head; int bounds_check_with_label __P((struct buf *bp, struct disklabel *lp, int wlabel)); @@ -447,6 +449,7 @@ void disksort __P((struct buf *ap, struct buf *bp)); u_int dkcksum __P((struct disklabel *lp)); char *readdisklabel __P((dev_t dev, struct disklabel *lp)); void bufqdisksort __P((struct buf_queue_head *ap, struct buf *bp)); +void bioqdisksort __P((struct bio_queue_head *ap, struct bio *bp)); /* struct bio counterpart of bufqdisksort() */ int setdisklabel __P((struct disklabel *olp, struct disklabel *nlp, u_long openmask)); int writedisklabel __P((dev_t dev, struct disklabel *lp)); diff --git a/sys/ufs/ufs/ufs_disksubr.c b/sys/ufs/ufs/ufs_disksubr.c index fb2064a338c0..551652fdc4cf 100644 --- a/sys/ufs/ufs/ufs_disksubr.c +++ b/sys/ufs/ufs/ufs_disksubr.c @@ -160,6 +160,104 @@ bufqdisksort(bufq, bp) } +void +bioqdisksort(bioq, bp) + struct bio_queue_head *bioq; + struct bio *bp; +{ + struct bio *bq; + 
struct bio *bn; + struct bio *be; + + be = TAILQ_LAST(&bioq->queue, bio_queue); + /* + * Sort bp into bioq by bio_pblkno. If the queue is empty or we are an + * ordered transaction, then it's easy: just append at the tail. + */ + if ((bq = bioq_first(bioq)) == NULL + || (bp->bio_flags & BIO_ORDERED) != 0) { + bioq_insert_tail(bioq, bp); + return; + } else if (bioq->insert_point != NULL) { + + /* + * A certain portion of the list is + * "locked" to preserve ordering, so + * we can only insert after the insert + * point. + */ + bq = bioq->insert_point; + } else { + + /* + * If we lie before the last removed (currently active) + * request, and are not inserting ourselves into the + * "locked" portion of the list, then we must add ourselves + * to the second request list. + */ + if (bp->bio_pblkno < bioq->last_pblkno) { + /* The second list, when there is one, starts at the switch point. */ + bq = bioq->switch_point; + /* + * If we are starting a new secondary list, + * then it's easy. + */ + if (bq == NULL) { + bioq->switch_point = bp; + bioq_insert_tail(bioq, bp); + return; + } + /* + * If we lie ahead of the current switch point, + * insert us before the switch point and move + * the switch point. + */ + if (bp->bio_pblkno < bq->bio_pblkno) { + bioq->switch_point = bp; + TAILQ_INSERT_BEFORE(bq, bp, bio_queue); + return; + } + } else { /* First list: when a switch point exists, the list ends just before it. */ + if (bioq->switch_point != NULL) + be = TAILQ_PREV(bioq->switch_point, + bio_queue, bio_queue); + /* + * If we lie between last_pblkno and bq (still the queue head here), + * insert before bq. + */ + if (bp->bio_pblkno < bq->bio_pblkno) { + TAILQ_INSERT_BEFORE(bq, bp, bio_queue); + return; + } + } + } + + /* + * Request is at/after our current position in the list. + * Optimize for sequential I/O by seeing if we go at the tail. 
+ */ + if (bp->bio_pblkno > be->bio_pblkno) { + TAILQ_INSERT_AFTER(&bioq->queue, be, bp, bio_queue); + return; + } + + /* Otherwise, insertion sort: walk forward from bq to find our slot. */ + while ((bn = TAILQ_NEXT(bq, bio_queue)) != NULL) { + + /* + * We want to go after the current request if it is the end + * of the first request list, or if the next request is at a + * larger block number than our request. + */ + if (bn == bioq->switch_point + || bp->bio_pblkno < bn->bio_pblkno) + break; + bq = bn; + } + TAILQ_INSERT_AFTER(&bioq->queue, bq, bp, bio_queue); +} + + /* * Attempt to read a disk label from a device using the indicated strategy * routine. The label must be partly set up before this: secpercyl, secsize