Xen project Mailing List

[Xen-devel] [vtpm v3 02/12] add posix io for blkfront

To: xen-devel@xxxxxxxxxxxxx, ian.campbell@xxxxxxxxxx, samuel.thibault@xxxxxxxxxxxx

From: Matthew Fioravante <matthew.fioravante@xxxxxxxxxx>

Date: Mon, 8 Oct 2012 13:42:33 -0400

Cc: Matthew Fioravante <matthew.fioravante@xxxxxxxxxx>

Delivery-date: Mon, 08 Oct 2012 17:43:46 +0000

List-id: Xen developer discussion <xen-devel.lists.xen.org>

This patch adds posix io support (read,write,lseek) to block devices using blkfront. Signed-off-by: Matthew Fioravante <matthew.fioravante@xxxxxxxxxx> Acked-by: Samuel Thibault <samuel.thibault@xxxxxxxxxxxxx> diff --git a/extras/mini-os/blkfront.c b/extras/mini-os/blkfront.c index 74b8b26..f4283a9 100644 --- a/extras/mini-os/blkfront.c +++ b/extras/mini-os/blkfront.c @@ -392,6 +392,7 @@ void blkfront_aio(struct blkfront_aiocb *aiocbp, int write) static void blkfront_aio_cb(struct blkfront_aiocb *aiocbp, int ret) { aiocbp->data = (void*) 1; + aiocbp->aio_cb = NULL; } void blkfront_io(struct blkfront_aiocb *aiocbp, int write) @@ -547,9 +548,177 @@ moretodo: #ifdef HAVE_LIBC int blkfront_open(struct blkfront_dev *dev) { + /* Silently prevent multiple opens */ + if(dev->fd != -1) { + return dev->fd; + } dev->fd = alloc_fd(FTYPE_BLK); printk("blk_open(%s) -> %d\n", dev->nodename, dev->fd); files[dev->fd].blk.dev = dev; + files[dev->fd].blk.offset = 0; return dev->fd; } + +int blkfront_posix_rwop(int fd, uint8_t* buf, size_t count, int write) +{ + struct blkfront_dev* dev = files[fd].blk.dev; + off_t offset = files[fd].blk.offset; + struct blkfront_aiocb aiocb; + unsigned long long disksize = dev->info.sectors * dev->info.sector_size; + unsigned int blocksize = dev->info.sector_size; + + int blknum; + int blkoff; + size_t bytes; + int rc = 0; + int alignedbuf = 0; + uint8_t* copybuf = NULL; + + /* RW 0 bytes is just a NOP */ + if(count == 0) { + return 0; + } + /* Check for NULL buffer */ + if( buf == NULL ) { + errno = EFAULT; + return -1; + } + + /* Write mode checks */ + if(write) { + /*Make sure we have write permission */ + if(dev->info.info & VDISK_READONLY + || (dev->info.mode != O_RDWR && dev->info.mode != O_WRONLY)) { + errno = EACCES; + return -1; + } + /*Make sure disk is big enough for this write */ + if(offset + count > disksize) { + errno = ENOSPC; + return -1; + } + } + /* Read mode checks */ + else + { + /* Reading past the disk? Just return 0 */ + if(offset >= disksize) { + return 0; + } + + /*If the requested read is bigger than the disk, just + * read as much as we can until the end */ + if(offset + count > disksize) { + count = disksize - offset; + } + } + /* Determine which block to start at and at which offset inside of it */ + blknum = offset / blocksize; + blkoff = offset % blocksize; + + /* Optimization: We need to check if buf is aligned to the sector size. + * This is somewhat tricky code. We have to add the blocksize - block offset + * because the first block may be a partial block and then for every subsequent + * block rw the buffer will be offset.*/ + if(!((uintptr_t) (buf +(blocksize - blkoff)) & (dev->info.sector_size-1))) { + alignedbuf = 1; + } + + /* Setup aiocb block object */ + aiocb.aio_dev = dev; + aiocb.aio_offset = blknum * blocksize; + aiocb.aio_cb = NULL; + aiocb.data = NULL; + + /* If our buffer is unaligned or its aligned but we will need to rw a partial block + * then a copy will have to be done */ + if(!alignedbuf || blkoff != 0 || count % blocksize != 0) { + copybuf = _xmalloc(blocksize, dev->info.sector_size); + } + + rc = count; + while(count > 0) { + /* determine how many bytes to read/write from/to the current block buffer */ + if(!alignedbuf || blkoff != 0 || count < blocksize) { + /* This is the case for unaligned R/W or partial block */ + bytes = count < blocksize - blkoff ? count : blocksize - blkoff; + aiocb.aio_nbytes = blocksize; + } else { + /* We can optimize further if buffer is page aligned */ + int not_page_aligned = 0; + if(((uintptr_t)buf) & (PAGE_SIZE -1)) { + not_page_aligned = 1; + } + + /* For an aligned R/W we can read up to the maximum transfer size */ + bytes = count > (BLKIF_MAX_SEGMENTS_PER_REQUEST-not_page_aligned)*PAGE_SIZE + ? (BLKIF_MAX_SEGMENTS_PER_REQUEST-not_page_aligned)*PAGE_SIZE + : count & ~(blocksize -1); + aiocb.aio_nbytes = bytes; + } + + /* read operation */ + if(!write) { + if (alignedbuf && bytes >= blocksize) { + /* If aligned and were reading a whole block, just read right into buf */ + aiocb.aio_buf = buf; + blkfront_read(&aiocb); + } else { + /* If not then we have to do a copy */ + aiocb.aio_buf = copybuf; + blkfront_read(&aiocb); + memcpy(buf, &copybuf[blkoff], bytes); + } + } + /* Write operation */ + else { + if(alignedbuf && bytes >= blocksize) { + /* If aligned and were writing a whole block, just write directly from buf */ + aiocb.aio_buf = buf; + blkfront_write(&aiocb); + } else { + /* If not then we have to do a copy. */ + aiocb.aio_buf = copybuf; + /* If we're writing a partial block, we need to read the current contents first + * so we don't overwrite the extra bits with garbage */ + if(blkoff != 0 || bytes < blocksize) { + blkfront_read(&aiocb); + } + memcpy(&copybuf[blkoff], buf, bytes); + blkfront_write(&aiocb); + } + } + /* Will start at beginning of all remaining blocks */ + blkoff = 0; + + /* Increment counters and continue */ + count -= bytes; + buf += bytes; + if(bytes < blocksize) { + //At minimum we read one block + aiocb.aio_offset += blocksize; + } else { + //If we read more than a block, was a multiple of blocksize + aiocb.aio_offset += bytes; + } + } + + free(copybuf); + files[fd].blk.offset += rc; + return rc; + +} + +int blkfront_posix_fstat(int fd, struct stat* buf) +{ + struct blkfront_dev* dev = files[fd].blk.dev; + + buf->st_mode = dev->info.mode; + buf->st_uid = 0; + buf->st_gid = 0; + buf->st_size = dev->info.sectors * dev->info.sector_size; + buf->st_atime = buf->st_mtime = buf->st_ctime = time(NULL); + + return 0; +} #endif diff --git a/extras/mini-os/include/blkfront.h b/extras/mini-os/include/blkfront.h index 724137e..3528af9 100644 --- a/extras/mini-os/include/blkfront.h +++ b/extras/mini-os/include/blkfront.h @@ -28,7 +28,17 @@ struct blkfront_info }; struct blkfront_dev *init_blkfront(char *nodename, struct blkfront_info *info); #ifdef HAVE_LIBC +#include <sys/stat.h> +/* POSIX IO functions: + * use blkfront_open() to get a file descriptor to the block device + * Don't use the other blkfront posix functions here directly, instead use + * read(), write(), lseek() and fstat() on the file descriptor + */ int blkfront_open(struct blkfront_dev *dev); +int blkfront_posix_rwop(int fd, uint8_t* buf, size_t count, int write); +#define blkfront_posix_write(fd, buf, count) blkfront_posix_rwop(fd, (uint8_t*)buf, count, 1) +#define blkfront_posix_read(fd, buf, count) blkfront_posix_rwop(fd, (uint8_t*)buf, count, 0) +int blkfront_posix_fstat(int fd, struct stat* buf); #endif void blkfront_aio(struct blkfront_aiocb *aiocbp, int write); #define blkfront_aio_read(aiocbp) blkfront_aio(aiocbp, 0) diff --git a/extras/mini-os/include/lib.h b/extras/mini-os/include/lib.h index 1af2717..d4641b6 100644 --- a/extras/mini-os/include/lib.h +++ b/extras/mini-os/include/lib.h @@ -174,6 +174,7 @@ extern struct file { } tap; struct { struct blkfront_dev *dev; + off_t offset; } blk; struct { struct kbdfront_dev *dev; diff --git a/extras/mini-os/lib/sys.c b/extras/mini-os/lib/sys.c index a7d35d6..7ddbbf8 100644 --- a/extras/mini-os/lib/sys.c +++ b/extras/mini-os/lib/sys.c @@ -289,6 +289,11 @@ int read(int fd, void *buf, size_t nbytes) return ret * sizeof(union xenfb_in_event); } #endif +#ifdef CONFIG_BLKFRONT + case FTYPE_BLK: { + return blkfront_posix_read(fd, buf, nbytes); + } +#endif default: break; } @@ -321,6 +326,10 @@ int write(int fd, const void *buf, size_t nbytes) netfront_xmit(files[fd].tap.dev, (void*) buf, nbytes); return nbytes; #endif +#ifdef CONFIG_BLKFRONT + case FTYPE_BLK: + return blkfront_posix_write(fd, buf, nbytes); +#endif default: break; } @@ -331,8 +340,37 @@ int write(int fd, const void *buf, size_t nbytes) off_t lseek(int fd, off_t offset, int whence) { - errno = ESPIPE; - return (off_t) -1; + switch(files[fd].type) { +#ifdef CONFIG_BLKFRONT + case FTYPE_BLK: + switch (whence) { + case SEEK_SET: + files[fd].file.offset = offset; + break; + case SEEK_CUR: + files[fd].file.offset += offset; + break; + case SEEK_END: + { + struct stat st; + int ret; + ret = fstat(fd, &st); + if (ret) + return -1; + files[fd].file.offset = st.st_size + offset; + break; + } + default: + errno = EINVAL; + return -1; + } + return files[fd].file.offset; + break; +#endif + default: /* Not implemented on this FTYPE */ + errno = ESPIPE; + return (off_t) -1; + } } int fsync(int fd) { @@ -445,6 +483,10 @@ int fstat(int fd, struct stat *buf) buf->st_ctime = time(NULL); return 0; } +#ifdef CONFIG_BLKFRONT + case FTYPE_BLK: + return blkfront_posix_fstat(fd, buf); +#endif default: break; } -- 1.7.4.4 _______________________________________________ Xen-devel mailing list Xen-devel@xxxxxxxxxxxxx http://lists.xen.org/xen-devel

©2013 Xen Project, A Linux Foundation Collaborative Project. All Rights Reserved.
Linux Foundation is a registered trademark of The Linux Foundation.
Xen Project is a trademark of The Linux Foundation.