[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH vtpm v3 02/12] add posix io for blkfront



Matthew Fioravante, le Mon 08 Oct 2012 15:06:42 -0400, a écrit :
> This patch adds posix io support (read,write,lseek) to block devices
> using blkfront.
> 
> Signed-off-by: Matthew Fioravante <matthew.fioravante@xxxxxxxxxx>
> Acked-by: Samuel Thibault <samuel.thibault@xxxxxxxxxxxxx>

Confirmed on this version.

> diff --git a/extras/mini-os/blkfront.c b/extras/mini-os/blkfront.c
> index 74b8b26..f4283a9 100644
> --- a/extras/mini-os/blkfront.c
> +++ b/extras/mini-os/blkfront.c
> @@ -392,6 +392,7 @@ void blkfront_aio(struct blkfront_aiocb *aiocbp, int 
> write)
>  static void blkfront_aio_cb(struct blkfront_aiocb *aiocbp, int ret)
>  {
>      aiocbp->data = (void*) 1;
> +    aiocbp->aio_cb = NULL;
>  }
>  
>  void blkfront_io(struct blkfront_aiocb *aiocbp, int write)
> @@ -547,9 +548,177 @@ moretodo:
>  #ifdef HAVE_LIBC
>  int blkfront_open(struct blkfront_dev *dev)
>  {
> +    /* Silently prevent multiple opens */
> +    if(dev->fd != -1) {
> +       return dev->fd;
> +    }
>      dev->fd = alloc_fd(FTYPE_BLK);
>      printk("blk_open(%s) -> %d\n", dev->nodename, dev->fd);
>      files[dev->fd].blk.dev = dev;
> +    files[dev->fd].blk.offset = 0;
>      return dev->fd;
>  }
> +
> +int blkfront_posix_rwop(int fd, uint8_t* buf, size_t count, int write)
> +{
> +   struct blkfront_dev* dev = files[fd].blk.dev;
> +   off_t offset = files[fd].blk.offset;
> +   struct blkfront_aiocb aiocb;
> +   unsigned long long disksize = dev->info.sectors * dev->info.sector_size;
> +   unsigned int blocksize = dev->info.sector_size;
> +
> +   int blknum;
> +   int blkoff;
> +   size_t bytes;
> +   int rc = 0;
> +   int alignedbuf = 0;
> +   uint8_t* copybuf = NULL;
> +
> +   /* RW 0 bytes is just a NOP */
> +   if(count == 0) {
> +      return 0;
> +   }
> +   /* Check for NULL buffer */
> +   if( buf == NULL ) {
> +      errno = EFAULT;
> +      return -1;
> +   }
> +
> +   /* Write mode checks */
> +   if(write) {
> +      /*Make sure we have write permission */
> +      if(dev->info.info & VDISK_READONLY 
> +            || (dev->info.mode != O_RDWR  && dev->info.mode !=  O_WRONLY)) {
> +         errno = EACCES;
> +         return -1;
> +      }
> +      /*Make sure disk is big enough for this write */
> +      if(offset + count > disksize) {
> +         errno = ENOSPC;
> +         return -1;
> +      }
> +   }
> +   /* Read mode checks */
> +   else
> +   {
> +      /* Reading past the disk? Just return 0 */
> +      if(offset >= disksize) {
> +         return 0;
> +      }
> +
> +      /*If the requested read is bigger than the disk, just
> +       * read as much as we can until the end */
> +      if(offset + count > disksize) {
> +         count = disksize - offset;
> +      }
> +   }
> +   /* Determine which block to start at and at which offset inside of it */
> +   blknum = offset / blocksize;
> +   blkoff = offset % blocksize;
> +
> +   /* Optimization: We need to check if buf is aligned to the sector size.
> +    * This is somewhat tricky code. We have to add the blocksize - block 
> offset
> +    * because the first block may be a partial block and then for every 
> subsequent
> +    * block rw the buffer will be offset.*/
> +   if(!((uintptr_t) (buf +(blocksize -  blkoff)) & 
> (dev->info.sector_size-1))) {
> +      alignedbuf = 1;
> +   }
> +
> +   /* Setup aiocb block object */
> +   aiocb.aio_dev = dev;
> +   aiocb.aio_offset = blknum * blocksize;
> +   aiocb.aio_cb = NULL;
> +   aiocb.data = NULL;
> +
> +   /* If our buffer is unaligned or its aligned but we will need to rw a 
> partial block
> +    * then a copy will have to be done */
> +   if(!alignedbuf || blkoff != 0 || count % blocksize != 0) {
> +      copybuf = _xmalloc(blocksize, dev->info.sector_size);
> +   }
> +
> +   rc = count;
> +   while(count > 0) {
> +      /* determine how many bytes to read/write from/to the current block 
> buffer */
> +      if(!alignedbuf || blkoff != 0 || count < blocksize) {
> +         /* This is the case for unaligned R/W or partial block */
> +         bytes = count < blocksize - blkoff ? count : blocksize - blkoff;
> +         aiocb.aio_nbytes = blocksize;
> +      } else {
> +         /* We can optimize further if buffer is page aligned */
> +         int not_page_aligned = 0;
> +         if(((uintptr_t)buf) & (PAGE_SIZE -1)) {
> +            not_page_aligned = 1;
> +         }
> +
> +         /* For an aligned R/W we can read up to the maximum transfer size */
> +         bytes = count > 
> (BLKIF_MAX_SEGMENTS_PER_REQUEST-not_page_aligned)*PAGE_SIZE 
> +            ? (BLKIF_MAX_SEGMENTS_PER_REQUEST-not_page_aligned)*PAGE_SIZE
> +            : count & ~(blocksize -1);
> +         aiocb.aio_nbytes = bytes;
> +      }
> +
> +      /* read operation */
> +      if(!write) {
> +         if (alignedbuf && bytes >= blocksize) {
> +            /* If aligned and were reading a whole block, just read right 
> into buf */
> +            aiocb.aio_buf = buf;
> +            blkfront_read(&aiocb);
> +         } else {
> +            /* If not then we have to do a copy */
> +            aiocb.aio_buf = copybuf;
> +            blkfront_read(&aiocb);
> +            memcpy(buf, &copybuf[blkoff], bytes);
> +         }
> +      }
> +      /* Write operation */
> +      else {
> +         if(alignedbuf && bytes >= blocksize) {
> +            /* If aligned and were writing a whole block, just write 
> directly from buf */
> +            aiocb.aio_buf = buf;
> +            blkfront_write(&aiocb);
> +         } else {
> +            /* If not then we have to do a copy. */
> +            aiocb.aio_buf = copybuf;
> +            /* If we're writing a partial block, we need to read the current 
> contents first
> +             * so we don't overwrite the extra bits with garbage */
> +            if(blkoff != 0 || bytes < blocksize) {
> +               blkfront_read(&aiocb);
> +            }
> +            memcpy(&copybuf[blkoff], buf, bytes);
> +            blkfront_write(&aiocb);
> +         }
> +      }
> +      /* Will start at beginning of all remaining blocks */
> +      blkoff = 0;
> +
> +      /* Increment counters and continue */
> +      count -= bytes;
> +      buf += bytes;
> +      if(bytes < blocksize) {
> +         //At minimum we read one block
> +         aiocb.aio_offset += blocksize;
> +      } else {
> +         //If we read more than a block, was a multiple of blocksize
> +         aiocb.aio_offset += bytes;
> +      }
> +   }
> +
> +   free(copybuf);
> +   files[fd].blk.offset += rc;
> +   return rc;
> +
> +}
> +
> +int blkfront_posix_fstat(int fd, struct stat* buf)
> +{
> +   struct blkfront_dev* dev = files[fd].blk.dev;
> +
> +   buf->st_mode = dev->info.mode;
> +   buf->st_uid = 0;
> +   buf->st_gid = 0;
> +   buf->st_size = dev->info.sectors * dev->info.sector_size;
> +   buf->st_atime = buf->st_mtime = buf->st_ctime = time(NULL);
> +
> +   return 0;
> +}
>  #endif
> diff --git a/extras/mini-os/include/blkfront.h 
> b/extras/mini-os/include/blkfront.h
> index 724137e..3528af9 100644
> --- a/extras/mini-os/include/blkfront.h
> +++ b/extras/mini-os/include/blkfront.h
> @@ -28,7 +28,17 @@ struct blkfront_info
>  };
>  struct blkfront_dev *init_blkfront(char *nodename, struct blkfront_info 
> *info);
>  #ifdef HAVE_LIBC
> +#include <sys/stat.h>
> +/* POSIX IO functions:
> + * use blkfront_open() to get a file descriptor to the block device
> + * Don't use the other blkfront posix functions here directly, instead use
> + * read(), write(), lseek() and fstat() on the file descriptor
> + */
>  int blkfront_open(struct blkfront_dev *dev);
> +int blkfront_posix_rwop(int fd, uint8_t* buf, size_t count, int write);
> +#define blkfront_posix_write(fd, buf, count) blkfront_posix_rwop(fd, 
> (uint8_t*)buf, count, 1)
> +#define blkfront_posix_read(fd, buf, count) blkfront_posix_rwop(fd, 
> (uint8_t*)buf, count, 0)
> +int blkfront_posix_fstat(int fd, struct stat* buf);
>  #endif
>  void blkfront_aio(struct blkfront_aiocb *aiocbp, int write);
>  #define blkfront_aio_read(aiocbp) blkfront_aio(aiocbp, 0)
> diff --git a/extras/mini-os/include/lib.h b/extras/mini-os/include/lib.h
> index 1af2717..d4641b6 100644
> --- a/extras/mini-os/include/lib.h
> +++ b/extras/mini-os/include/lib.h
> @@ -174,6 +174,7 @@ extern struct file {
>       } tap;
>       struct {
>           struct blkfront_dev *dev;
> +            off_t offset;
>       } blk;
>       struct {
>           struct kbdfront_dev *dev;
> diff --git a/extras/mini-os/lib/sys.c b/extras/mini-os/lib/sys.c
> index a7d35d6..7ddbbf8 100644
> --- a/extras/mini-os/lib/sys.c
> +++ b/extras/mini-os/lib/sys.c
> @@ -289,6 +289,11 @@ int read(int fd, void *buf, size_t nbytes)
>           return ret * sizeof(union xenfb_in_event);
>          }
>  #endif
> +#ifdef CONFIG_BLKFRONT
> +        case FTYPE_BLK: {
> +         return blkfront_posix_read(fd, buf, nbytes);
> +        }
> +#endif
>       default:
>           break;
>      }
> @@ -321,6 +326,10 @@ int write(int fd, const void *buf, size_t nbytes)
>           netfront_xmit(files[fd].tap.dev, (void*) buf, nbytes);
>           return nbytes;
>  #endif
> +#ifdef CONFIG_BLKFRONT
> +     case FTYPE_BLK:
> +         return blkfront_posix_write(fd, buf, nbytes);
> +#endif
>       default:
>           break;
>      }
> @@ -331,8 +340,37 @@ int write(int fd, const void *buf, size_t nbytes)
>  
>  off_t lseek(int fd, off_t offset, int whence)
>  {
> -    errno = ESPIPE;
> -    return (off_t) -1;
> +    switch(files[fd].type) {
> +#ifdef CONFIG_BLKFRONT
> +       case FTYPE_BLK:
> +       switch (whence) {
> +          case SEEK_SET:
> +             files[fd].file.offset = offset;
> +             break;
> +          case SEEK_CUR:
> +             files[fd].file.offset += offset;
> +             break;
> +          case SEEK_END:
> +             {
> +                struct stat st;
> +                int ret;
> +                ret = fstat(fd, &st);
> +                if (ret)
> +                   return -1;
> +                files[fd].file.offset = st.st_size + offset;
> +                break;
> +             }
> +          default:
> +             errno = EINVAL;
> +             return -1;
> +       }
> +       return files[fd].file.offset;
> +       break;
> +#endif
> +       default: /* Not implemented on this FTYPE */
> +       errno = ESPIPE;
> +       return (off_t) -1;
> +    }
>  }
>  
>  int fsync(int fd) {
> @@ -445,6 +483,10 @@ int fstat(int fd, struct stat *buf)
>           buf->st_ctime = time(NULL);
>           return 0;
>       }
> +#ifdef CONFIG_BLKFRONT
> +     case FTYPE_BLK:
> +        return blkfront_posix_fstat(fd, buf);
> +#endif
>       default:
>           break;
>      }
> -- 
> 1.7.4.4
> 

-- 
Samuel
 PS> Salut ! J'ai un sujet de philo à vous soumettre : "Suffit-il
 PS> d'observer pour connaître" Idées + plan Merçi
 Oui, ya qu'a t'observer pour connaître le fait que tu es une feignasse.
 -+- FF in: Guide du Neuneu d'Usenet - Neuneu fait de la philo -+-

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.