Mailing List Archive

[PATCH vtpm v3 02/12] add posix io for blkfront
This patch adds posix io support (read,write,lseek) to block devices
using blkfront.

Signed-off-by: Matthew Fioravante <matthew.fioravante@jhuapl.edu>
Acked-by: Samuel Thibault <samuel.thibault@ens-lyons.org>

diff --git a/extras/mini-os/blkfront.c b/extras/mini-os/blkfront.c
index 74b8b26..f4283a9 100644
--- a/extras/mini-os/blkfront.c
+++ b/extras/mini-os/blkfront.c
@@ -392,6 +392,7 @@ void blkfront_aio(struct blkfront_aiocb *aiocbp, int write)
static void blkfront_aio_cb(struct blkfront_aiocb *aiocbp, int ret)
{
aiocbp->data = (void*) 1;
+ aiocbp->aio_cb = NULL;
}

void blkfront_io(struct blkfront_aiocb *aiocbp, int write)
@@ -547,9 +548,177 @@ moretodo:
#ifdef HAVE_LIBC
int blkfront_open(struct blkfront_dev *dev)
{
+ /* Silently prevent multiple opens */
+ if(dev->fd != -1) {
+ return dev->fd;
+ }
dev->fd = alloc_fd(FTYPE_BLK);
printk("blk_open(%s) -> %d\n", dev->nodename, dev->fd);
files[dev->fd].blk.dev = dev;
+ files[dev->fd].blk.offset = 0;
return dev->fd;
}
+
+int blkfront_posix_rwop(int fd, uint8_t* buf, size_t count, int write)
+{
+ struct blkfront_dev* dev = files[fd].blk.dev;
+ off_t offset = files[fd].blk.offset;
+ struct blkfront_aiocb aiocb;
+ unsigned long long disksize = dev->info.sectors * dev->info.sector_size;
+ unsigned int blocksize = dev->info.sector_size;
+
+ int blknum;
+ int blkoff;
+ size_t bytes;
+ int rc = 0;
+ int alignedbuf = 0;
+ uint8_t* copybuf = NULL;
+
+ /* RW 0 bytes is just a NOP */
+ if(count == 0) {
+ return 0;
+ }
+ /* Check for NULL buffer */
+ if( buf == NULL ) {
+ errno = EFAULT;
+ return -1;
+ }
+
+ /* Write mode checks */
+ if(write) {
+ /*Make sure we have write permission */
+ if(dev->info.info & VDISK_READONLY
+ || (dev->info.mode != O_RDWR && dev->info.mode != O_WRONLY)) {
+ errno = EACCES;
+ return -1;
+ }
+ /*Make sure disk is big enough for this write */
+ if(offset + count > disksize) {
+ errno = ENOSPC;
+ return -1;
+ }
+ }
+ /* Read mode checks */
+ else
+ {
+ /* Reading past the disk? Just return 0 */
+ if(offset >= disksize) {
+ return 0;
+ }
+
+ /*If the requested read is bigger than the disk, just
+ * read as much as we can until the end */
+ if(offset + count > disksize) {
+ count = disksize - offset;
+ }
+ }
+ /* Determine which block to start at and at which offset inside of it */
+ blknum = offset / blocksize;
+ blkoff = offset % blocksize;
+
+ /* Optimization: We need to check if buf is aligned to the sector size.
+ * This is somewhat tricky code. We have to add the blocksize - block offset
+ * because the first block may be a partial block and then for every subsequent
+ * block rw the buffer will be offset.*/
+ if(!((uintptr_t) (buf +(blocksize - blkoff)) & (dev->info.sector_size-1))) {
+ alignedbuf = 1;
+ }
+
+ /* Setup aiocb block object */
+ aiocb.aio_dev = dev;
+ aiocb.aio_offset = blknum * blocksize;
+ aiocb.aio_cb = NULL;
+ aiocb.data = NULL;
+
+ /* If our buffer is unaligned or its aligned but we will need to rw a partial block
+ * then a copy will have to be done */
+ if(!alignedbuf || blkoff != 0 || count % blocksize != 0) {
+ copybuf = _xmalloc(blocksize, dev->info.sector_size);
+ }
+
+ rc = count;
+ while(count > 0) {
+ /* determine how many bytes to read/write from/to the current block buffer */
+ if(!alignedbuf || blkoff != 0 || count < blocksize) {
+ /* This is the case for unaligned R/W or partial block */
+ bytes = count < blocksize - blkoff ? count : blocksize - blkoff;
+ aiocb.aio_nbytes = blocksize;
+ } else {
+ /* We can optimize further if buffer is page aligned */
+ int not_page_aligned = 0;
+ if(((uintptr_t)buf) & (PAGE_SIZE -1)) {
+ not_page_aligned = 1;
+ }
+
+ /* For an aligned R/W we can read up to the maximum transfer size */
+ bytes = count > (BLKIF_MAX_SEGMENTS_PER_REQUEST-not_page_aligned)*PAGE_SIZE
+ ? (BLKIF_MAX_SEGMENTS_PER_REQUEST-not_page_aligned)*PAGE_SIZE
+ : count & ~(blocksize -1);
+ aiocb.aio_nbytes = bytes;
+ }
+
+ /* read operation */
+ if(!write) {
+ if (alignedbuf && bytes >= blocksize) {
+ /* If aligned and were reading a whole block, just read right into buf */
+ aiocb.aio_buf = buf;
+ blkfront_read(&aiocb);
+ } else {
+ /* If not then we have to do a copy */
+ aiocb.aio_buf = copybuf;
+ blkfront_read(&aiocb);
+ memcpy(buf, &copybuf[blkoff], bytes);
+ }
+ }
+ /* Write operation */
+ else {
+ if(alignedbuf && bytes >= blocksize) {
+ /* If aligned and were writing a whole block, just write directly from buf */
+ aiocb.aio_buf = buf;
+ blkfront_write(&aiocb);
+ } else {
+ /* If not then we have to do a copy. */
+ aiocb.aio_buf = copybuf;
+ /* If we're writing a partial block, we need to read the current contents first
+ * so we don't overwrite the extra bits with garbage */
+ if(blkoff != 0 || bytes < blocksize) {
+ blkfront_read(&aiocb);
+ }
+ memcpy(&copybuf[blkoff], buf, bytes);
+ blkfront_write(&aiocb);
+ }
+ }
+ /* Will start at beginning of all remaining blocks */
+ blkoff = 0;
+
+ /* Increment counters and continue */
+ count -= bytes;
+ buf += bytes;
+ if(bytes < blocksize) {
+ //At minimum we read one block
+ aiocb.aio_offset += blocksize;
+ } else {
+ //If we read more than a block, was a multiple of blocksize
+ aiocb.aio_offset += bytes;
+ }
+ }
+
+ free(copybuf);
+ files[fd].blk.offset += rc;
+ return rc;
+
+}
+
+int blkfront_posix_fstat(int fd, struct stat* buf)
+{
+ struct blkfront_dev* dev = files[fd].blk.dev;
+
+ buf->st_mode = dev->info.mode;
+ buf->st_uid = 0;
+ buf->st_gid = 0;
+ buf->st_size = dev->info.sectors * dev->info.sector_size;
+ buf->st_atime = buf->st_mtime = buf->st_ctime = time(NULL);
+
+ return 0;
+}
#endif
diff --git a/extras/mini-os/include/blkfront.h b/extras/mini-os/include/blkfront.h
index 724137e..3528af9 100644
--- a/extras/mini-os/include/blkfront.h
+++ b/extras/mini-os/include/blkfront.h
@@ -28,7 +28,17 @@ struct blkfront_info
};
struct blkfront_dev *init_blkfront(char *nodename, struct blkfront_info *info);
#ifdef HAVE_LIBC
+#include <sys/stat.h>
+/* POSIX IO functions:
+ * use blkfront_open() to get a file descriptor to the block device
+ * Don't use the other blkfront posix functions here directly, instead use
+ * read(), write(), lseek() and fstat() on the file descriptor
+ */
int blkfront_open(struct blkfront_dev *dev);
+int blkfront_posix_rwop(int fd, uint8_t* buf, size_t count, int write);
+#define blkfront_posix_write(fd, buf, count) blkfront_posix_rwop(fd, (uint8_t*)buf, count, 1)
+#define blkfront_posix_read(fd, buf, count) blkfront_posix_rwop(fd, (uint8_t*)buf, count, 0)
+int blkfront_posix_fstat(int fd, struct stat* buf);
#endif
void blkfront_aio(struct blkfront_aiocb *aiocbp, int write);
#define blkfront_aio_read(aiocbp) blkfront_aio(aiocbp, 0)
diff --git a/extras/mini-os/include/lib.h b/extras/mini-os/include/lib.h
index 1af2717..d4641b6 100644
--- a/extras/mini-os/include/lib.h
+++ b/extras/mini-os/include/lib.h
@@ -174,6 +174,7 @@ extern struct file {
} tap;
struct {
struct blkfront_dev *dev;
+ off_t offset;
} blk;
struct {
struct kbdfront_dev *dev;
diff --git a/extras/mini-os/lib/sys.c b/extras/mini-os/lib/sys.c
index a7d35d6..7ddbbf8 100644
--- a/extras/mini-os/lib/sys.c
+++ b/extras/mini-os/lib/sys.c
@@ -289,6 +289,11 @@ int read(int fd, void *buf, size_t nbytes)
return ret * sizeof(union xenfb_in_event);
}
#endif
+#ifdef CONFIG_BLKFRONT
+ case FTYPE_BLK: {
+ return blkfront_posix_read(fd, buf, nbytes);
+ }
+#endif
default:
break;
}
@@ -321,6 +326,10 @@ int write(int fd, const void *buf, size_t nbytes)
netfront_xmit(files[fd].tap.dev, (void*) buf, nbytes);
return nbytes;
#endif
+#ifdef CONFIG_BLKFRONT
+ case FTYPE_BLK:
+ return blkfront_posix_write(fd, buf, nbytes);
+#endif
default:
break;
}
@@ -331,8 +340,37 @@ int write(int fd, const void *buf, size_t nbytes)

off_t lseek(int fd, off_t offset, int whence)
{
- errno = ESPIPE;
- return (off_t) -1;
+ switch(files[fd].type) {
+#ifdef CONFIG_BLKFRONT
+ case FTYPE_BLK:
+ switch (whence) {
+ case SEEK_SET:
+ files[fd].file.offset = offset;
+ break;
+ case SEEK_CUR:
+ files[fd].file.offset += offset;
+ break;
+ case SEEK_END:
+ {
+ struct stat st;
+ int ret;
+ ret = fstat(fd, &st);
+ if (ret)
+ return -1;
+ files[fd].file.offset = st.st_size + offset;
+ break;
+ }
+ default:
+ errno = EINVAL;
+ return -1;
+ }
+ return files[fd].file.offset;
+ break;
+#endif
+ default: /* Not implemented on this FTYPE */
+ errno = ESPIPE;
+ return (off_t) -1;
+ }
}

int fsync(int fd) {
@@ -445,6 +483,10 @@ int fstat(int fd, struct stat *buf)
buf->st_ctime = time(NULL);
return 0;
}
+#ifdef CONFIG_BLKFRONT
+ case FTYPE_BLK:
+ return blkfront_posix_fstat(fd, buf);
+#endif
default:
break;
}
--
1.7.4.4


_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel
Re: [PATCH vtpm v3 02/12] add posix io for blkfront [ In reply to ]
Matthew Fioravante, le Mon 08 Oct 2012 15:06:42 -0400, a écrit :
> This patch adds posix io support (read,write,lseek) to block devices
> using blkfront.
>
> Signed-off-by: Matthew Fioravante <matthew.fioravante@jhuapl.edu>
> Acked-by: Samuel Thibault <samuel.thibault@ens-lyons.org>

Confirmed on this version.

> diff --git a/extras/mini-os/blkfront.c b/extras/mini-os/blkfront.c
> index 74b8b26..f4283a9 100644
> --- a/extras/mini-os/blkfront.c
> +++ b/extras/mini-os/blkfront.c
> @@ -392,6 +392,7 @@ void blkfront_aio(struct blkfront_aiocb *aiocbp, int write)
> static void blkfront_aio_cb(struct blkfront_aiocb *aiocbp, int ret)
> {
> aiocbp->data = (void*) 1;
> + aiocbp->aio_cb = NULL;
> }
>
> void blkfront_io(struct blkfront_aiocb *aiocbp, int write)
> @@ -547,9 +548,177 @@ moretodo:
> #ifdef HAVE_LIBC
> int blkfront_open(struct blkfront_dev *dev)
> {
> + /* Silently prevent multiple opens */
> + if(dev->fd != -1) {
> + return dev->fd;
> + }
> dev->fd = alloc_fd(FTYPE_BLK);
> printk("blk_open(%s) -> %d\n", dev->nodename, dev->fd);
> files[dev->fd].blk.dev = dev;
> + files[dev->fd].blk.offset = 0;
> return dev->fd;
> }
> +
> +int blkfront_posix_rwop(int fd, uint8_t* buf, size_t count, int write)
> +{
> + struct blkfront_dev* dev = files[fd].blk.dev;
> + off_t offset = files[fd].blk.offset;
> + struct blkfront_aiocb aiocb;
> + unsigned long long disksize = dev->info.sectors * dev->info.sector_size;
> + unsigned int blocksize = dev->info.sector_size;
> +
> + int blknum;
> + int blkoff;
> + size_t bytes;
> + int rc = 0;
> + int alignedbuf = 0;
> + uint8_t* copybuf = NULL;
> +
> + /* RW 0 bytes is just a NOP */
> + if(count == 0) {
> + return 0;
> + }
> + /* Check for NULL buffer */
> + if( buf == NULL ) {
> + errno = EFAULT;
> + return -1;
> + }
> +
> + /* Write mode checks */
> + if(write) {
> + /*Make sure we have write permission */
> + if(dev->info.info & VDISK_READONLY
> + || (dev->info.mode != O_RDWR && dev->info.mode != O_WRONLY)) {
> + errno = EACCES;
> + return -1;
> + }
> + /*Make sure disk is big enough for this write */
> + if(offset + count > disksize) {
> + errno = ENOSPC;
> + return -1;
> + }
> + }
> + /* Read mode checks */
> + else
> + {
> + /* Reading past the disk? Just return 0 */
> + if(offset >= disksize) {
> + return 0;
> + }
> +
> + /*If the requested read is bigger than the disk, just
> + * read as much as we can until the end */
> + if(offset + count > disksize) {
> + count = disksize - offset;
> + }
> + }
> + /* Determine which block to start at and at which offset inside of it */
> + blknum = offset / blocksize;
> + blkoff = offset % blocksize;
> +
> + /* Optimization: We need to check if buf is aligned to the sector size.
> + * This is somewhat tricky code. We have to add the blocksize - block offset
> + * because the first block may be a partial block and then for every subsequent
> + * block rw the buffer will be offset.*/
> + if(!((uintptr_t) (buf +(blocksize - blkoff)) & (dev->info.sector_size-1))) {
> + alignedbuf = 1;
> + }
> +
> + /* Setup aiocb block object */
> + aiocb.aio_dev = dev;
> + aiocb.aio_offset = blknum * blocksize;
> + aiocb.aio_cb = NULL;
> + aiocb.data = NULL;
> +
> + /* If our buffer is unaligned or its aligned but we will need to rw a partial block
> + * then a copy will have to be done */
> + if(!alignedbuf || blkoff != 0 || count % blocksize != 0) {
> + copybuf = _xmalloc(blocksize, dev->info.sector_size);
> + }
> +
> + rc = count;
> + while(count > 0) {
> + /* determine how many bytes to read/write from/to the current block buffer */
> + if(!alignedbuf || blkoff != 0 || count < blocksize) {
> + /* This is the case for unaligned R/W or partial block */
> + bytes = count < blocksize - blkoff ? count : blocksize - blkoff;
> + aiocb.aio_nbytes = blocksize;
> + } else {
> + /* We can optimize further if buffer is page aligned */
> + int not_page_aligned = 0;
> + if(((uintptr_t)buf) & (PAGE_SIZE -1)) {
> + not_page_aligned = 1;
> + }
> +
> + /* For an aligned R/W we can read up to the maximum transfer size */
> + bytes = count > (BLKIF_MAX_SEGMENTS_PER_REQUEST-not_page_aligned)*PAGE_SIZE
> + ? (BLKIF_MAX_SEGMENTS_PER_REQUEST-not_page_aligned)*PAGE_SIZE
> + : count & ~(blocksize -1);
> + aiocb.aio_nbytes = bytes;
> + }
> +
> + /* read operation */
> + if(!write) {
> + if (alignedbuf && bytes >= blocksize) {
> + /* If aligned and were reading a whole block, just read right into buf */
> + aiocb.aio_buf = buf;
> + blkfront_read(&aiocb);
> + } else {
> + /* If not then we have to do a copy */
> + aiocb.aio_buf = copybuf;
> + blkfront_read(&aiocb);
> + memcpy(buf, &copybuf[blkoff], bytes);
> + }
> + }
> + /* Write operation */
> + else {
> + if(alignedbuf && bytes >= blocksize) {
> + /* If aligned and were writing a whole block, just write directly from buf */
> + aiocb.aio_buf = buf;
> + blkfront_write(&aiocb);
> + } else {
> + /* If not then we have to do a copy. */
> + aiocb.aio_buf = copybuf;
> + /* If we're writing a partial block, we need to read the current contents first
> + * so we don't overwrite the extra bits with garbage */
> + if(blkoff != 0 || bytes < blocksize) {
> + blkfront_read(&aiocb);
> + }
> + memcpy(&copybuf[blkoff], buf, bytes);
> + blkfront_write(&aiocb);
> + }
> + }
> + /* Will start at beginning of all remaining blocks */
> + blkoff = 0;
> +
> + /* Increment counters and continue */
> + count -= bytes;
> + buf += bytes;
> + if(bytes < blocksize) {
> + //At minimum we read one block
> + aiocb.aio_offset += blocksize;
> + } else {
> + //If we read more than a block, was a multiple of blocksize
> + aiocb.aio_offset += bytes;
> + }
> + }
> +
> + free(copybuf);
> + files[fd].blk.offset += rc;
> + return rc;
> +
> +}
> +
> +int blkfront_posix_fstat(int fd, struct stat* buf)
> +{
> + struct blkfront_dev* dev = files[fd].blk.dev;
> +
> + buf->st_mode = dev->info.mode;
> + buf->st_uid = 0;
> + buf->st_gid = 0;
> + buf->st_size = dev->info.sectors * dev->info.sector_size;
> + buf->st_atime = buf->st_mtime = buf->st_ctime = time(NULL);
> +
> + return 0;
> +}
> #endif
> diff --git a/extras/mini-os/include/blkfront.h b/extras/mini-os/include/blkfront.h
> index 724137e..3528af9 100644
> --- a/extras/mini-os/include/blkfront.h
> +++ b/extras/mini-os/include/blkfront.h
> @@ -28,7 +28,17 @@ struct blkfront_info
> };
> struct blkfront_dev *init_blkfront(char *nodename, struct blkfront_info *info);
> #ifdef HAVE_LIBC
> +#include <sys/stat.h>
> +/* POSIX IO functions:
> + * use blkfront_open() to get a file descriptor to the block device
> + * Don't use the other blkfront posix functions here directly, instead use
> + * read(), write(), lseek() and fstat() on the file descriptor
> + */
> int blkfront_open(struct blkfront_dev *dev);
> +int blkfront_posix_rwop(int fd, uint8_t* buf, size_t count, int write);
> +#define blkfront_posix_write(fd, buf, count) blkfront_posix_rwop(fd, (uint8_t*)buf, count, 1)
> +#define blkfront_posix_read(fd, buf, count) blkfront_posix_rwop(fd, (uint8_t*)buf, count, 0)
> +int blkfront_posix_fstat(int fd, struct stat* buf);
> #endif
> void blkfront_aio(struct blkfront_aiocb *aiocbp, int write);
> #define blkfront_aio_read(aiocbp) blkfront_aio(aiocbp, 0)
> diff --git a/extras/mini-os/include/lib.h b/extras/mini-os/include/lib.h
> index 1af2717..d4641b6 100644
> --- a/extras/mini-os/include/lib.h
> +++ b/extras/mini-os/include/lib.h
> @@ -174,6 +174,7 @@ extern struct file {
> } tap;
> struct {
> struct blkfront_dev *dev;
> + off_t offset;
> } blk;
> struct {
> struct kbdfront_dev *dev;
> diff --git a/extras/mini-os/lib/sys.c b/extras/mini-os/lib/sys.c
> index a7d35d6..7ddbbf8 100644
> --- a/extras/mini-os/lib/sys.c
> +++ b/extras/mini-os/lib/sys.c
> @@ -289,6 +289,11 @@ int read(int fd, void *buf, size_t nbytes)
> return ret * sizeof(union xenfb_in_event);
> }
> #endif
> +#ifdef CONFIG_BLKFRONT
> + case FTYPE_BLK: {
> + return blkfront_posix_read(fd, buf, nbytes);
> + }
> +#endif
> default:
> break;
> }
> @@ -321,6 +326,10 @@ int write(int fd, const void *buf, size_t nbytes)
> netfront_xmit(files[fd].tap.dev, (void*) buf, nbytes);
> return nbytes;
> #endif
> +#ifdef CONFIG_BLKFRONT
> + case FTYPE_BLK:
> + return blkfront_posix_write(fd, buf, nbytes);
> +#endif
> default:
> break;
> }
> @@ -331,8 +340,37 @@ int write(int fd, const void *buf, size_t nbytes)
>
> off_t lseek(int fd, off_t offset, int whence)
> {
> - errno = ESPIPE;
> - return (off_t) -1;
> + switch(files[fd].type) {
> +#ifdef CONFIG_BLKFRONT
> + case FTYPE_BLK:
> + switch (whence) {
> + case SEEK_SET:
> + files[fd].file.offset = offset;
> + break;
> + case SEEK_CUR:
> + files[fd].file.offset += offset;
> + break;
> + case SEEK_END:
> + {
> + struct stat st;
> + int ret;
> + ret = fstat(fd, &st);
> + if (ret)
> + return -1;
> + files[fd].file.offset = st.st_size + offset;
> + break;
> + }
> + default:
> + errno = EINVAL;
> + return -1;
> + }
> + return files[fd].file.offset;
> + break;
> +#endif
> + default: /* Not implemented on this FTYPE */
> + errno = ESPIPE;
> + return (off_t) -1;
> + }
> }
>
> int fsync(int fd) {
> @@ -445,6 +483,10 @@ int fstat(int fd, struct stat *buf)
> buf->st_ctime = time(NULL);
> return 0;
> }
> +#ifdef CONFIG_BLKFRONT
> + case FTYPE_BLK:
> + return blkfront_posix_fstat(fd, buf);
> +#endif
> default:
> break;
> }
> --
> 1.7.4.4
>

--
Samuel
PS> Salut ! J'ai un sujet de philo à vous soumettre : "Suffit-il
PS> d'observer pour connaître" Idées + plan Merçi
Oui, ya qu'a t'observer pour connaître le fait que tu es une feignasse.
-+- FF in: Guide du Neuneu d'Usenet - Neuneu fait de la philo -+-

_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel
Re: [PATCH vtpm v3 02/12] add posix io for blkfront [ In reply to ]
On Mon, 2012-10-08 at 23:35 +0100, Samuel Thibault wrote:
> Matthew Fioravante, le Mon 08 Oct 2012 15:06:42 -0400, a écrit :
> > This patch adds posix io support (read,write,lseek) to block devices
> > using blkfront.
> >
> > Signed-off-by: Matthew Fioravante <matthew.fioravante@jhuapl.edu>
> > Acked-by: Samuel Thibault <samuel.thibault@ens-lyons.org>
>
> Confirmed on this version.

And applied, thanks.



_______________________________________________
Xen-devel mailing list
Xen-devel@lists.xen.org
http://lists.xen.org/xen-devel