[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

Re: [Nbd] [PATCH] nbd: Support FUA, FLUSH and ROTATIONAL



Finally, I've gotten around to this...my comments below...

On Sun, May 22, 2011 at 2:18 PM, Alex Bligh <alex@...872...> wrote:

> This patch implements FUA (Force Unit Access) and FLUSH commands
> and support for rotational devices for network block devices. They have
> been tested and appear to work as specified.

> @@ -240,9 +243,18 @@ static int nbd_send_req(struct nbd_device *lo, struct request *req)
>        unsigned long size = blk_rq_bytes(req);
>
>        request.magic = htonl(NBD_REQUEST_MAGIC);
> -       request.type = htonl(nbd_cmd(req));
> -       request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9);
> -       request.len = htonl(size);
> +       /* If FUA is set in the request, and we are told to send FUA, then OR in NBD_CMD_FLAG_FUA */
> +       request.type = htonl(nbd_cmd(req) |
> +                            (( (req->cmd_flags & REQ_FUA) && (lo->flags & NBD_FLAG_SEND_FUA)) ?
> +                             NBD_CMD_FLAG_FUA : 0));
> +       /* Send from & len as zero on FLUSH - other values reserved per protocol */
> +       if (nbd_cmd(req) == NBD_CMD_FLUSH) {
> +               request.from = 0;
> +               request.len = 0;

I don't believe we can assume that a FLUSH always has zero length. At
least in the block layer sources it says it is valid to have a FLUSH
and/or FUA with a single bio attached. I think nbd needs to handle
that.

> +       } else {
> +               request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9);
> +               request.len = htonl(size);
> +       }
>        memcpy(request.handle, &req, sizeof(req));
>
>        dprintk(DBG_TX, "%s: request %p: sending control (%s@%llu,%uB)\n",
> @@ -459,13 +471,34 @@ static void nbd_handle_req(struct nbd_device *lo, struct request *req)
>        nbd_cmd(req) = NBD_CMD_READ;
>        if (rq_data_dir(req) == WRITE) {
>                nbd_cmd(req) = NBD_CMD_WRITE;
> -               if (lo->flags & NBD_READ_ONLY) {
> +               if (lo->flags & NBD_FLAG_READ_ONLY) {
>                        printk(KERN_ERR "%s: Write on read-only\n",
>                                        lo->disk->disk_name);
>                        goto error_out;
>                }
>        }
>
> +       if (req->cmd_flags & REQ_FLUSH) {
> +               if (unlikely(blk_rq_sectors(req))) {
> +                       /* Elevator is meant to guarantee that a request with REQ_FLUSH
> +                        * set is broken into an empty request with REQ_FLUSH set then
> +                        * the rest of the content (if any). If this doesn't happen,
> +                        * whinge, then proceed to do the content without a flush.
> +                        */
> +                       printk(KERN_ERR "%s: nbd passed non-empty flush request\n",
> +                              lo->disk->disk_name);
> +
> +               } else {
> +                       if (lo->flags & NBD_FLAG_SEND_FLUSH)
> +                               nbd_cmd(req) = NBD_CMD_FLUSH;
> +                       else {
> +                               /* Ignore flush that we don't need */
> +                               nbd_end_request(req);
> +                               return;
> +                       }
> +               }
> +       }
> +
>        req->errors = 0;
>
>        mutex_lock(&lo->tx_lock);
> @@ -642,6 +675,18 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *lo,
>                lo->xmit_timeout = arg * HZ;
>                return 0;
>
> +       case NBD_SET_FLAGS:
> +               lo->flags = arg;
> +               if (lo->disk)
> +               {
> +                       if (lo->flags & NBD_FLAG_ROTATIONAL)
> +                               queue_flag_clear_unlocked(QUEUE_FLAG_NONROT, lo->disk->queue);
> +                       else
> +                               queue_flag_set_unlocked(QUEUE_FLAG_NONROT, lo->disk->queue);
> +
> +               }
> +               return 0;
> +

rotational can already be toggled by the block layer sysfs entries...I
don't know that we need an nbd-specific way to do this...

>        case NBD_SET_SIZE_BLOCKS:
>                lo->bytesize = ((u64) arg) * lo->blksize;
>                bdev->bd_inode->i_size = lo->bytesize;
> @@ -775,6 +820,11 @@ static int __init nbd_init(void)
>                        put_disk(disk);
>                        goto out;
>                }
> +               /* In order not to confuse the elevator, say we always
> +                * want FLUSH and FUA. We won't send them to the server
> +                * unless the relevant flag bit is set
> +                */
> +               blk_queue_flush(disk->queue, REQ_FLUSH | REQ_FUA);

Not sure this is kosher either. If you say that you implement FUA at
least, you have to honor the FUA requests. Otherwise, you're
invalidating the assumptions that filesystems make about data
consistency. I think we could eliminate the FUA handling bit, since
we're not really doing FUA (the nbd-server is just doing a postflush).
It looks like we basically get the same behavior for less work if we
just don't say that nbd handles FUA and let the block layer convert
the FUA to a postflush. If you turn off FUA do you get 2 flushes
instead?


>                /*
>                 * Tell the block layer that we are not a rotational device
>                 */
> diff --git a/include/linux/nbh.c b/include/linux/nbd.h
> index d146ca1..5cf18a0 100644
> --- a/include/linux/nbd.h
> +++ b/include/linux/nbd.h
> @@ -27,13 +27,25 @@
>  #define NBD_SET_SIZE_BLOCKS    _IO( 0xab, 7 )
>  #define NBD_DISCONNECT  _IO( 0xab, 8 )
>  #define NBD_SET_TIMEOUT _IO( 0xab, 9 )
> +#define NBD_SET_FLAGS _IO( 0xab, 10 )
>
>  enum {
>        NBD_CMD_READ = 0,
>        NBD_CMD_WRITE = 1,
> -       NBD_CMD_DISC = 2
> +       NBD_CMD_DISC = 2,
> +       NBD_CMD_FLUSH = 3
>  };
>
> +#define NBD_CMD_MASK_COMMAND 0x0000ffff
> +#define NBD_CMD_FLAG_FUA (1<<16)
> +
> +/* values for flags field */
> +#define NBD_FLAG_HAS_FLAGS     (1 << 0)        /* Flags are there */
> +#define NBD_FLAG_READ_ONLY     (1 << 1)        /* Device is read-only */
> +#define NBD_FLAG_SEND_FLUSH    (1 << 2)        /* Send FLUSH */
> +#define NBD_FLAG_SEND_FUA      (1 << 3)        /* Send FUA (Force Unit Access) */
> +#define NBD_FLAG_ROTATIONAL    (1 << 4)        /* Use elevator algorithm - rotational media */
> +
>  #define nbd_cmd(req) ((req)->cmd[0])
>
>  /* userspace doesn't need the nbd_device structure */
> @@ -42,10 +54,6 @@ enum {
>  #include <linux/wait.h>
>  #include <linux/mutex.h>
>
> -/* values for flags field */
> -#define NBD_READ_ONLY 0x0001
> -#define NBD_WRITE_NOCHK 0x0002
> -
>  struct request;
>
>  struct nbd_device {
>

The flag changes look good. I'll go ahead and start testing with those.

One question to the general audience: do we need an ioctl for the flag
setting or would a sysfs entry be sufficient? I really would like to
move away from having so many ioctls in nbd. I've started converting
some debug and ioctl stuff to sysfs already...

Thanks,
Paul



Reply to: