Mailing List Archive

[RFC] Removing b_end_io
On my laptop, I have about 31MB allocated to buffer_heads.

buffer_head 182728 299910 104 39 1 : tunables 0 0 0 : slabdata 7690 7690 0

Reducing the size of the buffer_head by 8 bytes gets us to 96 bytes,
which means we get 42 per page instead of 39 and saves me 2MB of memory.

I think b_end_io() is ripe for removal. It's only used while the I/O
is in progress, and it's always set to end_bio_bh_io_sync() which
may set the quiet bit, calls ->b_end_io and calls bio_put().

So how about this as an approach? Only another 40 or so call-sites
to take care of to eliminate b_end_io from the buffer_head. Yes, this
particular example should be entirely rewritten to do away with buffer
heads, but that's been true since 2006. I'm looking for an approach
which can be implemented quickly since the buffer_head does not appear
to be going away any time soon.

diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c
index d61b524ae440..7f985b138372 100644
--- a/drivers/md/md-bitmap.c
+++ b/drivers/md/md-bitmap.c
@@ -314,14 +314,16 @@ static void write_page(struct bitmap *bitmap, struct page *page, int wait)
md_bitmap_file_kick(bitmap);
}

-static void end_bitmap_write(struct buffer_head *bh, int uptodate)
+static void end_bitmap_write(struct bio *bio)
{
+ struct buffer_head *bh = bio->bi_private;
struct bitmap *bitmap = bh->b_private;

- if (!uptodate)
+ if (bio->bi_status != BLK_STS_OK)
set_bit(BITMAP_WRITE_ERROR, &bitmap->flags);
if (atomic_dec_and_test(&bitmap->pending_writes))
wake_up(&bitmap->write_wait);
+ bio_put(bio);
}

static void free_buffers(struct page *page)
@@ -388,12 +390,11 @@ static int read_page(struct file *file, unsigned long index,
else
count -= (1<<inode->i_blkbits);

- bh->b_end_io = end_bitmap_write;
bh->b_private = bitmap;
atomic_inc(&bitmap->pending_writes);
set_buffer_locked(bh);
set_buffer_mapped(bh);
- submit_bh(REQ_OP_READ, 0, bh);
+ bh_submit(bh, REQ_OP_READ, end_bitmap_write);
}
blk_cur++;
bh = bh->b_this_page;
diff --git a/fs/buffer.c b/fs/buffer.c
index d468ed9981e0..668c586b2ed1 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -3022,8 +3022,9 @@ static void end_bio_bh_io_sync(struct bio *bio)
bio_put(bio);
}

-static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
- enum rw_hint write_hint, struct writeback_control *wbc)
+static void __bh_submit(struct buffer_head *bh, unsigned req_flags,
+ enum rw_hint write_hint, struct writeback_control *wbc,
+ bio_end_io_t end_bio)
{
struct bio *bio;

@@ -3036,7 +3037,7 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
/*
* Only clear out a write error when rewriting
*/
- if (test_set_buffer_req(bh) && (op == REQ_OP_WRITE))
+ if (test_set_buffer_req(bh) && (op_is_write(req_flags)))
clear_buffer_write_io_error(bh);

bio = bio_alloc(GFP_NOIO, 1);
@@ -3050,14 +3051,14 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
BUG_ON(bio->bi_iter.bi_size != bh->b_size);

- bio->bi_end_io = end_bio_bh_io_sync;
+ bio->bi_end_io = end_bio;
bio->bi_private = bh;

if (buffer_meta(bh))
- op_flags |= REQ_META;
+ req_flags |= REQ_META;
if (buffer_prio(bh))
- op_flags |= REQ_PRIO;
- bio_set_op_attrs(bio, op, op_flags);
+ req_flags |= REQ_PRIO;
+ bio->bi_opf = req_flags;

/* Take care of bh's that straddle the end of the device */
guard_bio_eod(bio);
@@ -3068,6 +3069,12 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
}

submit_bio(bio);
+}
+
+static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
+ enum rw_hint write_hint, struct writeback_control *wbc)
+{
+ __bh_submit(bh, op | op_flags, write_hint, wbc, end_bio_bh_io_sync);
return 0;
}

@@ -3077,6 +3084,11 @@ int submit_bh(int op, int op_flags, struct buffer_head *bh)
}
EXPORT_SYMBOL(submit_bh);

+void bh_submit(struct buffer_head *bh, unsigned req_flags, bio_end_io_t end_io)
+{
+ __bh_submit(bh, req_flags, 0, NULL, end_io);
+}
+
/**
* ll_rw_block: low-level access to block devices (DEPRECATED)
* @op: whether to %READ or %WRITE
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 6b47f94378c5..cc9113befe15 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -203,6 +203,7 @@ int sync_dirty_buffer(struct buffer_head *bh);
int __sync_dirty_buffer(struct buffer_head *bh, int op_flags);
void write_dirty_buffer(struct buffer_head *bh, int op_flags);
int submit_bh(int, int, struct buffer_head *);
+void bh_submit(struct buffer_head *, unsigned req_flags, bio_end_io_t);
void write_boundary_block(struct block_device *bdev,
sector_t bblock, unsigned blocksize);
int bh_uptodate_or_lock(struct buffer_head *bh);
Re: [RFC] Removing b_end_io [ In reply to ]
On Sun, Oct 25, 2020 at 04:44:38AM +0000, Matthew Wilcox wrote:
> @@ -3068,6 +3069,12 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
> }
>
> submit_bio(bio);
> +}
> +
> +static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
> + enum rw_hint write_hint, struct writeback_control *wbc)
> +{
> + __bh_submit(bh, op | op_flags, write_hint, wbc, end_bio_bh_io_sync);
> return 0;
> }
>

I believe this will break use cases where the file system sets
bh->b_end_io and then calls submit_bh(), which then calls
submit_bh_wbc(). That's because with this change, calls to
submit_bh_wbc() --- include submit_bh() --- ignores bh->b_end_io and
results in end_bio_bh_io_sync getting used.

Filesystems that do this includes fs/ntfs, fs/resiserfs.

In this case, that can probably be fixed by changing submit_bh() to
pass in bh->b_end_io, or switching those users to use the new
bh_submit() function to prevent these breakages.

- Ted
Re: [RFC] Removing b_end_io [ In reply to ]
On Sun, Oct 25, 2020 at 11:56:52AM -0400, Theodore Y. Ts'o wrote:
> On Sun, Oct 25, 2020 at 04:44:38AM +0000, Matthew Wilcox wrote:
> > @@ -3068,6 +3069,12 @@ static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
> > }
> >
> > submit_bio(bio);
> > +}
> > +
> > +static int submit_bh_wbc(int op, int op_flags, struct buffer_head *bh,
> > + enum rw_hint write_hint, struct writeback_control *wbc)
> > +{
> > + __bh_submit(bh, op | op_flags, write_hint, wbc, end_bio_bh_io_sync);
> > return 0;
> > }
> >
>
> I believe this will break use cases where the file system sets
> bh->b_end_io and then calls submit_bh(), which then calls
> submit_bh_wbc(). That's because with this change, calls to
> submit_bh_wbc() --- include submit_bh() --- ignores bh->b_end_io and
> results in end_bio_bh_io_sync getting used.

I think you're confused between the two end_ios. The final argument
to bh_submit() and __bh_submit() is a bio_end_io_t. end_bio_bh_io_sync()
calls bh->b_end_io.
Re: [RFC] Removing b_end_io [ In reply to ]
On Sun, Oct 25, 2020 at 04:44:38AM +0000, Matthew Wilcox wrote:
>
> On my laptop, I have about 31MB allocated to buffer_heads.
>
> buffer_head 182728 299910 104 39 1 : tunables 0 0 0 : slabdata 7690 7690 0
>
> Reducing the size of the buffer_head by 8 bytes gets us to 96 bytes,
> which means we get 42 per page instead of 39 and saves me 2MB of memory.
>
> I think b_end_io() is ripe for removal. It's only used while the I/O
> is in progress, and it's always set to end_bio_bh_io_sync() which
> may set the quiet bit, calls ->b_end_io and calls bio_put().
>
> So how about this as an approach? Only another 40 or so call-sites
> to take care of to eliminate b_end_io from the buffer_head. Yes, this
> particular example should be entirely rewritten to do away with buffer
> heads, but that's been true since 2006. I'm looking for an approach
> which can be implemented quickly since the buffer_head does not appear
> to be going away any time soon.

I think this looks pretty reasonable.