Mailing List Archive

[PATCH v3] zram: Introduce an aged idle interface
This change introduces an aged idle interface to the existing
idle sysfs file for zram.

When CONFIG_ZRAM_MEMORY_TRACKING is enabled the idle file
now also accepts an integer argument. This integer is the
age (in seconds) of pages to mark as idle. The idle file
still supports 'all' as it always has. This new approach
allows for much more control over which pages get marked
as idle.

v2 -> v3:
- Correct unused variable warning when
CONFIG_ZRAM_MEMORY_TRACKING is not enabled.
v1 -> v2:
- Switch to using existing idle file.
- Don't compare ktime directly.

Signed-off-by: Brian Geffon <bgeffon@google.com>
---
Documentation/admin-guide/blockdev/zram.rst | 8 +++
drivers/block/zram/zram_drv.c | 60 +++++++++++++++------
2 files changed, 52 insertions(+), 16 deletions(-)

diff --git a/Documentation/admin-guide/blockdev/zram.rst b/Documentation/admin-guide/blockdev/zram.rst
index 700329d25f57..8c8a92e5c00c 100644
--- a/Documentation/admin-guide/blockdev/zram.rst
+++ b/Documentation/admin-guide/blockdev/zram.rst
@@ -328,6 +328,14 @@ as idle::
From now on, any pages on zram are idle pages. The idle mark
will be removed until someone requests access of the block.
IOW, unless there is access request, those pages are still idle pages.
+Additionally, when CONFIG_ZRAM_MEMORY_TRACKING is enabled, pages can be
+marked as idle based on how long (in seconds) it has been since they were
+last accessed::
+
+ echo 86400 > /sys/block/zramX/idle
+
+In this example all pages which haven't been accessed in more than 86400
+seconds (one day) will be marked idle.

Admin can request writeback of those idle pages at right timing via::

diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index fcaf2750f68f..2af5cdb8da1a 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -291,22 +291,16 @@ static ssize_t mem_used_max_store(struct device *dev,
return len;
}

-static ssize_t idle_store(struct device *dev,
- struct device_attribute *attr, const char *buf, size_t len)
+/*
+ * Mark all pages which are older than cutoff as IDLE (a cutoff of 0 marks all pages).
+ * Callers should hold the zram init lock in read mode
+ **/
+static void mark_idle(struct zram *zram, ktime_t cutoff)
{
- struct zram *zram = dev_to_zram(dev);
+ int is_idle = 1;
unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
int index;

- if (!sysfs_streq(buf, "all"))
- return -EINVAL;
-
- down_read(&zram->init_lock);
- if (!init_done(zram)) {
- up_read(&zram->init_lock);
- return -EINVAL;
- }
-
for (index = 0; index < nr_pages; index++) {
/*
* Do not mark ZRAM_UNDER_WB slot as ZRAM_IDLE to close race.
@@ -314,14 +308,48 @@ static ssize_t idle_store(struct device *dev,
*/
zram_slot_lock(zram, index);
if (zram_allocated(zram, index) &&
- !zram_test_flag(zram, index, ZRAM_UNDER_WB))
- zram_set_flag(zram, index, ZRAM_IDLE);
+ !zram_test_flag(zram, index, ZRAM_UNDER_WB)) {
+#ifdef CONFIG_ZRAM_MEMORY_TRACKING
+ is_idle = (!cutoff || ktime_after(cutoff, zram->table[index].ac_time));
+#endif
+ if (is_idle)
+ zram_set_flag(zram, index, ZRAM_IDLE);
+ }
zram_slot_unlock(zram, index);
}
+}

- up_read(&zram->init_lock);
+static ssize_t idle_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t len)
+{
+ struct zram *zram = dev_to_zram(dev);
+ ktime_t cutoff_time = 0;
+ ssize_t rv = -EINVAL;

- return len;
+ if (!sysfs_streq(buf, "all")) {
+#ifdef CONFIG_ZRAM_MEMORY_TRACKING
+ u64 age_sec;
+ /* If it did not parse as 'all' try to treat it as an integer */
+ if (!kstrtoull(buf, 10, &age_sec))
+ cutoff_time = ktime_sub(ktime_get_boottime(),
+ ns_to_ktime(age_sec * NSEC_PER_SEC));
+ else
+#endif
+ goto out;
+ }
+
+ down_read(&zram->init_lock);
+ if (!init_done(zram))
+ goto out_unlock;
+
+ /* A cutoff_time of 0 marks everything as idle; this is the "all" behavior */
+ mark_idle(zram, cutoff_time);
+ rv = len;
+
+out_unlock:
+ up_read(&zram->init_lock);
+out:
+ return rv;
}

#ifdef CONFIG_ZRAM_WRITEBACK
--
2.33.0.464.g1972c5931b-goog
Re: [PATCH v3] zram: Introduce an aged idle interface [ In reply to ]
On Tue, Sep 21, 2021 at 12:43:36PM -0700, Brian Geffon wrote:
> This change introduces an aged idle interface to the existing
> idle sysfs file for zram.
>
> When CONFIG_ZRAM_MEMORY_TRACKING is enabled the idle file
> now also accepts an integer argument. This integer is the
> age (in seconds) of pages to mark as idle. The idle file
> still supports 'all' as it always has. This new approach
> allows for much more control over which pages get marked
> as idle.
>
> v2 -> v3:
> - Correct unused variable warning when
> CONFIG_ZRAM_MEMORY_TRACKING is not enabled.
> v1 -> v2:
> - Switch to using existing idle file.
> - Dont compare ktime directly.
>
> Signed-off-by: Brian Geffon <bgeffon@google.com>
> ---
> Documentation/admin-guide/blockdev/zram.rst | 8 +++
> drivers/block/zram/zram_drv.c | 60 +++++++++++++++------
> 2 files changed, 52 insertions(+), 16 deletions(-)
>
> diff --git a/Documentation/admin-guide/blockdev/zram.rst b/Documentation/admin-guide/blockdev/zram.rst
> index 700329d25f57..8c8a92e5c00c 100644
> --- a/Documentation/admin-guide/blockdev/zram.rst
> +++ b/Documentation/admin-guide/blockdev/zram.rst
> @@ -328,6 +328,14 @@ as idle::
> From now on, any pages on zram are idle pages. The idle mark
> will be removed until someone requests access of the block.
> IOW, unless there is access request, those pages are still idle pages.
> +Additionally, when CONFIG_ZRAM_MEMORY_TRACKING is enabled pages can be
> +marked as idle based on how long (in seconds) it's been since they were
> +last accessed, in seconds::
> +
> + echo 86400 > /sys/block/zramX/idle
> +
> +In this example all pages which haven't been accessed in more than 86400
> +seconds (one day) will be marked idle.
>
> Admin can request writeback of those idle pages at right timing via::
>
> diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
> index fcaf2750f68f..2af5cdb8da1a 100644
> --- a/drivers/block/zram/zram_drv.c
> +++ b/drivers/block/zram/zram_drv.c
> @@ -291,22 +291,16 @@ static ssize_t mem_used_max_store(struct device *dev,
> return len;
> }
>
> -static ssize_t idle_store(struct device *dev,
> - struct device_attribute *attr, const char *buf, size_t len)
> +/*
> + * Mark all pages which are older than or equal to cutoff as IDLE.
> + * Callers should hold the zram init lock in read mode
> + **/
> +static void mark_idle(struct zram *zram, ktime_t cutoff)
> {
> - struct zram *zram = dev_to_zram(dev);
> + int is_idle = 1;
> unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
> int index;
>
> - if (!sysfs_streq(buf, "all"))
> - return -EINVAL;
> -
> - down_read(&zram->init_lock);
> - if (!init_done(zram)) {
> - up_read(&zram->init_lock);
> - return -EINVAL;
> - }
> -
> for (index = 0; index < nr_pages; index++) {
> /*
> * Do not mark ZRAM_UNDER_WB slot as ZRAM_IDLE to close race.
> @@ -314,14 +308,48 @@ static ssize_t idle_store(struct device *dev,
> */
> zram_slot_lock(zram, index);
> if (zram_allocated(zram, index) &&
> - !zram_test_flag(zram, index, ZRAM_UNDER_WB))
> - zram_set_flag(zram, index, ZRAM_IDLE);
> + !zram_test_flag(zram, index, ZRAM_UNDER_WB)) {
> +#ifdef CONFIG_ZRAM_MEMORY_TRACKING
> + is_idle = (!cutoff || ktime_after(cutoff, zram->table[index].ac_time));
> +#endif
> + if (is_idle)
> + zram_set_flag(zram, index, ZRAM_IDLE);
> + }
> zram_slot_unlock(zram, index);
> }
> +}
>
> - up_read(&zram->init_lock);
> +static ssize_t idle_store(struct device *dev,
> + struct device_attribute *attr, const char *buf, size_t len)
> +{
> + struct zram *zram = dev_to_zram(dev);
> + ktime_t cutoff_time = 0;
> + ssize_t rv = -EINVAL;
>
> - return len;
> + if (!sysfs_streq(buf, "all")) {
> +#ifdef CONFIG_ZRAM_MEMORY_TRACKING
> + u64 age_sec;
> + /* If it did not parse as 'all' try to treat it as an integer */
> + if (!kstrtoull(buf, 10, &age_sec))

nit:
Do we need to limit this to base 10 only?
Passing 0 as the base would give more flexibility.

Otherwise, looks good to me.

Thanks, Brian.

> + cutoff_time = ktime_sub(ktime_get_boottime(),
> + ns_to_ktime(age_sec * NSEC_PER_SEC));
> + else
> +#endif
> + goto out;
> + }
> +
> + down_read(&zram->init_lock);
> + if (!init_done(zram))
> + goto out_unlock;
> +
> + /* A age_sec of 0 marks everything as idle, this is the "all" behavior */
> + mark_idle(zram, cutoff_time);
> + rv = len;
> +
> +out_unlock:
> + up_read(&zram->init_lock);
> +out:
> + return rv;
> }
>
> #ifdef CONFIG_ZRAM_WRITEBACK
> --
> 2.33.0.464.g1972c5931b-goog
>
Re: [PATCH v3] zram: Introduce an aged idle interface [ In reply to ]
Hi Minchan,
Thank you for taking a look. I'm happy to make that change, but I
personally cannot see why userspace would want to do something like
idle pages older than "0x3C seconds" or "0o250600 seconds," it just
seems like a strange way to represent seconds. What do you think?

Brian

On Wed, Sep 22, 2021 at 8:09 PM Minchan Kim <minchan@kernel.org> wrote:
>
> On Tue, Sep 21, 2021 at 12:43:36PM -0700, Brian Geffon wrote:
> > This change introduces an aged idle interface to the existing
> > idle sysfs file for zram.
> >
> > When CONFIG_ZRAM_MEMORY_TRACKING is enabled the idle file
> > now also accepts an integer argument. This integer is the
> > age (in seconds) of pages to mark as idle. The idle file
> > still supports 'all' as it always has. This new approach
> > allows for much more control over which pages get marked
> > as idle.
> >
> > v2 -> v3:
> > - Correct unused variable warning when
> > CONFIG_ZRAM_MEMORY_TRACKING is not enabled.
> > v1 -> v2:
> > - Switch to using existing idle file.
> > - Dont compare ktime directly.
> >
> > Signed-off-by: Brian Geffon <bgeffon@google.com>
> > ---
> > Documentation/admin-guide/blockdev/zram.rst | 8 +++
> > drivers/block/zram/zram_drv.c | 60 +++++++++++++++------
> > 2 files changed, 52 insertions(+), 16 deletions(-)
> >
> > diff --git a/Documentation/admin-guide/blockdev/zram.rst b/Documentation/admin-guide/blockdev/zram.rst
> > index 700329d25f57..8c8a92e5c00c 100644
> > --- a/Documentation/admin-guide/blockdev/zram.rst
> > +++ b/Documentation/admin-guide/blockdev/zram.rst
> > @@ -328,6 +328,14 @@ as idle::
> > From now on, any pages on zram are idle pages. The idle mark
> > will be removed until someone requests access of the block.
> > IOW, unless there is access request, those pages are still idle pages.
> > +Additionally, when CONFIG_ZRAM_MEMORY_TRACKING is enabled pages can be
> > +marked as idle based on how long (in seconds) it's been since they were
> > +last accessed, in seconds::
> > +
> > + echo 86400 > /sys/block/zramX/idle
> > +
> > +In this example all pages which haven't been accessed in more than 86400
> > +seconds (one day) will be marked idle.
> >
> > Admin can request writeback of those idle pages at right timing via::
> >
> > diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
> > index fcaf2750f68f..2af5cdb8da1a 100644
> > --- a/drivers/block/zram/zram_drv.c
> > +++ b/drivers/block/zram/zram_drv.c
> > @@ -291,22 +291,16 @@ static ssize_t mem_used_max_store(struct device *dev,
> > return len;
> > }
> >
> > -static ssize_t idle_store(struct device *dev,
> > - struct device_attribute *attr, const char *buf, size_t len)
> > +/*
> > + * Mark all pages which are older than or equal to cutoff as IDLE.
> > + * Callers should hold the zram init lock in read mode
> > + **/
> > +static void mark_idle(struct zram *zram, ktime_t cutoff)
> > {
> > - struct zram *zram = dev_to_zram(dev);
> > + int is_idle = 1;
> > unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
> > int index;
> >
> > - if (!sysfs_streq(buf, "all"))
> > - return -EINVAL;
> > -
> > - down_read(&zram->init_lock);
> > - if (!init_done(zram)) {
> > - up_read(&zram->init_lock);
> > - return -EINVAL;
> > - }
> > -
> > for (index = 0; index < nr_pages; index++) {
> > /*
> > * Do not mark ZRAM_UNDER_WB slot as ZRAM_IDLE to close race.
> > @@ -314,14 +308,48 @@ static ssize_t idle_store(struct device *dev,
> > */
> > zram_slot_lock(zram, index);
> > if (zram_allocated(zram, index) &&
> > - !zram_test_flag(zram, index, ZRAM_UNDER_WB))
> > - zram_set_flag(zram, index, ZRAM_IDLE);
> > + !zram_test_flag(zram, index, ZRAM_UNDER_WB)) {
> > +#ifdef CONFIG_ZRAM_MEMORY_TRACKING
> > + is_idle = (!cutoff || ktime_after(cutoff, zram->table[index].ac_time));
> > +#endif
> > + if (is_idle)
> > + zram_set_flag(zram, index, ZRAM_IDLE);
> > + }
> > zram_slot_unlock(zram, index);
> > }
> > +}
> >
> > - up_read(&zram->init_lock);
> > +static ssize_t idle_store(struct device *dev,
> > + struct device_attribute *attr, const char *buf, size_t len)
> > +{
> > + struct zram *zram = dev_to_zram(dev);
> > + ktime_t cutoff_time = 0;
> > + ssize_t rv = -EINVAL;
> >
> > - return len;
> > + if (!sysfs_streq(buf, "all")) {
> > +#ifdef CONFIG_ZRAM_MEMORY_TRACKING
> > + u64 age_sec;
> > + /* If it did not parse as 'all' try to treat it as an integer */
> > + if (!kstrtoull(buf, 10, &age_sec))
>
> nit:
> Do we need such limit base which work with only 10 base?
> Passing 0 would give more flexibility.
>
> Otherwise, looks good to me.
>
> Thanks, Brian.
>
> > + cutoff_time = ktime_sub(ktime_get_boottime(),
> > + ns_to_ktime(age_sec * NSEC_PER_SEC));
> > + else
> > +#endif
> > + goto out;
> > + }
> > +
> > + down_read(&zram->init_lock);
> > + if (!init_done(zram))
> > + goto out_unlock;
> > +
> > + /* A age_sec of 0 marks everything as idle, this is the "all" behavior */
> > + mark_idle(zram, cutoff_time);
> > + rv = len;
> > +
> > +out_unlock:
> > + up_read(&zram->init_lock);
> > +out:
> > + return rv;
> > }
> >
> > #ifdef CONFIG_ZRAM_WRITEBACK
> > --
> > 2.33.0.464.g1972c5931b-goog
> >
Re: [PATCH v3] zram: Introduce an aged idle interface [ In reply to ]
Hey Brian,

On Wed, Sep 22, 2021 at 08:42:44PM -0400, Brian Geffon wrote:
> Hi Minchan,
> Thank you for taking a look. I'm happy to make that change, but I
> personally cannot see why userspace would want to do something like
> idle pages older than "0x3C seconds" or "0o250600 seconds," it just
> seems like a strange way to represent seconds. What do you think?

The kernel community prefers inline replies over top-posting. ;-)

I don't have a strong opinion about multiple-base support. The question
just started from "what's the benefit of supporting only base 10?"
if we can support multiple bases with almost zero maintenance
overhead.

>
> Brian
>
> On Wed, Sep 22, 2021 at 8:09 PM Minchan Kim <minchan@kernel.org> wrote:
> >
> > On Tue, Sep 21, 2021 at 12:43:36PM -0700, Brian Geffon wrote:
> > > This change introduces an aged idle interface to the existing
> > > idle sysfs file for zram.
> > >
> > > When CONFIG_ZRAM_MEMORY_TRACKING is enabled the idle file
> > > now also accepts an integer argument. This integer is the
> > > age (in seconds) of pages to mark as idle. The idle file
> > > still supports 'all' as it always has. This new approach
> > > allows for much more control over which pages get marked
> > > as idle.
> > >
> > > v2 -> v3:
> > > - Correct unused variable warning when
> > > CONFIG_ZRAM_MEMORY_TRACKING is not enabled.
> > > v1 -> v2:
> > > - Switch to using existing idle file.
> > > - Dont compare ktime directly.
> > >
> > > Signed-off-by: Brian Geffon <bgeffon@google.com>
> > > ---
> > > Documentation/admin-guide/blockdev/zram.rst | 8 +++
> > > drivers/block/zram/zram_drv.c | 60 +++++++++++++++------
> > > 2 files changed, 52 insertions(+), 16 deletions(-)
> > >
> > > diff --git a/Documentation/admin-guide/blockdev/zram.rst b/Documentation/admin-guide/blockdev/zram.rst
> > > index 700329d25f57..8c8a92e5c00c 100644
> > > --- a/Documentation/admin-guide/blockdev/zram.rst
> > > +++ b/Documentation/admin-guide/blockdev/zram.rst
> > > @@ -328,6 +328,14 @@ as idle::
> > > From now on, any pages on zram are idle pages. The idle mark
> > > will be removed until someone requests access of the block.
> > > IOW, unless there is access request, those pages are still idle pages.
> > > +Additionally, when CONFIG_ZRAM_MEMORY_TRACKING is enabled pages can be
> > > +marked as idle based on how long (in seconds) it's been since they were
> > > +last accessed, in seconds::
> > > +
> > > + echo 86400 > /sys/block/zramX/idle
> > > +
> > > +In this example all pages which haven't been accessed in more than 86400
> > > +seconds (one day) will be marked idle.
> > >
> > > Admin can request writeback of those idle pages at right timing via::
> > >
> > > diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
> > > index fcaf2750f68f..2af5cdb8da1a 100644
> > > --- a/drivers/block/zram/zram_drv.c
> > > +++ b/drivers/block/zram/zram_drv.c
> > > @@ -291,22 +291,16 @@ static ssize_t mem_used_max_store(struct device *dev,
> > > return len;
> > > }
> > >
> > > -static ssize_t idle_store(struct device *dev,
> > > - struct device_attribute *attr, const char *buf, size_t len)
> > > +/*
> > > + * Mark all pages which are older than or equal to cutoff as IDLE.
> > > + * Callers should hold the zram init lock in read mode
> > > + **/
> > > +static void mark_idle(struct zram *zram, ktime_t cutoff)
> > > {
> > > - struct zram *zram = dev_to_zram(dev);
> > > + int is_idle = 1;
> > > unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
> > > int index;
> > >
> > > - if (!sysfs_streq(buf, "all"))
> > > - return -EINVAL;
> > > -
> > > - down_read(&zram->init_lock);
> > > - if (!init_done(zram)) {
> > > - up_read(&zram->init_lock);
> > > - return -EINVAL;
> > > - }
> > > -
> > > for (index = 0; index < nr_pages; index++) {
> > > /*
> > > * Do not mark ZRAM_UNDER_WB slot as ZRAM_IDLE to close race.
> > > @@ -314,14 +308,48 @@ static ssize_t idle_store(struct device *dev,
> > > */
> > > zram_slot_lock(zram, index);
> > > if (zram_allocated(zram, index) &&
> > > - !zram_test_flag(zram, index, ZRAM_UNDER_WB))
> > > - zram_set_flag(zram, index, ZRAM_IDLE);
> > > + !zram_test_flag(zram, index, ZRAM_UNDER_WB)) {
> > > +#ifdef CONFIG_ZRAM_MEMORY_TRACKING
> > > + is_idle = (!cutoff || ktime_after(cutoff, zram->table[index].ac_time));
> > > +#endif
> > > + if (is_idle)
> > > + zram_set_flag(zram, index, ZRAM_IDLE);
> > > + }
> > > zram_slot_unlock(zram, index);
> > > }
> > > +}
> > >
> > > - up_read(&zram->init_lock);
> > > +static ssize_t idle_store(struct device *dev,
> > > + struct device_attribute *attr, const char *buf, size_t len)
> > > +{
> > > + struct zram *zram = dev_to_zram(dev);
> > > + ktime_t cutoff_time = 0;
> > > + ssize_t rv = -EINVAL;
> > >
> > > - return len;
> > > + if (!sysfs_streq(buf, "all")) {
> > > +#ifdef CONFIG_ZRAM_MEMORY_TRACKING
> > > + u64 age_sec;
> > > + /* If it did not parse as 'all' try to treat it as an integer */
> > > + if (!kstrtoull(buf, 10, &age_sec))
> >
> > nit:
> > Do we need such limit base which work with only 10 base?
> > Passing 0 would give more flexibility.
> >
> > Otherwise, looks good to me.
> >
> > Thanks, Brian.
> >
> > > + cutoff_time = ktime_sub(ktime_get_boottime(),
> > > + ns_to_ktime(age_sec * NSEC_PER_SEC));
> > > + else
> > > +#endif
> > > + goto out;
> > > + }
> > > +
> > > + down_read(&zram->init_lock);
> > > + if (!init_done(zram))
> > > + goto out_unlock;
> > > +
> > > + /* A age_sec of 0 marks everything as idle, this is the "all" behavior */
> > > + mark_idle(zram, cutoff_time);
> > > + rv = len;
> > > +
> > > +out_unlock:
> > > + up_read(&zram->init_lock);
> > > +out:
> > > + return rv;
> > > }
> > >
> > > #ifdef CONFIG_ZRAM_WRITEBACK
> > > --
> > > 2.33.0.464.g1972c5931b-goog
> > >