Mailing List Archive

[XEN][RFC PATCH v3 10/14] xen/arm: Implement device tree node removal functionalities
Introduce sysctl XEN_SYSCTL_dt_overlay to remove device-tree nodes added using
device tree overlay.

xl overlay remove file.dtbo:
Removes all the nodes in a given dtbo.
First, removes IRQ permissions and MMIO accesses. Next, it finds the nodes
in dt_host and delete the device node entries from dt_host.

The nodes get removed only if it is not used by any of dom0 or domio.

Also, added overlay_track struct to keep the track of added node through device
tree overlay. overlay_track has dt_host_new which is unflattened form of updated
fdt and name of overlay nodes. When a node is removed, we also free the memory
used by overlay_track for the particular overlay node.

Signed-off-by: Vikram Garhwal <fnu.vikram@xilinx.com>
---
xen/common/Makefile | 1 +
xen/common/dt_overlay.c | 447 +++++++++++++++++++++++++++++++++++
xen/common/sysctl.c | 10 +
xen/include/public/sysctl.h | 18 ++
xen/include/xen/dt_overlay.h | 47 ++++
5 files changed, 523 insertions(+)
create mode 100644 xen/common/dt_overlay.c
create mode 100644 xen/include/xen/dt_overlay.h

diff --git a/xen/common/Makefile b/xen/common/Makefile
index dc8d3a13f5..2eb5734f8e 100644
--- a/xen/common/Makefile
+++ b/xen/common/Makefile
@@ -54,6 +54,7 @@ obj-y += wait.o
obj-bin-y += warning.init.o
obj-$(CONFIG_XENOPROF) += xenoprof.o
obj-y += xmalloc_tlsf.o
+obj-$(CONFIG_OVERLAY_DTB) += dt_overlay.o

obj-bin-$(CONFIG_X86) += $(foreach n,decompress bunzip2 unxz unlzma lzo unlzo unlz4 unzstd earlycpio,$(n).init.o)

diff --git a/xen/common/dt_overlay.c b/xen/common/dt_overlay.c
new file mode 100644
index 0000000000..fcb31de495
--- /dev/null
+++ b/xen/common/dt_overlay.c
@@ -0,0 +1,447 @@
+/*
+ * xen/common/dt_overlay.c
+ *
+ * Device tree overlay support in Xen.
+ *
+ * Copyright (c) 2021 Xilinx Inc.
+ * Written by Vikram Garhwal <fnu.vikram@xilinx.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms and conditions of the GNU General Public
+ * License, version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+#include <xen/iocap.h>
+#include <xen/xmalloc.h>
+#include <asm/domain_build.h>
+#include <xen/dt_overlay.h>
+#include <xen/guest_access.h>
+
+static LIST_HEAD(overlay_tracker);
+static DEFINE_SPINLOCK(overlay_lock);
+
+static int dt_overlay_remove_node(struct dt_device_node *device_node)
+{
+ struct dt_device_node *np;
+ struct dt_device_node *parent_node;
+ struct dt_device_node *current_node;
+
+ parent_node = device_node->parent;
+
+ current_node = parent_node;
+
+ if ( parent_node == NULL )
+ {
+ dt_dprintk("%s's parent node not found\n", device_node->name);
+ return -EFAULT;
+ }
+
+ np = parent_node->child;
+
+ if ( np == NULL )
+ {
+ dt_dprintk("parent node %s's not found\n", parent_node->name);
+ return -EFAULT;
+ }
+
+ /* If node to be removed is only child node or first child. */
+ if ( !dt_node_cmp(np->full_name, device_node->full_name) )
+ {
+ current_node->allnext = np->allnext;
+
+ /* If node is first child but not the only child. */
+ if ( np->sibling != NULL )
+ current_node->child = np->sibling;
+ else
+ /* If node is only child. */
+ current_node->child = NULL;
+ return 0;
+ }
+
+ for ( np = parent_node->child; np->sibling != NULL; np = np->sibling )
+ {
+ current_node = np;
+ if ( !dt_node_cmp(np->sibling->full_name, device_node->full_name) )
+ {
+ /* Found the node. Now we remove it. */
+ current_node->allnext = np->allnext->allnext;
+
+ if ( np->sibling->sibling )
+ current_node->sibling = np->sibling->sibling;
+ else
+ current_node->sibling = NULL;
+
+ break;
+ }
+ }
+
+ return 0;
+}
+
+/* Basic sanity check for the dtbo tool stack provided to Xen. */
+static int check_overlay_fdt(const void *overlay_fdt, uint32_t overlay_fdt_size)
+{
+ if ( (fdt_totalsize(overlay_fdt) != overlay_fdt_size) ||
+ fdt_check_header(overlay_fdt) )
+ {
+ printk(XENLOG_ERR "The overlay FDT is not a valid Flat Device Tree\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static unsigned int overlay_node_count(void *fdto)
+{
+ unsigned int num_overlay_nodes = 0;
+ int fragment;
+
+ fdt_for_each_subnode(fragment, fdto, 0)
+ {
+
+ int subnode;
+ int overlay;
+
+ overlay = fdt_subnode_offset(fdto, fragment, "__overlay__");
+
+ /*
+ * Overlay value can be < 0. But fdt_for_each_subnode() loop checks for
+ * overlay >= 0. So, no need for a overlay>=0 check here.
+ */
+
+ fdt_for_each_subnode(subnode, fdto, overlay)
+ {
+ num_overlay_nodes++;
+ }
+ }
+
+ return num_overlay_nodes;
+}
+
+/*
+ * overlay_get_nodes_info will get the all node's full name with path. This is
+ * useful when checking node for duplication i.e. dtbo tries to add nodes which
+ * already exists in device tree.
+ */
+static int overlay_get_nodes_info(const void *fdto, char ***nodes_full_path,
+ unsigned int num_overlay_nodes)
+{
+ int fragment;
+ unsigned int node_num = 0;
+
+ *nodes_full_path = xmalloc_bytes(num_overlay_nodes * sizeof(char *));
+
+ if ( *nodes_full_path == NULL )
+ return -ENOMEM;
+ memset(*nodes_full_path, 0x0, num_overlay_nodes * sizeof(char *));
+
+ fdt_for_each_subnode(fragment, fdto, 0)
+ {
+ int target;
+ int overlay;
+ int subnode;
+ const char *target_path;
+
+ target = fdt_overlay_target_offset(device_tree_flattened, fdto,
+ fragment, &target_path);
+ if ( target < 0 )
+ return target;
+
+ overlay = fdt_subnode_offset(fdto, fragment, "__overlay__");
+
+ /*
+ * Overlay value can be < 0. But fdt_for_each_subnode() loop checks for
+ * overlay >= 0. So, no need for a overlay>=0 check here.
+ */
+ fdt_for_each_subnode(subnode, fdto, overlay)
+ {
+ const char *node_name = NULL;
+ unsigned int node_name_len = 0;
+ unsigned int target_path_len = strlen(target_path);
+ unsigned int node_full_name_len = 0;
+
+ node_name = fdt_get_name(fdto, subnode, &node_name_len);
+
+ if ( node_name == NULL )
+ return -EINVAL;
+
+ /*
+ * Magic number 2 is for adding '/'. This is done to keep the
+ * node_full_name in the correct full node name format.
+ */
+ node_full_name_len = target_path_len + node_name_len + 2;
+
+ (*nodes_full_path)[node_num] = xmalloc_bytes(node_full_name_len);
+
+ if ( (*nodes_full_path)[node_num] == NULL )
+ return -ENOMEM;
+
+ memcpy((*nodes_full_path)[node_num], target_path, target_path_len);
+
+ (*nodes_full_path)[node_num][target_path_len] = '/';
+
+ memcpy((*nodes_full_path)[node_num] + target_path_len + 1, node_name,
+ node_name_len);
+
+ (*nodes_full_path)[node_num][node_full_name_len - 1] = '\0';
+
+ node_num++;
+ }
+ }
+
+ return 0;
+}
+
+/* Remove nodes from dt_host. */
+static int remove_nodes(char **full_dt_node_path, int **nodes_irq,
+ int *node_num_irq, unsigned int num_nodes)
+{
+ struct domain *d = hardware_domain;
+ int rc = 0;
+ struct dt_device_node *overlay_node;
+ unsigned int naddr;
+ unsigned int i, j, nirq;
+ u64 addr, size;
+ domid_t domid = 0;
+
+ for ( j = 0; j < num_nodes; j++ )
+ {
+ dt_dprintk("Finding node %s in the dt_host\n", full_dt_node_path[j]);
+
+ overlay_node = dt_find_node_by_path(full_dt_node_path[j]);
+
+ if ( overlay_node == NULL )
+ {
+ printk(XENLOG_ERR "Device %s is not present in the tree. Removing nodes failed\n",
+ full_dt_node_path[j]);
+ return -EINVAL;
+ }
+
+ domid = dt_device_used_by(overlay_node);
+
+ dt_dprintk("Checking if node %s is used by any domain\n",
+ full_dt_node_path[j]);
+
+ /* Remove the node iff it's assigned to domain 0 or domain io. */
+ if ( domid != 0 && domid != DOMID_IO )
+ {
+ printk(XENLOG_ERR "Device %s as it is being used by domain %d. Removing nodes failed\n",
+ full_dt_node_path[j], domid);
+ return -EINVAL;
+ }
+
+ dt_dprintk("Removing node: %s\n", full_dt_node_path[j]);
+
+ nirq = node_num_irq[j];
+
+ /* Remove IRQ permission */
+ for ( i = 0; i < nirq; i++ )
+ {
+ rc = nodes_irq[j][i];
+ /*
+ * TODO: We don't handle shared IRQs for now. So, it is assumed that
+ * the IRQs was not shared with another domain.
+ */
+ rc = irq_deny_access(d, rc);
+ if ( rc )
+ {
+ printk(XENLOG_ERR "unable to revoke access for irq %u for %s\n",
+ i, dt_node_full_name(overlay_node));
+ return rc;
+ }
+ }
+
+ rc = iommu_remove_dt_device(overlay_node);
+ if ( rc != 0 && rc != -ENXIO )
+ return rc;
+
+ naddr = dt_number_of_address(overlay_node);
+
+ /* Remove mmio access. */
+ for ( i = 0; i < naddr; i++ )
+ {
+ rc = dt_device_get_address(overlay_node, i, &addr, &size);
+ if ( rc )
+ {
+ printk(XENLOG_ERR "Unable to retrieve address %u for %s\n",
+ i, dt_node_full_name(overlay_node));
+ return rc;
+ }
+
+ rc = iomem_deny_access(d, paddr_to_pfn(addr),
+ paddr_to_pfn(PAGE_ALIGN(addr + size - 1)));
+ if ( rc )
+ {
+ printk(XENLOG_ERR "Unable to remove dom%d access to"
+ " 0x%"PRIx64" - 0x%"PRIx64"\n",
+ d->domain_id,
+ addr & PAGE_MASK, PAGE_ALIGN(addr + size) - 1);
+ return rc;
+ }
+ }
+
+ rc = dt_overlay_remove_node(overlay_node);
+ if ( rc )
+ return rc;
+ }
+
+ return rc;
+}
+
+/*
+ * First finds the device node to remove. Check if the device is being used by
+ * any dom and finally remove it from dt_host. IOMMU is already being taken care
+ * while destroying the domain.
+ */
+static long handle_remove_overlay_nodes(char **full_dt_nodes_path,
+ unsigned int num_nodes)
+{
+ int rc = 0;
+ struct overlay_track *entry, *temp, *track;
+ bool found_entry = false;
+ unsigned int i;
+
+ spin_lock(&overlay_lock);
+
+ /*
+ * First check if dtbo is correct i.e. it should one of the dtbo which was
+ * used when dynamically adding the node.
+ * Limitation: Cases with same node names but different property are not
+ * supported currently. We are relying on user to provide the same dtbo
+ * as it was used when adding the nodes.
+ */
+ list_for_each_entry_safe( entry, temp, &overlay_tracker, entry )
+ {
+ /* Checking the num of nodes first. If not same skip to next entry. */
+ if ( num_nodes == entry->num_nodes )
+ {
+ for ( i = 0; i < num_nodes; i++ )
+ {
+ if ( strcmp(full_dt_nodes_path[i], entry->nodes_fullname[i]) )
+ {
+ /* Node name didn't match. Skip to next entry. */
+ break;
+ }
+ }
+
+ /* Found one tracker with all node name matching. */
+ track = entry;
+ found_entry = true;
+ break;
+ }
+ }
+
+ if ( found_entry == false )
+ {
+ rc = -EINVAL;
+
+ printk(XENLOG_ERR "Cannot find any matching tracker with input dtbo."
+ " Removing nodes is supported for only prior added dtbo. Please"
+ " provide a valid dtbo which was used to add the nodes.\n");
+ goto out;
+
+ }
+
+ rc = remove_nodes(full_dt_nodes_path, entry->nodes_irq, entry->node_num_irq,
+ num_nodes);
+
+ if ( rc )
+ {
+ printk(XENLOG_ERR "Removing node failed\n");
+ goto out;
+ }
+
+ list_del(&entry->entry);
+
+ for ( i = 0; i < entry->num_nodes && entry->nodes_fullname[i] != NULL; i++ )
+ {
+ xfree(entry->nodes_fullname[i]);
+ }
+ xfree(entry->nodes_fullname);
+ for ( i = 0; i < entry->num_nodes && entry->nodes_irq[i] != NULL; i++ )
+ {
+ xfree(entry->nodes_irq[i]);
+ }
+ xfree(entry->nodes_irq);
+ xfree(entry->node_num_irq);
+ xfree(entry->dt_host_new);
+ xfree(entry->fdt);
+ xfree(entry);
+
+out:
+ spin_unlock(&overlay_lock);
+ return rc;
+}
+
+long dt_sysctl(struct xen_sysctl *op)
+{
+ long ret = 0;
+ void *overlay_fdt;
+ char **nodes_full_path = NULL;
+ unsigned int num_overlay_nodes = 0;
+
+ if ( op->u.dt_overlay.overlay_fdt_size <= 0 )
+ return -EINVAL;
+
+ overlay_fdt = xmalloc_bytes(op->u.dt_overlay.overlay_fdt_size);
+
+ if ( overlay_fdt == NULL )
+ return -ENOMEM;
+
+ ret = copy_from_guest(overlay_fdt, op->u.dt_overlay.overlay_fdt,
+ op->u.dt_overlay.overlay_fdt_size);
+ if ( ret )
+ {
+ gprintk(XENLOG_ERR, "copy from guest failed\n");
+ xfree(overlay_fdt);
+
+ return -EFAULT;
+ }
+
+ switch ( op->u.dt_overlay.overlay_op )
+ {
+ case XEN_SYSCTL_DT_OVERLAY_REMOVE:
+ ret = check_overlay_fdt(overlay_fdt,
+ op->u.dt_overlay.overlay_fdt_size);
+ if ( ret )
+ {
+ ret = -EFAULT;
+ break;
+ }
+
+ num_overlay_nodes = overlay_node_count(overlay_fdt);
+ if ( num_overlay_nodes == 0 )
+ {
+ ret = -ENOMEM;
+ break;
+ }
+
+ ret = overlay_get_nodes_info(overlay_fdt, &nodes_full_path,
+ num_overlay_nodes);
+ if ( ret )
+ break;
+
+ ret = handle_remove_overlay_nodes(nodes_full_path,
+ num_overlay_nodes);
+ break;
+
+ default:
+ break;
+ }
+
+ if ( nodes_full_path != NULL )
+ {
+ int i;
+ for ( i = 0; i < num_overlay_nodes && nodes_full_path[i] != NULL; i++ )
+ {
+ xfree(nodes_full_path[i]);
+ }
+ xfree(nodes_full_path);
+ }
+
+ return ret;
+}
diff --git a/xen/common/sysctl.c b/xen/common/sysctl.c
index fc4a0b31d6..d685c07159 100644
--- a/xen/common/sysctl.c
+++ b/xen/common/sysctl.c
@@ -29,6 +29,10 @@
#include <xen/livepatch.h>
#include <xen/coverage.h>

+#ifdef CONFIG_OVERLAY_DTB
+#include <xen/dt_overlay.h>
+#endif
+
long cf_check do_sysctl(XEN_GUEST_HANDLE_PARAM(xen_sysctl_t) u_sysctl)
{
long ret = 0;
@@ -482,6 +486,12 @@ long cf_check do_sysctl(XEN_GUEST_HANDLE_PARAM(xen_sysctl_t) u_sysctl)
copyback = 1;
break;

+#ifdef CONFIG_OVERLAY_DTB
+ case XEN_SYSCTL_overlay:
+ ret = dt_sysctl(op);
+ break;
+#endif
+
default:
ret = arch_do_sysctl(op, u_sysctl);
copyback = 0;
diff --git a/xen/include/public/sysctl.h b/xen/include/public/sysctl.h
index 55252e97f2..e256aeb7c6 100644
--- a/xen/include/public/sysctl.h
+++ b/xen/include/public/sysctl.h
@@ -1069,6 +1069,22 @@ typedef struct xen_sysctl_cpu_policy xen_sysctl_cpu_policy_t;
DEFINE_XEN_GUEST_HANDLE(xen_sysctl_cpu_policy_t);
#endif

+#define XEN_SYSCTL_DT_OVERLAY_REMOVE 2
+
+/*
+ * XEN_SYSCTL_dt_overlay
+ * Performs addition/removal of device tree nodes under parent node using dtbo.
+ * This does in three steps:
+ * - Adds/Removes the nodes from dt_host.
+ * - Adds/Removes IRQ permission for the nodes.
+ * - Adds/Removes MMIO accesses.
+ */
+struct xen_sysctl_dt_overlay {
+ XEN_GUEST_HANDLE_64(void) overlay_fdt;
+ uint32_t overlay_fdt_size; /* Overlay dtb size. */
+ uint8_t overlay_op; /* Add or remove. */
+};
+
struct xen_sysctl {
uint32_t cmd;
#define XEN_SYSCTL_readconsole 1
@@ -1099,6 +1115,7 @@ struct xen_sysctl {
#define XEN_SYSCTL_livepatch_op 27
/* #define XEN_SYSCTL_set_parameter 28 */
#define XEN_SYSCTL_get_cpu_policy 29
+#define XEN_SYSCTL_dt_overlay 30
uint32_t interface_version; /* XEN_SYSCTL_INTERFACE_VERSION */
union {
struct xen_sysctl_readconsole readconsole;
@@ -1129,6 +1146,7 @@ struct xen_sysctl {
#if defined(__i386__) || defined(__x86_64__)
struct xen_sysctl_cpu_policy cpu_policy;
#endif
+ struct xen_sysctl_dt_overlay dt_overlay;
uint8_t pad[128];
} u;
};
diff --git a/xen/include/xen/dt_overlay.h b/xen/include/xen/dt_overlay.h
new file mode 100644
index 0000000000..525818b77c
--- /dev/null
+++ b/xen/include/xen/dt_overlay.h
@@ -0,0 +1,47 @@
+/*
+ * xen/common/dt_overlay.c
+ *
+ * Device tree overlay suppoert in Xen.
+ *
+ * Copyright (c) 2021 Xilinx Inc.
+ * Written by Vikram Garhwal <fnu.vikram@xilinx.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms and conditions of the GNU General Public
+ * License, version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+#ifndef __XEN_DT_SYSCTL_H__
+#define __XEN_DT_SYSCTL_H__
+
+#include <xen/list.h>
+#include <xen/libfdt/libfdt.h>
+#include <xen/device_tree.h>
+#include <public/sysctl.h>
+
+/*
+ * overlay_node_track describes information about added nodes through dtbo.
+ * @entry: List pointer.
+ * @dt_host_new: Pointer to the updated dt_host_new unflattened 'updated fdt'.
+ * @fdt: Stores the fdt.
+ * @nodes_fullname: Stores the full name of nodes.
+ * @nodes_irq: Stores the IRQ added from overlay dtb.
+ * @node_num_irq: Stores num of IRQ for each node in overlay dtb.
+ * @num_nodes: Stores total number of nodes in overlay dtb.
+ */
+struct overlay_track {
+ struct list_head entry;
+ struct dt_device_node *dt_host_new;
+ void *fdt;
+ char **nodes_fullname;
+ int **nodes_irq;
+ int *node_num_irq;
+ unsigned int num_nodes;
+};
+
+long dt_sysctl(struct xen_sysctl *op);
+#endif
--
2.17.1
Re: [XEN][RFC PATCH v3 10/14] xen/arm: Implement device tree node removal functionalities [ In reply to ]
>
> diff --git a/xen/common/Makefile b/xen/common/Makefile
> index dc8d3a13f5..2eb5734f8e 100644
> --- a/xen/common/Makefile
> +++ b/xen/common/Makefile
> @@ -54,6 +54,7 @@ obj-y += wait.o
> obj-bin-y += warning.init.o
> obj-$(CONFIG_XENOPROF) += xenoprof.o
> obj-y += xmalloc_tlsf.o
> +obj-$(CONFIG_OVERLAY_DTB) += dt_overlay.o

I think the entries are in alphabetical order, this should be added after += domain.o

> +/*
> + * overlay_get_nodes_info will get the all node's full name with path. This is
> + * useful when checking node for duplication i.e. dtbo tries to add nodes which
> + * already exists in device tree.
> + */
> +static int overlay_get_nodes_info(const void *fdto, char ***nodes_full_path,
> + unsigned int num_overlay_nodes)
> +{
> + int fragment;
> + unsigned int node_num = 0;
> +
> + *nodes_full_path = xmalloc_bytes(num_overlay_nodes * sizeof(char *));

Here you can use xzalloc_bytes and...

> +
> + if ( *nodes_full_path == NULL )
> + return -ENOMEM;
> + memset(*nodes_full_path, 0x0, num_overlay_nodes * sizeof(char *));

Get rid of this memset

> +
> +/* Remove nodes from dt_host. */
> +static int remove_nodes(char **full_dt_node_path, int **nodes_irq,
> + int *node_num_irq, unsigned int num_nodes)
> +{
> + struct domain *d = hardware_domain;
> + int rc = 0;
> + struct dt_device_node *overlay_node;
> + unsigned int naddr;
> + unsigned int i, j, nirq;
> + u64 addr, size;
> + domid_t domid = 0;
> +
> + for ( j = 0; j < num_nodes; j++ )
> + {
> + dt_dprintk("Finding node %s in the dt_host\n", full_dt_node_path[j]);
> +
> + overlay_node = dt_find_node_by_path(full_dt_node_path[j]);
> +
> + if ( overlay_node == NULL )
> + {
> + printk(XENLOG_ERR "Device %s is not present in the tree. Removing nodes failed\n",
> + full_dt_node_path[j]);
> + return -EINVAL;
> + }
> +
> + domid = dt_device_used_by(overlay_node);
> +
> + dt_dprintk("Checking if node %s is used by any domain\n",
> + full_dt_node_path[j]);
> +
> + /* Remove the node iff it's assigned to domain 0 or domain io. */
> + if ( domid != 0 && domid != DOMID_IO )
> + {
> + printk(XENLOG_ERR "Device %s as it is being used by domain %d. Removing nodes failed\n",
> + full_dt_node_path[j], domid);
> + return -EINVAL;
> + }
> +
> + dt_dprintk("Removing node: %s\n", full_dt_node_path[j]);
> +
> + nirq = node_num_irq[j];
> +
> + /* Remove IRQ permission */
> + for ( i = 0; i < nirq; i++ )
> + {
> + rc = nodes_irq[j][i];
> + /*
> + * TODO: We don't handle shared IRQs for now. So, it is assumed that
> + * the IRQs was not shared with another domain.
> + */
> + rc = irq_deny_access(d, rc);
> + if ( rc )
> + {
> + printk(XENLOG_ERR "unable to revoke access for irq %u for %s\n",
> + i, dt_node_full_name(overlay_node));
> + return rc;
> + }
> + }
> +
> + rc = iommu_remove_dt_device(overlay_node);
> + if ( rc != 0 && rc != -ENXIO )
> + return rc;
> +
> + naddr = dt_number_of_address(overlay_node);
> +
> + /* Remove mmio access. */
> + for ( i = 0; i < naddr; i++ )
> + {
> + rc = dt_device_get_address(overlay_node, i, &addr, &size);
> + if ( rc )
> + {
> + printk(XENLOG_ERR "Unable to retrieve address %u for %s\n",
> + i, dt_node_full_name(overlay_node));
> + return rc;
> + }
> +
> + rc = iomem_deny_access(d, paddr_to_pfn(addr),
> + paddr_to_pfn(PAGE_ALIGN(addr + size - 1)));
> + if ( rc )
> + {
> + printk(XENLOG_ERR "Unable to remove dom%d access to"
> + " 0x%"PRIx64" - 0x%"PRIx64"\n",
> + d->domain_id,
> + addr & PAGE_MASK, PAGE_ALIGN(addr + size) - 1);

NIT: here in each line under XENLOG_ERR, there is an extra space, these lines
Could be aligned to XENLOG_ERR, just for code style purpose.

> + return rc;
> + }
> + }
> +
> + rc = dt_overlay_remove_node(overlay_node);
> + if ( rc )
> + return rc;
> + }
> +
> + return rc;
> +}
> +
>
> +long dt_sysctl(struct xen_sysctl *op)
> +{
> + long ret = 0;
> + void *overlay_fdt;
> + char **nodes_full_path = NULL;
> + unsigned int num_overlay_nodes = 0;
> +
> + if ( op->u.dt_overlay.overlay_fdt_size <= 0 )
> + return -EINVAL;
> +
> + overlay_fdt = xmalloc_bytes(op->u.dt_overlay.overlay_fdt_size);
> +
> + if ( overlay_fdt == NULL )
> + return -ENOMEM;
> +
> + ret = copy_from_guest(overlay_fdt, op->u.dt_overlay.overlay_fdt,
> + op->u.dt_overlay.overlay_fdt_size);
> + if ( ret )
> + {
> + gprintk(XENLOG_ERR, "copy from guest failed\n");
> + xfree(overlay_fdt);
> +
> + return -EFAULT;
> + }
> +
> + switch ( op->u.dt_overlay.overlay_op )
> + {
> + case XEN_SYSCTL_DT_OVERLAY_REMOVE:
> + ret = check_overlay_fdt(overlay_fdt,
> + op->u.dt_overlay.overlay_fdt_size);
> + if ( ret )
> + {
> + ret = -EFAULT;
> + break;
> + }
> +
> + num_overlay_nodes = overlay_node_count(overlay_fdt);
> + if ( num_overlay_nodes == 0 )
> + {
> + ret = -ENOMEM;
> + break;
> + }
> +
> + ret = overlay_get_nodes_info(overlay_fdt, &nodes_full_path,
> + num_overlay_nodes);
> + if ( ret )
> + break;
> +
> + ret = handle_remove_overlay_nodes(nodes_full_path,
> + num_overlay_nodes);
> + break;
> +
> + default:
> + break;
> + }
> +
> + if ( nodes_full_path != NULL )
> + {
> + int I;

unsigned int

> + for ( i = 0; i < num_overlay_nodes && nodes_full_path[i] != NULL; i++ )
> + {
> + xfree(nodes_full_path[i]);
> + }
> + xfree(nodes_full_path);
> + }
> +
> + return ret;
> +}
> diff --git a/xen/common/sysctl.c b/xen/common/sysctl.c
> index fc4a0b31d6..d685c07159 100644
> --- a/xen/common/sysctl.c
> +++ b/xen/common/sysctl.c
> @@ -29,6 +29,10 @@
> #include <xen/livepatch.h>
> #include <xen/coverage.h>
>
> +#ifdef CONFIG_OVERLAY_DTB
> +#include <xen/dt_overlay.h>
> +#endif

Maybe this header can be included anyway, removing ifdefs, and...

> +
> long cf_check do_sysctl(XEN_GUEST_HANDLE_PARAM(xen_sysctl_t) u_sysctl)
> {
> long ret = 0;
> @@ -482,6 +486,12 @@ long cf_check do_sysctl(XEN_GUEST_HANDLE_PARAM(xen_sysctl_t) u_sysctl)
> copyback = 1;
> break;
>
> +#ifdef CONFIG_OVERLAY_DTB
> + case XEN_SYSCTL_overlay:
> + ret = dt_sysctl(op);
> + break;
> +#endif

Also here you can remove ifdefs and use the header to switch between the real implementation
or a static inline returning error if CONFIG_OVERLAY_DTB is not enabled, take a look in
livepatch_op(struct xen_sysctl_livepatch_op *op).

dt_sysctl can take struct xen_sysctl_dt_overlay* as input.

> +
> default:
> ret = arch_do_sysctl(op, u_sysctl);
> copyback = 0;
> diff --git a/xen/include/public/sysctl.h b/xen/include/public/sysctl.h
> index 55252e97f2..e256aeb7c6 100644
> --- a/xen/include/public/sysctl.h
> +++ b/xen/include/public/sysctl.h
> @@ -1069,6 +1069,22 @@ typedef struct xen_sysctl_cpu_policy xen_sysctl_cpu_policy_t;
> DEFINE_XEN_GUEST_HANDLE(xen_sysctl_cpu_policy_t);
> #endif
>
> +#define XEN_SYSCTL_DT_OVERLAY_REMOVE 2
> +
> +/*
> + * XEN_SYSCTL_dt_overlay
> + * Performs addition/removal of device tree nodes under parent node using dtbo.
> + * This does in three steps:
> + * - Adds/Removes the nodes from dt_host.
> + * - Adds/Removes IRQ permission for the nodes.
> + * - Adds/Removes MMIO accesses.
> + */
> +struct xen_sysctl_dt_overlay {
> + XEN_GUEST_HANDLE_64(void) overlay_fdt;
> + uint32_t overlay_fdt_size; /* Overlay dtb size. */
> + uint8_t overlay_op; /* Add or remove. */
> +};
> +
> struct xen_sysctl {
> uint32_t cmd;
> #define XEN_SYSCTL_readconsole 1
> @@ -1099,6 +1115,7 @@ struct xen_sysctl {
> #define XEN_SYSCTL_livepatch_op 27
> /* #define XEN_SYSCTL_set_parameter 28 */
> #define XEN_SYSCTL_get_cpu_policy 29
> +#define XEN_SYSCTL_dt_overlay 30
> uint32_t interface_version; /* XEN_SYSCTL_INTERFACE_VERSION */
> union {
> struct xen_sysctl_readconsole readconsole;
> @@ -1129,6 +1146,7 @@ struct xen_sysctl {
> #if defined(__i386__) || defined(__x86_64__)
> struct xen_sysctl_cpu_policy cpu_policy;
> #endif
> + struct xen_sysctl_dt_overlay dt_overlay;

Here I would need an opinion from someone more experienced, but I think when a change
is made in this structure, XEN_SYSCTL_INTERFACE_VERSION should be bumped?

> uint8_t pad[128];
> } u;
> };
> diff --git a/xen/include/xen/dt_overlay.h b/xen/include/xen/dt_overlay.h
> new file mode 100644
> index 0000000000..525818b77c
> --- /dev/null
> +++ b/xen/include/xen/dt_overlay.h
> @@ -0,0 +1,47 @@
> +/*
> + * xen/common/dt_overlay.c

Typo: dt_overlay.h

> + *
> + * Device tree overlay suppoert in Xen.

Typo: support

> + *
> + * Copyright (c) 2021 Xilinx Inc.
> + * Written by Vikram Garhwal <fnu.vikram@xilinx.com>
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms and conditions of the GNU General Public
> + * License, version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> + * GNU General Public License for more details.
> + */
> +#ifndef __XEN_DT_SYSCTL_H__
> +#define __XEN_DT_SYSCTL_H__
> +
> +#include <xen/list.h>
> +#include <xen/libfdt/libfdt.h>
> +#include <xen/device_tree.h>
> +#include <public/sysctl.h>

In case you decide to pass struct xen_sysctl_dt_overlay to dt_sysctl, you can remove
#include <public/sysctl.h> and use a forward declaration to struct xen_sysctl_dt_overlay
instead.

> +
> +/*
> + * overlay_node_track describes information about added nodes through dtbo.
> + * @entry: List pointer.
> + * @dt_host_new: Pointer to the updated dt_host_new unflattened 'updated fdt'.
> + * @fdt: Stores the fdt.
> + * @nodes_fullname: Stores the full name of nodes.
> + * @nodes_irq: Stores the IRQ added from overlay dtb.
> + * @node_num_irq: Stores num of IRQ for each node in overlay dtb.
> + * @num_nodes: Stores total number of nodes in overlay dtb.
> + */
> +struct overlay_track {
> + struct list_head entry;
> + struct dt_device_node *dt_host_new;
> + void *fdt;
> + char **nodes_fullname;
> + int **nodes_irq;
> + int *node_num_irq;
> + unsigned int num_nodes;
> +};
> +
> +long dt_sysctl(struct xen_sysctl *op);
> +#endif
>
Re: [XEN][RFC PATCH v3 10/14] xen/arm: Implement device tree node removal functionalities [ In reply to ]
Hi Vikram,

On 08/03/2022 19:47, Vikram Garhwal wrote:
> Introduce sysctl XEN_SYSCTL_dt_overlay to remove device-tree nodes added using
> device tree overlay.
>
> xl overlay remove file.dtbo:
> Removes all the nodes in a given dtbo.
> First, removes IRQ permissions and MMIO accesses. Next, it finds the nodes
> in dt_host and delete the device node entries from dt_host.
>
> The nodes get removed only if it is not used by any of dom0 or domio.
>
> Also, added overlay_track struct to keep the track of added node through device
> tree overlay. overlay_track has dt_host_new which is unflattened form of updated
> fdt and name of overlay nodes. When a node is removed, we also free the memory
> used by overlay_track for the particular overlay node.
>
> Signed-off-by: Vikram Garhwal <fnu.vikram@xilinx.com>
> ---
> xen/common/Makefile | 1 +
> xen/common/dt_overlay.c | 447 +++++++++++++++++++++++++++++++++++
> xen/common/sysctl.c | 10 +
> xen/include/public/sysctl.h | 18 ++
> xen/include/xen/dt_overlay.h | 47 ++++
> 5 files changed, 523 insertions(+)
> create mode 100644 xen/common/dt_overlay.c
> create mode 100644 xen/include/xen/dt_overlay.h
>
> diff --git a/xen/common/Makefile b/xen/common/Makefile
> index dc8d3a13f5..2eb5734f8e 100644
> --- a/xen/common/Makefile
> +++ b/xen/common/Makefile
> @@ -54,6 +54,7 @@ obj-y += wait.o
> obj-bin-y += warning.init.o
> obj-$(CONFIG_XENOPROF) += xenoprof.o
> obj-y += xmalloc_tlsf.o
> +obj-$(CONFIG_OVERLAY_DTB) += dt_overlay.o
>
> obj-bin-$(CONFIG_X86) += $(foreach n,decompress bunzip2 unxz unlzma lzo unlzo unlz4 unzstd earlycpio,$(n).init.o)
>
> diff --git a/xen/common/dt_overlay.c b/xen/common/dt_overlay.c
> new file mode 100644
> index 0000000000..fcb31de495
> --- /dev/null
> +++ b/xen/common/dt_overlay.c
> @@ -0,0 +1,447 @@
> +/*
> + * xen/common/dt_overlay.c
> + *
> + * Device tree overlay support in Xen.
> + *
> + * Copyright (c) 2021 Xilinx Inc.
> + * Written by Vikram Garhwal <fnu.vikram@xilinx.com>
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms and conditions of the GNU General Public
> + * License, version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> + * GNU General Public License for more details.
> + */
> +#include <xen/iocap.h>
> +#include <xen/xmalloc.h>
> +#include <asm/domain_build.h>
> +#include <xen/dt_overlay.h>
> +#include <xen/guest_access.h>
> +
> +static LIST_HEAD(overlay_tracker);
> +static DEFINE_SPINLOCK(overlay_lock);
> +
> +static int dt_overlay_remove_node(struct dt_device_node *device_node)
> +{
> + struct dt_device_node *np;
> + struct dt_device_node *parent_node;
> + struct dt_device_node *current_node;
> +
> + parent_node = device_node->parent;
> +
> + current_node = parent_node;
> +
> + if ( parent_node == NULL )
> + {
> + dt_dprintk("%s's parent node not found\n", device_node->name);
> + return -EFAULT;
> + }
> +
> + np = parent_node->child;
> +
> + if ( np == NULL )
> + {
> + dt_dprintk("parent node %s's not found\n", parent_node->name);
> + return -EFAULT;
> + }
> +
> + /* If node to be removed is only child node or first child. */
> + if ( !dt_node_cmp(np->full_name, device_node->full_name) )
> + {
> + current_node->allnext = np->allnext;

While reviewing the previous patches, I realized that we have nothing to
prevent someone to browse the device-tree while it is modified.

I am not sure this can be solved with just refcounting (like Linux
does). So maybe we need a read-write-lock. I am open to other
suggestions here.

> +
> + /* If node is first child but not the only child. */
> + if ( np->sibling != NULL )
> + current_node->child = np->sibling;
> + else
> + /* If node is only child. */
> + current_node->child = NULL;

Those 4 lines can be replaced with one line:

current_node->child = np->sibling;

> + return 0;
> + }
> +
> + for ( np = parent_node->child; np->sibling != NULL; np = np->sibling )
> + {
> + current_node = np;
> + if ( !dt_node_cmp(np->sibling->full_name, device_node->full_name) )
> + {
> + /* Found the node. Now we remove it. */
> + current_node->allnext = np->allnext->allnext;

I find this code quite confusing to read. AFAICT, 'np' and
'current_node' are exactly the same here. Why do you use different name
to access it?

> +
> + if ( np->sibling->sibling )
> + current_node->sibling = np->sibling->sibling;
> + else
> + current_node->sibling = NULL;

Same here. This could be replaced with:

current_node->child = nb->sibling->sibling;

> +
> + break;
> + }
> + }
> +
> + return 0;
> +}
> +
> +/* Basic sanity check for the dtbo tool stack provided to Xen. */
> +static int check_overlay_fdt(const void *overlay_fdt, uint32_t overlay_fdt_size)
> +{
> + if ( (fdt_totalsize(overlay_fdt) != overlay_fdt_size) ||
> + fdt_check_header(overlay_fdt) )
> + {
> + printk(XENLOG_ERR "The overlay FDT is not a valid Flat Device Tree\n");
> + return -EINVAL;
> + }
> +
> + return 0;
> +}
> +
> +static unsigned int overlay_node_count(void *fdto)
> +{
> + unsigned int num_overlay_nodes = 0;
> + int fragment;
> +
> + fdt_for_each_subnode(fragment, fdto, 0)
> + {
> +
> + int subnode;
> + int overlay;
> +
> + overlay = fdt_subnode_offset(fdto, fragment, "__overlay__");
> +
> + /*
> + * Overlay value can be < 0. But fdt_for_each_subnode() loop checks for
> + * overlay >= 0. So, no need for a overlay>=0 check here.
> + */
> +
> + fdt_for_each_subnode(subnode, fdto, overlay)
> + {
> + num_overlay_nodes++;
> + }
> + }
> +
> + return num_overlay_nodes;
> +}
> +
> +/*
> + * overlay_get_nodes_info will get the all node's full name with path. This is
> + * useful when checking node for duplication i.e. dtbo tries to add nodes which
> + * already exists in device tree.
> + */

AFAIU the code below will only retrieve one level of nodes. So if you have

foo {
bar {
}
}

Only foo will be part of the nodes_full_path. Is it correct?

> +static int overlay_get_nodes_info(const void *fdto, char ***nodes_full_path,
> + unsigned int num_overlay_nodes)
> +{
> + int fragment;
> + unsigned int node_num = 0;
> +
> + *nodes_full_path = xmalloc_bytes(num_overlay_nodes * sizeof(char *));
> +
> + if ( *nodes_full_path == NULL )
> + return -ENOMEM;
> + memset(*nodes_full_path, 0x0, num_overlay_nodes * sizeof(char *));
> +
> + fdt_for_each_subnode(fragment, fdto, 0)
> + {
> + int target;
> + int overlay;
> + int subnode;
> + const char *target_path;
> +
> + target = fdt_overlay_target_offset(device_tree_flattened, fdto,
> + fragment, &target_path);
> + if ( target < 0 )
> + return target;
> +
> + overlay = fdt_subnode_offset(fdto, fragment, "__overlay__");
> +
> + /*
> + * Overlay value can be < 0. But fdt_for_each_subnode() loop checks for
> + * overlay >= 0. So, no need for a overlay>=0 check here.
> + */
> + fdt_for_each_subnode(subnode, fdto, overlay)
> + {
> + const char *node_name = NULL;
> + unsigned int node_name_len = 0;
> + unsigned int target_path_len = strlen(target_path);
> + unsigned int node_full_name_len = 0;
> +
> + node_name = fdt_get_name(fdto, subnode, &node_name_len);
> +
> + if ( node_name == NULL )
> + return -EINVAL;
> +
> + /*
> + * Magic number 2 is for adding '/'. This is done to keep the
> + * node_full_name in the correct full node name format.
> + */
> + node_full_name_len = target_path_len + node_name_len + 2;
> +
> + (*nodes_full_path)[node_num] = xmalloc_bytes(node_full_name_len);
> +
> + if ( (*nodes_full_path)[node_num] == NULL )
> + return -ENOMEM;
> +
> + memcpy((*nodes_full_path)[node_num], target_path, target_path_len);
> +
> + (*nodes_full_path)[node_num][target_path_len] = '/';
> +
> + memcpy((*nodes_full_path)[node_num] + target_path_len + 1, node_name,
> + node_name_len);
> +
> + (*nodes_full_path)[node_num][node_full_name_len - 1] = '\0';
> +
> + node_num++;
> + }
> + }
> +
> + return 0;
> +}
> +
> +/* Remove nodes from dt_host. */
> +static int remove_nodes(char **full_dt_node_path, int **nodes_irq,
> + int *node_num_irq, unsigned int num_nodes)

Most of the information above are stored in overlay_track. So can we
pass a pointer to the overlay_track?

Also, I think most of the parameter (include overlay track) should not
be modified here. So please use const.

> +{
> + struct domain *d = hardware_domain;
> + int rc = 0;
> + struct dt_device_node *overlay_node;
> + unsigned int naddr;
> + unsigned int i, j, nirq;
> + u64 addr, size;
> + domid_t domid = 0;
> +
> + for ( j = 0; j < num_nodes; j++ )
> + {
> + dt_dprintk("Finding node %s in the dt_host\n", full_dt_node_path[j]);
> +
> + overlay_node = dt_find_node_by_path(full_dt_node_path[j]);
> +
> + if ( overlay_node == NULL )

This error (and some below) may happen because we partially removed the
DTBO but stopped because on error. So on the next run, it is possible
that "overlay_node" will be NULL and therefore you will not be able to
remove the node.

In your use-case, are you planning to ask the admin to reboot if you
can't remove a node?

> + {
> + printk(XENLOG_ERR "Device %s is not present in the tree. Removing nodes failed\n",
> + full_dt_node_path[j]);
> + return -EINVAL;
> + }
> +
> + domid = dt_device_used_by(overlay_node);
> +
> + dt_dprintk("Checking if node %s is used by any domain\n",
> + full_dt_node_path[j]);
> +
> + /* Remove the node iff it's assigned to domain 0 or domain io. */
> + if ( domid != 0 && domid != DOMID_IO )

I think I asked before, but I have seen no answer on that. What will
prevent the device to not be assigned after this check?

Also, in general, I think it would be helpful if you answer on the ML
questions. This would at least confirm that you have seen them and we
agree on what to do.

> + {
> + printk(XENLOG_ERR "Device %s as it is being used by domain %d. Removing nodes failed\n",
> + full_dt_node_path[j], domid);
> + return -EINVAL;
> + }
> +
> + dt_dprintk("Removing node: %s\n", full_dt_node_path[j]);
> +
> + nirq = node_num_irq[j];
> +
> + /* Remove IRQ permission */
> + for ( i = 0; i < nirq; i++ )
> + {
> + rc = nodes_irq[j][i];
> + /*
> + * TODO: We don't handle shared IRQs for now. So, it is assumed that
> + * the IRQs was not shared with another domain.
> + */

This is not what I meant in v2. Interrupts cannot be shared between
domain on Arm. However, interrupts can be shared between devices.

This is the latter part that needs a TODO.

In addition to that, as I wrote, an IRQ can be assigned to a *single*
domain without the device been assigned to that domain. So I think this
needs to be checked possibly by using the information stored in "desc"
to know where the IRQ was routed to.

> + rc = irq_deny_access(d, rc);
> + if ( rc )
> + {
> + printk(XENLOG_ERR "unable to revoke access for irq %u for %s\n",
> + i, dt_node_full_name(overlay_node));
> + return rc;
> + }
> + }
> +
> + rc = iommu_remove_dt_device(overlay_node);
> + if ( rc != 0 && rc != -ENXIO )
> + return rc;
> +
> + naddr = dt_number_of_address(overlay_node);
> +
> + /* Remove mmio access. */
> + for ( i = 0; i < naddr; i++ )
> + {
> + rc = dt_device_get_address(overlay_node, i, &addr, &size);
> + if ( rc )
> + {
> + printk(XENLOG_ERR "Unable to retrieve address %u for %s\n",
> + i, dt_node_full_name(overlay_node));
> + return rc;
> + }
> +
> + rc = iomem_deny_access(d, paddr_to_pfn(addr),
> + paddr_to_pfn(PAGE_ALIGN(addr + size - 1)));

I think you missed my comment here. Similar to the IRQs, you are asking
for trouble to parse the device-tree again. It would be better to store
the information using a rangeset (see include/xen/rangeset.h).

I also think the double array for the IRQs should be converted to a
rangeset as this would simplify the code.

Furthemore, you are removing the permission but not the mapping in the
P2M. Can you clarify why?


> + if ( rc )
> + {
> + printk(XENLOG_ERR "Unable to remove dom%d access to"
> + " 0x%"PRIx64" - 0x%"PRIx64"\n",
> + d->domain_id,
> + addr & PAGE_MASK, PAGE_ALIGN(addr + size) - 1);
> + return rc;
> + }
> + }
> +
> + rc = dt_overlay_remove_node(overlay_node);
> + if ( rc )
> + return rc;
> + }
> +
> + return rc;
> +}

[...]

> + * overlay_node_track describes information about added nodes through dtbo.
> + * @entry: List pointer.
> + * @dt_host_new: Pointer to the updated dt_host_new unflattened 'updated fdt'.
> + * @fdt: Stores the fdt.
> + * @nodes_fullname: Stores the full name of nodes.
> + * @nodes_irq: Stores the IRQ added from overlay dtb.
> + * @node_num_irq: Stores num of IRQ for each node in overlay dtb.
> + * @num_nodes: Stores total number of nodes in overlay dtb.
> + */
> +struct overlay_track {
> + struct list_head entry;
> + struct dt_device_node *dt_host_new;
> + void *fdt;
> + char **nodes_fullname;

Looking at the code, the main use for the fullname are to check the FDT
match and looking up in the DT.

In order to check the DT, you could use memcmp() to confirm both the
stored FDT and the one provided by the user match.

For the lookup, you could avoid it by storing a pointer to the root of
the new subtrees.

Please let me know if you disagree with this approach.

> + int **nodes_irq;
> + int *node_num_irq;
> + unsigned int num_nodes;
> +};
> +
> +long dt_sysctl(struct xen_sysctl *op);
> +#endif

Cheers,

--
Julien Grall
Re: [XEN][RFC PATCH v3 10/14] xen/arm: Implement device tree node removal functionalities [ In reply to ]
Hi Vikram,

A few more comments that I spotted after reviewing the next patch.

On 08/03/2022 19:47, Vikram Garhwal wrote:
> Introduce sysctl XEN_SYSCTL_dt_overlay to remove device-tree nodes added using
> device tree overlay.
>
> xl overlay remove file.dtbo:
> Removes all the nodes in a given dtbo.
> First, removes IRQ permissions and MMIO accesses. Next, it finds the nodes
> in dt_host and delete the device node entries from dt_host.
>
> The nodes get removed only if it is not used by any of dom0 or domio.

Can overlay be nested (let say B nest in A)? If yes, how do you deal
with the case the A is removed before B?

[...]

> +long dt_sysctl(struct xen_sysctl *op)
> +{
> + long ret = 0;
> + void *overlay_fdt;
> + char **nodes_full_path = NULL;
> + unsigned int num_overlay_nodes = 0;
> +
> + if ( op->u.dt_overlay.overlay_fdt_size <= 0 )

From my understanding, FDT are typically limited to 2MB. At minimum, we
should check the overlay is not bigger than that (to avoid arbirtrary
allocation size). I would possibly consider to limit to lower than that
(i.e 500KB) if there is no need to have larger and to reduce the amount
memory consumption by the overlay code.

> + return -EINVAL;
> +
> + overlay_fdt = xmalloc_bytes(op->u.dt_overlay.overlay_fdt_size);
> +
> + if ( overlay_fdt == NULL )
> + return -ENOMEM;
> +
> + ret = copy_from_guest(overlay_fdt, op->u.dt_overlay.overlay_fdt,
> + op->u.dt_overlay.overlay_fdt_size);
> + if ( ret )
> + {
> + gprintk(XENLOG_ERR, "copy from guest failed\n");
> + xfree(overlay_fdt);

You free overlay_fdt, but not in the other paths.

> +
> + return -EFAULT;
> + }
> +
> + switch ( op->u.dt_overlay.overlay_op )
> + {
> + case XEN_SYSCTL_DT_OVERLAY_REMOVE:
> + ret = check_overlay_fdt(overlay_fdt,
> + op->u.dt_overlay.overlay_fdt_size);
> + if ( ret )
> + {
> + ret = -EFAULT;
> + break;
> + }
> +
> + num_overlay_nodes = overlay_node_count(overlay_fdt);
> + if ( num_overlay_nodes == 0 )
> + {
> + ret = -ENOMEM;
> + break;
> + }
> +
> + ret = overlay_get_nodes_info(overlay_fdt, &nodes_full_path,
> + num_overlay_nodes);
> + if ( ret )
> + break;
> +
> + ret = handle_remove_overlay_nodes(nodes_full_path,
> + num_overlay_nodes);
> + break;
> +
> + default:
> + break;
> + }
> +
> + if ( nodes_full_path != NULL )
> + {
> + int i;
> + for ( i = 0; i < num_overlay_nodes && nodes_full_path[i] != NULL; i++ )
> + {
> + xfree(nodes_full_path[i]);
> + }
> + xfree(nodes_full_path);
> + }

AFAICT, nodes_full_path is not going to be used by the subop to add an
overlay. So I would consider to move this within the case or (even
better) create a function handling the subop (like you did for add) so
we don't end up with a large switch.

Cheers,

--
Julien Grall
Re: [XEN][RFC PATCH v3 10/14] xen/arm: Implement device tree node removal functionalities [ In reply to ]
Hi Julien & Luca,

Sorry for so long delay on the this patch series. I was stuck with
internal work and then had to go on a long leave.

I prepared v4 and will send it shortly. Addressed all the feedback. I
have few comments. Please see them inline below.

On 5/18/22 11:31 AM, Julien Grall wrote:
> CAUTION: This message has originated from an External Source. Please
> use proper judgment and caution when opening attachments, clicking
> links, or responding to this email.
>
>
> Hi Vikram,
>
> On 08/03/2022 19:47, Vikram Garhwal wrote:
>> Introduce sysctl XEN_SYSCTL_dt_overlay to remove device-tree nodes
>> added using
>> device tree overlay.
>>
>> xl overlay remove file.dtbo:
>>      Removes all the nodes in a given dtbo.
>>      First, removes IRQ permissions and MMIO accesses. Next, it finds
>> the nodes
>>      in dt_host and delete the device node entries from dt_host.
>>
>>      The nodes get removed only if it is not used by any of dom0 or
>> domio.
>>
>> Also, added overlay_track struct to keep the track of added node
>> through device
>> tree overlay. overlay_track has dt_host_new which is unflattened form
>> of updated
>> fdt and name of overlay nodes. When a node is removed, we also free
>> the memory
>> used by overlay_track for the particular overlay node.
>>
>> Signed-off-by: Vikram Garhwal <fnu.vikram@xilinx.com>
>> ---
>>   xen/common/Makefile          |   1 +
>>   xen/common/dt_overlay.c      | 447 +++++++++++++++++++++++++++++++++++
>>   xen/common/sysctl.c          |  10 +
>>   xen/include/public/sysctl.h  |  18 ++
>>   xen/include/xen/dt_overlay.h |  47 ++++
>>   5 files changed, 523 insertions(+)
>>   create mode 100644 xen/common/dt_overlay.c
>>   create mode 100644 xen/include/xen/dt_overlay.h
>>
>> diff --git a/xen/common/Makefile b/xen/common/Makefile
>> index dc8d3a13f5..2eb5734f8e 100644
>> --- a/xen/common/Makefile
>> +++ b/xen/common/Makefile
>> @@ -54,6 +54,7 @@ obj-y += wait.o
>>   obj-bin-y += warning.init.o
>>   obj-$(CONFIG_XENOPROF) += xenoprof.o
>>   obj-y += xmalloc_tlsf.o
>> +obj-$(CONFIG_OVERLAY_DTB) += dt_overlay.o
>>
>>   obj-bin-$(CONFIG_X86) += $(foreach n,decompress bunzip2 unxz unlzma
>> lzo unlzo unlz4 unzstd earlycpio,$(n).init.o)
>>
>> diff --git a/xen/common/dt_overlay.c b/xen/common/dt_overlay.c
>> new file mode 100644
>> index 0000000000..fcb31de495
>> --- /dev/null
>> +++ b/xen/common/dt_overlay.c
>> @@ -0,0 +1,447 @@
>> +/*
>> + * xen/common/dt_overlay.c
>> + *
>> + * Device tree overlay support in Xen.
>> + *
>> + * Copyright (c) 2021 Xilinx Inc.
>> + * Written by Vikram Garhwal <fnu.vikram@xilinx.com>
>> + *
>> + * This program is free software; you can redistribute it and/or
>> + * modify it under the terms and conditions of the GNU General Public
>> + * License, version 2, as published by the Free Software Foundation.
>> + *
>> + * This program is distributed in the hope that it will be useful,
>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
>> + * GNU General Public License for more details.
>> + */
>> +#include <xen/iocap.h>
>> +#include <xen/xmalloc.h>
>> +#include <asm/domain_build.h>
>> +#include <xen/dt_overlay.h>
>> +#include <xen/guest_access.h>
>> +
>> +static LIST_HEAD(overlay_tracker);
>> +static DEFINE_SPINLOCK(overlay_lock);
>> +
>> +static int dt_overlay_remove_node(struct dt_device_node *device_node)
>> +{
>> +    struct dt_device_node *np;
>> +    struct dt_device_node *parent_node;
>> +    struct dt_device_node *current_node;
>> +
>> +    parent_node = device_node->parent;
>> +
>> +    current_node = parent_node;
>> +
>> +    if ( parent_node == NULL )
>> +    {
>> +        dt_dprintk("%s's parent node not found\n", device_node->name);
>> +        return -EFAULT;
>> +    }
>> +
>> +    np = parent_node->child;
>> +
>> +    if ( np == NULL )
>> +    {
>> +        dt_dprintk("parent node %s's not found\n", parent_node->name);
>> +        return -EFAULT;
>> +    }
>> +
>> +    /* If node to be removed is only child node or first child. */
>> +    if ( !dt_node_cmp(np->full_name, device_node->full_name) )
>> +    {
>> +        current_node->allnext = np->allnext;
>
> While reviewing the previous patches, I realized that we have nothing to
> prevent someone to browse the device-tree while it is modified.
>
> I am not sure this can be solved with just refcounting (like Linux
> does). So maybe we need a read-write-lock. I am open to other
> suggestions here.
Added read_locks for all dt_host access funtions.
>> +
>> +        /* If node is first child but not the only child. */
>> +        if ( np->sibling != NULL )
>> +            current_node->child = np->sibling;
>> +        else
>> +            /* If node is only child. */
>> +            current_node->child = NULL;
>
> Those 4 lines can be replaced with one line:
>
> current_node->child = np->sibling;
>
>> +        return 0;
>> +    }
>> +
>> +    for ( np = parent_node->child; np->sibling != NULL; np =
>> np->sibling )
>> +    {
>> +        current_node = np;
>> +        if ( !dt_node_cmp(np->sibling->full_name,
>> device_node->full_name) )
>> +        {
>> +            /* Found the node. Now we remove it. */
>> +            current_node->allnext = np->allnext->allnext;
>
> I find this code quite confusing to read. AFAICT, 'np' and
> 'current_node' are exactly the same here. Why do you use different name
> to access it?
>
>> +
>> +            if ( np->sibling->sibling )
>> +                current_node->sibling = np->sibling->sibling;
>> +            else
>> +                current_node->sibling = NULL;
>
> Same here. This could be replaced with:
>
> current_node->child = nb->sibling->sibling;
>
>> +
>> +            break;
>> +        }
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +/* Basic sanity check for the dtbo tool stack provided to Xen. */
>> +static int check_overlay_fdt(const void *overlay_fdt, uint32_t
>> overlay_fdt_size)
>> +{
>> +    if ( (fdt_totalsize(overlay_fdt) != overlay_fdt_size) ||
>> +          fdt_check_header(overlay_fdt) )
>> +    {
>> +        printk(XENLOG_ERR "The overlay FDT is not a valid Flat
>> Device Tree\n");
>> +        return -EINVAL;
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +static unsigned int overlay_node_count(void *fdto)
>> +{
>> +    unsigned int num_overlay_nodes = 0;
>> +    int fragment;
>> +
>> +    fdt_for_each_subnode(fragment, fdto, 0)
>> +    {
>> +
>> +        int subnode;
>> +        int overlay;
>> +
>> +        overlay = fdt_subnode_offset(fdto, fragment, "__overlay__");
>> +
>> +        /*
>> +         * Overlay value can be < 0. But fdt_for_each_subnode() loop
>> checks for
>> +         * overlay >= 0. So, no need for a overlay>=0 check here.
>> +         */
>> +
>> +        fdt_for_each_subnode(subnode, fdto, overlay)
>> +        {
>> +            num_overlay_nodes++;
>> +        }
>> +    }
>> +
>> +    return num_overlay_nodes;
>> +}
>> +
>> +/*
>> + * overlay_get_nodes_info will get the all node's full name with
>> path. This is
>> + * useful when checking node for duplication i.e. dtbo tries to add
>> nodes which
>> + * already exists in device tree.
>> + */
>
> AFAIU the code below will only retrieve one level of nodes. So if you
> have
>
> foo {
>   bar {
>   }
> }
>
> Only foo will be part of the nodes_full_path. Is it correct?

Added support for adding children nodes too.

>
>
>> +static int overlay_get_nodes_info(const void *fdto, char
>> ***nodes_full_path,
>> +                                  unsigned int num_overlay_nodes)
>> +{
>> +    int fragment;
>> +    unsigned int node_num = 0;
>> +
>> +    *nodes_full_path = xmalloc_bytes(num_overlay_nodes * sizeof(char
>> *));
>> +
>> +    if ( *nodes_full_path == NULL )
>> +        return -ENOMEM;
>> +    memset(*nodes_full_path, 0x0, num_overlay_nodes * sizeof(char *));
>> +
>> +    fdt_for_each_subnode(fragment, fdto, 0)
>> +    {
>> +        int target;
>> +        int overlay;
>> +        int subnode;
>> +        const char *target_path;
>> +
>> +        target = fdt_overlay_target_offset(device_tree_flattened, fdto,
>> +                                           fragment, &target_path);
>> +        if ( target < 0 )
>> +            return target;
>> +
>> +        overlay = fdt_subnode_offset(fdto, fragment, "__overlay__");
>> +
>> +        /*
>> +         * Overlay value can be < 0. But fdt_for_each_subnode() loop
>> checks for
>> +         * overlay >= 0. So, no need for a overlay>=0 check here.
>> +         */
>> +        fdt_for_each_subnode(subnode, fdto, overlay)
>> +        {
>> +            const char *node_name = NULL;
>> +            unsigned int node_name_len = 0;
>> +            unsigned int target_path_len = strlen(target_path);
>> +            unsigned int node_full_name_len = 0;
>> +
>> +            node_name = fdt_get_name(fdto, subnode, &node_name_len);
>> +
>> +            if ( node_name == NULL )
>> +                return -EINVAL;
>> +
>> +            /*
>> +             * Magic number 2 is for adding '/'. This is done to
>> keep the
>> +             * node_full_name in the correct full node name format.
>> +             */
>> +            node_full_name_len = target_path_len + node_name_len + 2;
>> +
>> +            (*nodes_full_path)[node_num] =
>> xmalloc_bytes(node_full_name_len);
>> +
>> +            if ( (*nodes_full_path)[node_num] == NULL )
>> +                return -ENOMEM;
>> +
>> +            memcpy((*nodes_full_path)[node_num], target_path,
>> target_path_len);
>> +
>> +            (*nodes_full_path)[node_num][target_path_len] = '/';
>> +
>> +            memcpy((*nodes_full_path)[node_num] + target_path_len +
>> 1, node_name,
>> +                   node_name_len);
>> +
>> +            (*nodes_full_path)[node_num][node_full_name_len - 1] =
>> '\0';
>> +
>> +            node_num++;
>> +        }
>> +    }
>> +
>> +    return 0;
>> +}
>> +
>> +/* Remove nodes from dt_host. */
>> +static int remove_nodes(char **full_dt_node_path, int **nodes_irq,
>> +                        int *node_num_irq, unsigned int num_nodes)
>
> Most of the information above are stored in overlay_track. So can we
> pass a pointer to the overlay_track?
>
> Also, I think most of the parameter (include overlay track) should not
> be modified here. So please use const.
>
>> +{
>> +    struct domain *d = hardware_domain;
>> +    int rc = 0;
>> +    struct dt_device_node *overlay_node;
>> +    unsigned int naddr;
>> +    unsigned int i, j, nirq;
>> +    u64 addr, size;
>> +    domid_t domid = 0;
>> +
>> +    for ( j = 0; j < num_nodes; j++ )
>> +    {
>> +        dt_dprintk("Finding node %s in the dt_host\n",
>> full_dt_node_path[j]);
>> +
>> +        overlay_node = dt_find_node_by_path(full_dt_node_path[j]);
>> +
>> +        if ( overlay_node == NULL )
>
> This error (and some below) may happen because we partially removed the
> DTBO but stopped because on error. So on the next run, it is possible
> that "overlay_node" will be NULL and therefore you will not be able to
> remove the node.
>
> In your use-case, are you planning to ask the admin to reboot if you
> can't remove a node?
Yeah. What is error case where it may happen?

We are checking if dtbo is same as it was added and also expect user to
remove the nodes only iff the nodes aren't

used by any domain.

>
>> +        {
>> +            printk(XENLOG_ERR "Device %s is not present in the tree.
>> Removing nodes failed\n",
>> +                   full_dt_node_path[j]);
>> +            return -EINVAL;
>> +        }
>> +
>> +        domid = dt_device_used_by(overlay_node);
>> +
>> +        dt_dprintk("Checking if node %s is used by any domain\n",
>> +                   full_dt_node_path[j]);
>> +
>> +        /* Remove the node iff it's assigned to domain 0 or domain
>> io. */
>> +        if ( domid != 0 && domid != DOMID_IO )
>
> I think I asked before, but I have seen no answer on that. What will
> prevent the device to not be assigned after this check?

So, here for removal we assume that user removed all on-going ops on the
dtbo nodes which they wants to remove.

>
>
> Also, in general, I think it would be helpful if you answer on the ML
> questions. This would at least confirm that you have seen them and we
> agree on what to do.
Will keep this in mind and reply.
>
>> +        {
>> +            printk(XENLOG_ERR "Device %s as it is being used by
>> domain %d. Removing nodes failed\n",
>> +                   full_dt_node_path[j], domid);
>> +            return -EINVAL;
>> +        }
>> +
>> +        dt_dprintk("Removing node: %s\n", full_dt_node_path[j]);
>> +
>> +        nirq = node_num_irq[j];
>> +
>> +        /* Remove IRQ permission */
>> +        for ( i = 0; i < nirq; i++ )
>> +        {
>> +            rc = nodes_irq[j][i];
>> +            /*
>> +             * TODO: We don't handle shared IRQs for now. So, it is
>> assumed that
>> +             * the IRQs was not shared with another domain.
>> +             */
>
> This is not what I meant in v2. Interrupts cannot be shared between
> domain on Arm. However, interrupts can be shared between devices.
>
> This is the latter part that needs a TODO.
>
> In addition to that, as I wrote, an IRQ can be assigned to a *single*
> domain without the device been assigned to that domain. So I think this
> needs to be checked possibly by using the information stored in "desc"
> to know where the IRQ was routed to.
>
>> +            rc = irq_deny_access(d, rc);
>> +            if ( rc )
>> +            {
>> +                printk(XENLOG_ERR "unable to revoke access for irq
>> %u for %s\n",
>> +                       i, dt_node_full_name(overlay_node));
>> +                return rc;
>> +            }
>> +        }
>> +
>> +        rc = iommu_remove_dt_device(overlay_node);
>> +        if ( rc != 0 && rc != -ENXIO )
>> +            return rc;
>> +
>> +        naddr = dt_number_of_address(overlay_node);
>> +
>> +        /* Remove mmio access. */
>> +        for ( i = 0; i < naddr; i++ )
>> +        {
>> +            rc = dt_device_get_address(overlay_node, i, &addr, &size);
>> +            if ( rc )
>> +            {
>> +                printk(XENLOG_ERR "Unable to retrieve address %u for
>> %s\n",
>> +                       i, dt_node_full_name(overlay_node));
>> +                return rc;
>> +            }
>> +
>> +            rc = iomem_deny_access(d, paddr_to_pfn(addr),
>> +                                   paddr_to_pfn(PAGE_ALIGN(addr +
>> size - 1)));
>
> I think you missed my comment here. Similar to the IRQs, you are asking
> for trouble to parse the device-tree again. It would be better to store
> the information using a rangeset (see include/xen/rangeset.h).
>
> I also think the double array for the IRQs should be converted to a
> rangeset as this would simplify the code.
>
Keeping rangeset will work if we only parse one-level nodes. But if
there are descendant nodes, then its looking complicated to get info
using rangeset. While adding, we have to add parent first and then it's
descendant. But while remove, we will need to remove descendants first
and the parent node lastly.

I am unable to comeup with easy way to use rangeset to store information
in this way for add and remove.

> Furthemore, you are removing the permission but not the mapping in the
> P2M. Can you clarify why?
We are not actually mapping the nodes here.
>
>
>> +            if ( rc )
>> +            {
>> +                printk(XENLOG_ERR "Unable to remove dom%d access to"
>> +                        " 0x%"PRIx64" - 0x%"PRIx64"\n",
>> +                        d->domain_id,
>> +                        addr & PAGE_MASK, PAGE_ALIGN(addr + size) - 1);
>> +                return rc;
>> +            }
>> +        }
>> +
>> +        rc = dt_overlay_remove_node(overlay_node);
>> +        if ( rc )
>> +            return rc;
>> +    }
>> +
>> +    return rc;
>> +}
>
> [...]
>
>> + * overlay_node_track describes information about added nodes
>> through dtbo.
>> + * @entry: List pointer.
>> + * @dt_host_new: Pointer to the updated dt_host_new unflattened
>> 'updated fdt'.
>> + * @fdt: Stores the fdt.
>> + * @nodes_fullname: Stores the full name of nodes.
>> + * @nodes_irq: Stores the IRQ added from overlay dtb.
>> + * @node_num_irq: Stores num of IRQ for each node in overlay dtb.
>> + * @num_nodes: Stores total number of nodes in overlay dtb.
>> + */
>> +struct overlay_track {
>> +    struct list_head entry;
>> +    struct dt_device_node *dt_host_new;
>> +    void *fdt;
>> +    char **nodes_fullname;
>
> Looking at the code, the main use for the fullname are to check the FDT
> match and looking up in the DT.
>
> In order to check the DT, you could use memcmp() to confirm both the
> stored FDT and the one provided by the user match.
>
> For the lookup, you could avoid it by storing a pointer to the root of
> the new subtrees.
>
> Please let me know if you disagree with this approach.
>
If I understood correctly: just keeping the root of new overlay subtree
will not work for all case. It will work only if the nodes added are
adjacent to each other i.e. have the same parent then it will work as we
add all overlay nodes as the last child of their parent. But If two
subnodes have different parents, they may or may not be
nearby(node->allnext won't work) then we will issues removing the node
from this approach.

I did following small modification to your suggestion:
Keep FDT( do memcmp) for match and also keep address for all added nodes
at one-level( we can find children info if we know the top one-level
nodes. Please check overlay_node_count()). This will take 8bytes * num
of nodes in one-level space which is lot less space than keeping
nodes_fullname.


I will send v4 this week.

>> +    int **nodes_irq;
>> +    int *node_num_irq;
>> +    unsigned int num_nodes;
>> +};
>> +
>> +long dt_sysctl(struct xen_sysctl *op);
>> +#endif
>
> Cheers,
>
> --
> Julien Grall
Re: [XEN][RFC PATCH v3 10/14] xen/arm: Implement device tree node removal functionalities [ In reply to ]
On 07/12/2022 01:37, Vikram Garhwal wrote:
>> In your use-case, are you planning to ask the admin to reboot if you
>> can't remove a node?
> Yeah. What is error case where it may happen?

The code below have many possible failures. I would suggest to test by
throwing an error on the second node and check what happen if you try to
remove again.

>
> We are checking if dtbo is same as it was added and also expect user to
> remove the nodes only iff the nodes aren't
>
> used by any domain.
>
>>
>>> +        {
>>> +            printk(XENLOG_ERR "Device %s is not present in the tree.
>>> Removing nodes failed\n",
>>> +                   full_dt_node_path[j]);
>>> +            return -EINVAL;
>>> +        }
>>> +
>>> +        domid = dt_device_used_by(overlay_node);
>>> +
>>> +        dt_dprintk("Checking if node %s is used by any domain\n",
>>> +                   full_dt_node_path[j]);
>>> +
>>> +        /* Remove the node iff it's assigned to domain 0 or domain
>>> io. */
>>> +        if ( domid != 0 && domid != DOMID_IO )
>>
>> I think I asked before, but I have seen no answer on that. What will
>> prevent the device to not be assigned after this check?
>
> So, here for removal we assume that user removed all on-going ops on the
> dtbo nodes which they wants to remove.
Please don't make any assumption on what the user is doing even if it is
dom0. So the hypervisor code should be hardened.

>>> +        {
>>> +            printk(XENLOG_ERR "Device %s as it is being used by
>>> domain %d. Removing nodes failed\n",
>>> +                   full_dt_node_path[j], domid);
>>> +            return -EINVAL;
>>> +        }
>>> +
>>> +        dt_dprintk("Removing node: %s\n", full_dt_node_path[j]);
>>> +
>>> +        nirq = node_num_irq[j];
>>> +
>>> +        /* Remove IRQ permission */
>>> +        for ( i = 0; i < nirq; i++ )
>>> +        {
>>> +            rc = nodes_irq[j][i];
>>> +            /*
>>> +             * TODO: We don't handle shared IRQs for now. So, it is
>>> assumed that
>>> +             * the IRQs was not shared with another domain.
>>> +             */
>>
>> This is not what I meant in v2. Interrupts cannot be shared between
>> domain on Arm. However, interrupts can be shared between devices.
>>
>> This is the latter part that needs a TODO.
>>
>> In addition to that, as I wrote, an IRQ can be assigned to a *single*
>> domain without the device been assigned to that domain. So I think this
>> needs to be checked possibly by using the information stored in "desc"
>> to know where the IRQ was routed to.
>>
>>> +            rc = irq_deny_access(d, rc);
>>> +            if ( rc )
>>> +            {
>>> +                printk(XENLOG_ERR "unable to revoke access for irq
>>> %u for %s\n",
>>> +                       i, dt_node_full_name(overlay_node));
>>> +                return rc;
>>> +            }
>>> +        }
>>> +
>>> +        rc = iommu_remove_dt_device(overlay_node);
>>> +        if ( rc != 0 && rc != -ENXIO )
>>> +            return rc;
>>> +
>>> +        naddr = dt_number_of_address(overlay_node);
>>> +
>>> +        /* Remove mmio access. */
>>> +        for ( i = 0; i < naddr; i++ )
>>> +        {
>>> +            rc = dt_device_get_address(overlay_node, i, &addr, &size);
>>> +            if ( rc )
>>> +            {
>>> +                printk(XENLOG_ERR "Unable to retrieve address %u for
>>> %s\n",
>>> +                       i, dt_node_full_name(overlay_node));
>>> +                return rc;
>>> +            }
>>> +
>>> +            rc = iomem_deny_access(d, paddr_to_pfn(addr),
>>> +                                   paddr_to_pfn(PAGE_ALIGN(addr +
>>> size - 1)));
>>
>> I think you missed my comment here. Similar to the IRQs, you are asking
>> for trouble to parse the device-tree again. It would be better to store
>> the information using a rangeset (see include/xen/rangeset.h).
>>
>> I also think the double array for the IRQs should be converted to a
>> rangeset as this would simplify the code.
>>
> Keeping rangeset will work if we only parse one-level nodes. But if
> there are descendant nodes, then its looking complicated to get info
> using rangeset. While adding, we have to add parent first and then it's
> descendant. But while remove, we will need to remove descendants first
> and the parent node lastly.

I am not sure I understand the problem here. If you use a rangeset, then
you don't need to go through every node one by one to apply/revert the
permissions. You could simply apply/revert all the permission in one call.

>> Furthemore, you are removing the permission but not the mapping in the
>> P2M. Can you clarify why?
> We are not actually mapping the nodes here.
The remove function should be the inverse of the add function. AFAICT,
you are calling mapping the P2M (see call to map_range_to_domain()). So
this removal function *have* to remove the entry from the P2Ms.

If you disagree with that, then please explain who is going to remove
the P2M mappings.

±>>
>>
>>> +            if ( rc )
>>> +            {
>>> +                printk(XENLOG_ERR "Unable to remove dom%d access to"
>>> +                        " 0x%"PRIx64" - 0x%"PRIx64"\n",
>>> +                        d->domain_id,
>>> +                        addr & PAGE_MASK, PAGE_ALIGN(addr + size) - 1);
>>> +                return rc;
>>> +            }
>>> +        }
>>> +
>>> +        rc = dt_overlay_remove_node(overlay_node);
>>> +        if ( rc )
>>> +            return rc;
>>> +    }
>>> +
>>> +    return rc;
>>> +}
>>
>> [...]
>>
>>> + * overlay_node_track describes information about added nodes
>>> through dtbo.
>>> + * @entry: List pointer.
>>> + * @dt_host_new: Pointer to the updated dt_host_new unflattened
>>> 'updated fdt'.
>>> + * @fdt: Stores the fdt.
>>> + * @nodes_fullname: Stores the full name of nodes.
>>> + * @nodes_irq: Stores the IRQ added from overlay dtb.
>>> + * @node_num_irq: Stores num of IRQ for each node in overlay dtb.
>>> + * @num_nodes: Stores total number of nodes in overlay dtb.
>>> + */
>>> +struct overlay_track {
>>> +    struct list_head entry;
>>> +    struct dt_device_node *dt_host_new;
>>> +    void *fdt;
>>> +    char **nodes_fullname;
>>
>> Looking at the code, the main use for the fullname are to check the FDT
>> match and looking up in the DT.
>>
>> In order to check the DT, you could use memcmp() to confirm both the
>> stored FDT and the one provided by the user match.
>>
>> For the lookup, you could avoid it by storing a pointer to the root of
>> the new subtrees.
>>
>> Please let me know if you disagree with this approach.
>>
> If I understood correctly: just keeping the root of new overlay subtree
> will not work for all case. It will work only if the nodes added are
> adjacent to each other i.e. have the same parent then it will work as we
> add all overlay nodes as the last child of their parent. But If two
> subnodes have different parents, they may or may not be
> nearby(node->allnext won't work) then we will issues removing the node
> from this approach.

Thanks for the explanation.

>
> I did following small modification to your suggestion:
> Keep FDT( do memcmp) for match and also keep address for all added nodes
> at one-level( we can find children info if we know the top one-level
> nodes. Please check overlay_node_count()). This will take 8bytes * num
> of nodes in one-level space which is lot less space than keeping
> nodes_fullname.

This seems to match my thoughts after your explanation above. I will
have a look at the next version.

Cheers,

--
Julien Grall