Re: [XEN PATCH v3] xen/arm: ffa: reclaim shared memory on guest destroy
Hi Jens,

> On 5 Feb 2024, at 14:39, Jens Wiklander <jens.wiklander@xxxxxxxxxx> wrote:
>
> Hi Bertrand,
>
> On Thu, Feb 1, 2024 at 2:57 PM Bertrand Marquis
> <Bertrand.Marquis@xxxxxxx> wrote:
>>
>> Hi Jens,
>>
>>> On 17 Jan 2024, at 12:06, Jens Wiklander <jens.wiklander@xxxxxxxxxx> wrote:
>>>
>>> When an FF-A enabled guest is destroyed it may leave behind memory
>>> shared with SPs. This memory must be reclaimed before it's reused or an
>>> SP may make changes to memory used by a new unrelated guest. So when the
>>> domain is torn down, add FF-A requests to reclaim all remaining shared
>>> memory.
>>>
>>> SPs in the secure world are notified using VM_DESTROYED that a guest has
>>> been destroyed. An SP is supposed to relinquish all shared memory to allow
>>> reclaiming the memory. The relinquish operation may need to be delayed if
>>> the shared memory is for instance part of a DMA operation.
>>>
>>> The domain reference counter is increased when the first FF-A shared
>>> memory is registered and the counter is decreased again when the last
>>> shared memory is reclaimed. If FF-A shared memory registrations remain
>>> at the end of ffa_domain_teardown() a timer is set to try to reclaim
>>> the shared memory every second until the memory is reclaimed.
>>>
>>> A few minor style fixes with a removed empty line here and an added new
>>> line there.
>>>
>>> Signed-off-by: Jens Wiklander <jens.wiklander@xxxxxxxxxx>
>>> ---
>>>
>>> v3:
>>> - Mentioning in the commit message that there are some style fixes
>>> - Addressing review comments
>>> - Refactor the ffa_domain_teardown() path to let
>>>   ffa_domain_teardown_continue() do most of the work.
>>>
>>> v2:
>>> - Update commit message to match the new implementation
>>> - Using a per domain bitfield to keep track of which SPs have been notified
>>>   with VM_DESTROYED
>>> - Holding a domain reference counter to keep the domain as a zombie domain
>>>   while there are still shared memory registrations remaining to be reclaimed
>>> - Using a timer to retry reclaiming remaining shared memory registrations
>>> ---
>>> xen/arch/arm/tee/ffa.c | 253 +++++++++++++++++++++++++++++++++--------
>>> 1 file changed, 204 insertions(+), 49 deletions(-)
>>>
>>> diff --git a/xen/arch/arm/tee/ffa.c b/xen/arch/arm/tee/ffa.c
>>> index 0793c1c7585d..80ebbf4f01c6 100644
>>> --- a/xen/arch/arm/tee/ffa.c
>>> +++ b/xen/arch/arm/tee/ffa.c
>>> @@ -54,6 +54,7 @@
>>>  #include <xen/mm.h>
>>>  #include <xen/sched.h>
>>>  #include <xen/sizes.h>
>>> +#include <xen/timer.h>
>>>  #include <xen/types.h>
>>>
>>>  #include <asm/event.h>
>>> @@ -144,6 +145,12 @@
>>>   */
>>>  #define FFA_MAX_SHM_COUNT 32
>>>
>>> +/*
>>> + * The time we wait until trying to tear down a domain again if it was
>>> + * blocked initially.
>>> + */
>>> +#define FFA_CTX_TEARDOWN_DELAY SECONDS(1)
>>> +
>>>  /* FF-A-1.1-REL0 section 10.9.2 Memory region handle, page 167 */
>>>  #define FFA_HANDLE_HYP_FLAG BIT(63, ULL)
>>>  #define FFA_HANDLE_INVALID 0xffffffffffffffffULL
>>> @@ -384,11 +391,6 @@ struct ffa_ctx {
>>>      unsigned int page_count;
>>>      /* FF-A version used by the guest */
>>>      uint32_t guest_vers;
>>> -    /*
>>> -     * Number of SPs that we have sent a VM created signal to, used in
>>> -     * ffa_domain_teardown() to know which SPs need to be signalled.
>>> -     */
>>> -    uint16_t create_signal_count;
>>>      bool rx_is_free;
>>>      /* Used shared memory objects, struct ffa_shm_mem */
>>>      struct list_head shm_list;
>>> @@ -402,6 +404,15 @@ struct ffa_ctx {
>>>      spinlock_t tx_lock;
>>>      spinlock_t rx_lock;
>>>      spinlock_t lock;
>>> +    /* Used if domain can't be torn down immediately */
>>> +    struct domain *teardown_d;
>>> +    struct list_head teardown_list;
>>> +    s_time_t teardown_expire;
>>> +    /*
>>> +     * Used for ffa_domain_teardown() to keep track of which SPs should be
>>> +     * notified that this guest is being destroyed.
>>> +     */
>>> +    unsigned long vm_destroy_bitmap[];
>>>  };
>>>
>>>  struct ffa_shm_mem {
>>> @@ -436,6 +447,12 @@ static void *ffa_tx __read_mostly;
>>>  static DEFINE_SPINLOCK(ffa_rx_buffer_lock);
>>>  static DEFINE_SPINLOCK(ffa_tx_buffer_lock);
>>>
>>> +
>>> +/* Used to track domains that could not be torn down immediately. */
>>> +static struct timer ffa_teardown_timer;
>>> +static struct list_head ffa_teardown_head;
>>> +static DEFINE_SPINLOCK(ffa_teardown_lock);
>>> +
>>>  static bool ffa_get_version(uint32_t *vers)
>>>  {
>>>      const struct arm_smccc_1_2_regs arg = {
>>> @@ -853,7 +870,6 @@ static int32_t handle_partition_info_get(uint32_t w1, uint32_t w2, uint32_t w3,
>>>          goto out_rx_release;
>>>      }
>>>
>>> -
>>>      memcpy(ctx->rx, ffa_rx, sz);
>>>  }
>>>  ctx->rx_is_free = false;
>>> @@ -992,53 +1008,75 @@ static void put_shm_pages(struct ffa_shm_mem *shm)
>>>      }
>>>  }
>>>
>>> -static bool inc_ctx_shm_count(struct ffa_ctx *ctx)
>>> +static bool inc_ctx_shm_count(struct domain *d, struct ffa_ctx *ctx)
>>>  {
>>>      bool ret = true;
>>>
>>>      spin_lock(&ctx->lock);
>>> +
>>> +    /*
>>> +     * If this is the first shm added, increase the domain reference
>>> +     * counter as we need to keep domain around a bit longer to reclaim the
>>> +     * shared memory in the teardown path.
>>> +     */
>>> +    if ( !ctx->shm_count )
>>> +        get_knownalive_domain(d);
>>> +
>>>      if (ctx->shm_count >= FFA_MAX_SHM_COUNT)
>>>          ret = false;
>>>      else
>>>          ctx->shm_count++;
>>> +
>>>      spin_unlock(&ctx->lock);
>>>
>>>      return ret;
>>>  }
>>>
>>> -static void dec_ctx_shm_count(struct ffa_ctx *ctx)
>>> +static void dec_ctx_shm_count(struct domain *d, struct ffa_ctx *ctx)
>>>  {
>>>      spin_lock(&ctx->lock);
>>> +
>>>      ASSERT(ctx->shm_count > 0);
>>>      ctx->shm_count--;
>>> +
>>> +    /*
>>> +     * If this was the last shm removed, let go of the domain reference we
>>> +     * took in inc_ctx_shm_count() above.
>>> +     */
>>> +    if ( !ctx->shm_count )
>>> +        put_domain(d);
>>> +
>>>      spin_unlock(&ctx->lock);
>>>  }
>>>
>>> -static struct ffa_shm_mem *alloc_ffa_shm_mem(struct ffa_ctx *ctx,
>>> +static struct ffa_shm_mem *alloc_ffa_shm_mem(struct domain *d,
>>>                                               unsigned int page_count)
>>>  {
>>> +    struct ffa_ctx *ctx = d->arch.tee;
>>>      struct ffa_shm_mem *shm;
>>>
>>>      if ( page_count >= FFA_MAX_SHM_PAGE_COUNT )
>>>          return NULL;
>>> -    if ( !inc_ctx_shm_count(ctx) )
>>> +    if ( !inc_ctx_shm_count(d, ctx) )
>>>          return NULL;
>>>
>>>      shm = xzalloc_flex_struct(struct ffa_shm_mem, pages, page_count);
>>>      if ( shm )
>>>          shm->page_count = page_count;
>>>      else
>>> -        dec_ctx_shm_count(ctx);
>>> +        dec_ctx_shm_count(d, ctx);
>>>
>>>      return shm;
>>>  }
>>>
>>> -static void free_ffa_shm_mem(struct ffa_ctx *ctx, struct ffa_shm_mem *shm)
>>> +static void free_ffa_shm_mem(struct domain *d, struct ffa_shm_mem *shm)
>>>  {
>>> +    struct ffa_ctx *ctx = d->arch.tee;
>>> +
>>>      if ( !shm )
>>>          return;
>>>
>>> -    dec_ctx_shm_count(ctx);
>>> +    dec_ctx_shm_count(d, ctx);
>>>      put_shm_pages(shm);
>>>      xfree(shm);
>>>  }
>>> @@ -1306,7 +1344,7 @@ static void handle_mem_share(struct cpu_user_regs *regs)
>>>          goto out_unlock;
>>>      }
>>>
>>> -    shm = alloc_ffa_shm_mem(ctx, page_count);
>>> +    shm = alloc_ffa_shm_mem(d, page_count);
>>>      if ( !shm )
>>>      {
>>>          ret = FFA_RET_NO_MEMORY;
>>> @@ -1350,7 +1388,7 @@ static void handle_mem_share(struct cpu_user_regs *regs)
>>>
>>>  out:
>>>      if ( ret )
>>> -        free_ffa_shm_mem(ctx, shm);
>>> +        free_ffa_shm_mem(d, shm);
>>>  out_unlock:
>>>      spin_unlock(&ctx->tx_lock);
>>>
>>> @@ -1401,7 +1439,7 @@ static int handle_mem_reclaim(uint64_t handle, uint32_t flags)
>>>      }
>>>      else
>>>      {
>>> -        free_ffa_shm_mem(ctx, shm);
>>> +        free_ffa_shm_mem(d, shm);
>>>      }
>>>
>>>      return ret;
>>> @@ -1486,6 +1524,41 @@ static bool ffa_handle_call(struct cpu_user_regs *regs)
>>>      }
>>>  }
>>>
>>> +static bool is_in_subscr_list(const uint16_t *subscr, uint16_t start,
>>> +                              uint16_t end, uint16_t sp_id)
>>> +{
>>> +    unsigned int n;
>>> +
>>> +    for ( n = start; n < end; n++ )
>>> +    {
>>> +        if ( subscr[n] == sp_id )
>>> +            return true;
>>> +    }
>>> +
>>> +    return false;
>>> +}
>>> +
>>> +static void vm_destroy_bitmap_init(struct ffa_ctx *ctx,
>>> +                                   unsigned int create_signal_count)
>>> +{
>>> +    unsigned int n;
>>> +
>>> +    for ( n = 0; n < subscr_vm_destroyed_count; n++ )
>>> +    {
>>> +        /*
>>> +         * Skip SPs subscribed to the VM created event that never was
>>> +         * notified of the VM creation due to an error during
>>> +         * ffa_domain_init().
>>> +         */
>>> +        if ( is_in_subscr_list(subscr_vm_created, create_signal_count,
>>> +                               subscr_vm_created_count,
>>> +                               subscr_vm_destroyed[n]) )
>>> +            continue;
>>> +
>>> +        set_bit(n, ctx->vm_destroy_bitmap);
>>> +    }
>>> +}
>>> +
>>>  static int ffa_domain_init(struct domain *d)
>>>  {
>>>      struct ffa_ctx *ctx;
>>> @@ -1501,11 +1574,14 @@ static int ffa_domain_init(struct domain *d)
>>>      if ( d->domain_id >= UINT16_MAX)
>>>          return -ERANGE;
>>>
>>> -    ctx = xzalloc(struct ffa_ctx);
>>> +    ctx = xzalloc_flex_struct(struct ffa_ctx, vm_destroy_bitmap,
>>> +                              BITS_TO_LONGS(subscr_vm_destroyed_count));
>>>      if ( !ctx )
>>>          return -ENOMEM;
>>>
>>>      d->arch.tee = ctx;
>>> +    ctx->teardown_d = d;
>>> +    INIT_LIST_HEAD(&ctx->shm_list);
>>>
>>>      for ( n = 0; n < subscr_vm_created_count; n++ )
>>>      {
>>> @@ -1515,65 +1591,141 @@ static int ffa_domain_init(struct domain *d)
>>>          {
>>>              printk(XENLOG_ERR "ffa: Failed to report creation of vm_id %u to %u: res %d\n",
>>>                     get_vm_id(d), subscr_vm_created[n], res);
>>> -            ctx->create_signal_count = n;
>>> -            return -EIO;
>>> +            break;
>>>          }
>>>      }
>>> -    ctx->create_signal_count = subscr_vm_created_count;
>>> -
>>> -    INIT_LIST_HEAD(&ctx->shm_list);
>>> +    vm_destroy_bitmap_init(ctx, n);
>>> +    if ( n != subscr_vm_created_count )
>>> +        return -EIO;
>>>
>>>      return 0;
>>>  }
>>>
>>> -static bool is_in_subscr_list(const uint16_t *subscr, uint16_t start,
>>> -                              uint16_t end, uint16_t sp_id)
>>> +static void send_vm_destroyed(struct domain *d)
>>>  {
>>> +    struct ffa_ctx *ctx = d->arch.tee;
>>>      unsigned int n;
>>> +    int32_t res;
>>>
>>> -    for ( n = start; n < end; n++ )
>>> +    for ( n = 0; n < subscr_vm_destroyed_count; n++ )
>>>      {
>>> -        if ( subscr[n] == sp_id )
>>> -            return true;
>>> -    }
>>> +        if ( !test_bit(n, ctx->vm_destroy_bitmap) )
>>> +            continue;
>>>
>>> -    return false;
>>> +        res = ffa_direct_req_send_vm(subscr_vm_destroyed[n], get_vm_id(d),
>>> +                                     FFA_MSG_SEND_VM_DESTROYED);
>>> +
>>
>> I think here we should distinguish cases depending on the error received:
>> - retry: we should keep the bit set and retry later
>> - other: unset the bit as there is not much we can do but log it.
>
> Makes sense.
>
>>
>> The interrupted case can happen, we handle that already in the direct message
>> request but we will get back here if that does not work after some retries so
>> putting it in the other category is ok I think.
>
> In the case of FFA_MSG_SEND_VM_DESTROYED I think it makes more sense
> to also retry FFA_RET_INTERRUPTED later. To me it seems that there
> will be an even better chance for an eventual non-secure interrupt to
> be handled if we wait a little before retrying. So, if you don't mind,
> I'll skip clearing the bit below for both FFA_RET_INTERRUPTED and
> FFA_RET_RETRY.

I agree.
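As a rough, untested sketch of what I would expect that to look like
(reusing the FFA_RET_* constants we already define in ffa.c; the exact
shape is of course up to you):

static void send_vm_destroyed(struct domain *d)
{
    struct ffa_ctx *ctx = d->arch.tee;
    unsigned int n;
    int32_t res;

    for ( n = 0; n < subscr_vm_destroyed_count; n++ )
    {
        if ( !test_bit(n, ctx->vm_destroy_bitmap) )
            continue;

        res = ffa_direct_req_send_vm(subscr_vm_destroyed[n], get_vm_id(d),
                                     FFA_MSG_SEND_VM_DESTROYED);

        switch ( res )
        {
        case FFA_RET_OK:
            clear_bit(n, ctx->vm_destroy_bitmap);
            break;
        case FFA_RET_INTERRUPTED:
        case FFA_RET_RETRY:
            /* Keep the bit set so the teardown timer retries this SP */
            break;
        default:
            /* Retrying won't help, log it and give up on this SP */
            printk(XENLOG_ERR "%pd: ffa: Failed to report destruction of vm_id %u to %u: res %d\n",
                   d, get_vm_id(d), subscr_vm_destroyed[n], res);
            clear_bit(n, ctx->vm_destroy_bitmap);
            break;
        }
    }
}

Keeping the bit set for FFA_RET_INTERRUPTED and FFA_RET_RETRY means the
teardown timer naturally provides the wait-before-retry you describe.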
Cheers
Bertrand

>
>>
>>> +        if ( res )
>>> +        {
>>> +            printk(XENLOG_ERR "%pd: ffa: Failed to report destruction of vm_id %u to %u: res %d\n",
>>> +                   d, get_vm_id(d), subscr_vm_destroyed[n], res);
>>> +        }
>>> +        else
>>> +        {
>>> +            clear_bit(n, ctx->vm_destroy_bitmap);
>>> +        }
>>> +    }
>>>  }
>>>
>>> -/* This function is supposed to undo what ffa_domain_init() has done */
>>> -static int ffa_domain_teardown(struct domain *d)
>>> +static void reclaim_shms(struct domain *d)
>>>  {
>>>      struct ffa_ctx *ctx = d->arch.tee;
>>> -    unsigned int n;
>>> +    struct ffa_shm_mem *shm, *tmp;
>>>      int32_t res;
>>>
>>> -    if ( !ctx )
>>> -        return 0;
>>> +    list_for_each_entry_safe(shm, tmp, &ctx->shm_list, list)
>>> +    {
>>> +        register_t handle_hi;
>>> +        register_t handle_lo;
>>>
>>> -    for ( n = 0; n < subscr_vm_destroyed_count; n++ )
>>> +        uint64_to_regpair(&handle_hi, &handle_lo, shm->handle);
>>> +        res = ffa_mem_reclaim(handle_lo, handle_hi, 0);
>>
>> Same here as for the VM_DESTROYED, there are some error codes that we should
>> not reiterate on as they will also fail next time:
>> - invalid parameters: stop and log an error, this will always fail
>> - no memory: for now I would say do the same as invalid parameters, with a
>>   comment maybe, because we could retry and memory could be available next time
>> - abort: stop and log an error, this is a major fault on the other side
>> - denied: retry.
>
> I agree, I'll propose something like that in the v4.
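For reference, the kind of classification I have in mind, again as an
untested sketch (the FFA_RET_* values are the ones already defined in
ffa.c; the helper name is made up):

/* Whether an FFA_MEM_RECLAIM error is worth retrying from the timer */
static bool reclaim_is_retryable(int32_t res)
{
    switch ( res )
    {
    case FFA_RET_DENIED:
        /* The SP is still using the memory, e.g. for DMA, retry later */
        return true;
    case FFA_RET_NO_MEMORY:
        /*
         * Could in theory succeed once memory is available again, but
         * treat it as permanent for now.
         */
        return false;
    case FFA_RET_INVALID_PARAMETERS:
    case FFA_RET_ABORTED:
    default:
        /* Will always fail, or a major fault on the other side */
        return false;
    }
}

reclaim_shms() could then keep only the retryable handles on shm_list for
the next timer tick and log the others once; what to do with a handle that
can never be reclaimed is a separate question, since its pages must not be
handed back to the allocator.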
>
>>
>>> +        if ( res )
>>> +        {
>>> +            printk(XENLOG_G_INFO "%pd: ffa: Failed to reclaim handle %#lx : %d\n",
>>> +                   d, shm->handle, res);
>>> +        }
>>> +        else
>>> +        {
>>> +            printk(XENLOG_G_DEBUG "%pd: ffa: Reclaimed handle %#lx\n",
>>> +                   d, shm->handle);
>>> +            list_del(&shm->list);
>>> +            free_ffa_shm_mem(d, shm);
>>> +        }
>>> +    }
>>> +}
>>> +
>>> +static void ffa_domain_teardown_continue(struct ffa_ctx *ctx, bool first_time)
>>> +{
>>> +    struct ffa_ctx *next_ctx = NULL;
>>> +
>>> +    send_vm_destroyed(ctx->teardown_d);
>>> +    reclaim_shms(ctx->teardown_d);
>>> +
>>> +    if ( ctx->shm_count )
>>> +    {
>>
>> If I understand right you only retry if there are remaining SHM to reclaim,
>> but what if you have destroy messages that were not sent successfully,
>> shouldn't we reiterate in that case to send the destroy message again?
>
> I agree, I'll fix.
>
> Cheers,
> Jens
>
>>
>>> +        printk(XENLOG_G_INFO "%pd: ffa: Remaining unclaimed handles, retrying\n", ctx->teardown_d);
>>> +
>>> +        ctx->teardown_expire = NOW() + FFA_CTX_TEARDOWN_DELAY;
>>> +
>>> +        spin_lock(&ffa_teardown_lock);
>>> +        list_add_tail(&ctx->teardown_list, &ffa_teardown_head);
>>> +        /* Need to set a new timer for the next ctx in line */
>>> +        next_ctx = list_first_entry(&ffa_teardown_head, struct ffa_ctx,
>>> +                                    teardown_list);
>>> +        spin_unlock(&ffa_teardown_lock);
>>> +    }
>>> +    else
>>>      {
>>>          /*
>>> -         * Skip SPs subscribed to the VM created event that never was
>>> -         * notified of the VM creation due to an error during
>>> -         * ffa_domain_init().
>>> +         * domain_destroy() might have been called (via put_domain() in
>>> +         * reclaim_shms()), so we can't touch the domain structure anymore.
>>>          */
>>> -        if ( is_in_subscr_list(subscr_vm_created, ctx->create_signal_count,
>>> -                               subscr_vm_created_count,
>>> -                               subscr_vm_destroyed[n]) )
>>> -            continue;
>>> +        xfree(ctx);
>>>
>>> -        res = ffa_direct_req_send_vm(subscr_vm_destroyed[n], get_vm_id(d),
>>> -                                     FFA_MSG_SEND_VM_DESTROYED);
>>> -
>>> -        if ( res )
>>> -            printk(XENLOG_ERR "ffa: Failed to report destruction of vm_id %u to %u: res %d\n",
>>> -                   get_vm_id(d), subscr_vm_destroyed[n], res);
>>> +        /* Only check if there has been a change to the teardown queue */
>>> +        if ( !first_time )
>>> +        {
>>> +            spin_lock(&ffa_teardown_lock);
>>> +            next_ctx = list_first_entry_or_null(&ffa_teardown_head,
>>> +                                                struct ffa_ctx, teardown_list);
>>> +            spin_unlock(&ffa_teardown_lock);
>>> +        }
>>>      }
>>>
>>> +    if ( next_ctx )
>>> +        set_timer(&ffa_teardown_timer, next_ctx->teardown_expire);
>>> +}
>>> +
>>> +static void ffa_teardown_timer_callback(void *arg)
>>> +{
>>> +    struct ffa_ctx *ctx;
>>> +
>>> +    spin_lock(&ffa_teardown_lock);
>>> +    ctx = list_first_entry_or_null(&ffa_teardown_head, struct ffa_ctx,
>>> +                                   teardown_list);
>>> +    if ( ctx )
>>> +        list_del(&ctx->teardown_list);
>>> +    spin_unlock(&ffa_teardown_lock);
>>> +
>>> +    if ( ctx )
>>> +        ffa_domain_teardown_continue(ctx, false /* !first_time */);
>>> +    else
>>> +        printk(XENLOG_G_ERR "%s: teardown list is empty\n", __func__);
>>> +}
>>> +
>>> +/* This function is supposed to undo what ffa_domain_init() has done */
>>> +static int ffa_domain_teardown(struct domain *d)
>>> +{
>>> +    struct ffa_ctx *ctx = d->arch.tee;
>>> +
>>> +    if ( !ctx )
>>> +        return 0;
>>> +
>>>      if ( ctx->rx )
>>>          rxtx_unmap(ctx);
>>>
>>> -    XFREE(d->arch.tee);
>>> +    ffa_domain_teardown_continue(ctx, true /* first_time */);
>>>
>>>      return 0;
>>>  }
>>> @@ -1739,6 +1891,9 @@ static bool ffa_probe(void)
>>>      if ( !init_sps() )
>>>          goto err_free_ffa_tx;
>>>
>>> +    INIT_LIST_HEAD(&ffa_teardown_head);
>>> +    init_timer(&ffa_teardown_timer, ffa_teardown_timer_callback, NULL, 0);
>>> +
>>>      return true;
>>>
>>>  err_free_ffa_tx:
>>> --
>>> 2.34.1
>>>
>>
>> Regards
>> Bertrand