|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH v3 06/11] optee: add std call handling
Hello Julien,
Julien Grall writes:
> Hi Volodymyr,
>
> On 18/12/2018 21:11, Volodymyr Babchuk wrote:
>> From: Volodymyr Babchuk <vlad.babchuk@xxxxxxxxx>
>>
>> The main way to communicate with OP-TEE is to issue standard SMCCC
>> call. "Standard" is a SMCCC term and it means that call can be
>> interrupted and OP-TEE can return control to NW before completing
>> the call.
>>
>> In contrast with fast calls, where arguments and return values
>> are passed in registers, standard calls use shared memory. Register
>> pair a1,a2 holds 64-bit PA of command buffer, where all arguments
>> are stored and which is used to return data. OP-TEE internally
>> copies contents of this buffer into own secure memory before accessing
>> and validating any data in command buffer. This is done to make sure
>> that NW will not change contents of the validated parameters.
>>
>> Mediator needs to do the same for number of reasons:
>>
>> 1. To make sure that guest will not change data after validation.
>> 2. To translate IPAs to PAs in the command buffer (this is not done
>> in this patch).
>> 3. To hide translated address from guest, so it will not be able
>> to do IPA->PA translation by misusing mediator.
>>
>> During standard call OP-TEE can issue multiple "RPC returns", asking
>> NW to do some work for OP-TEE. NW then issues special call
>> OPTEE_SMC_CALL_RETURN_FROM_RPC to resume handling of the original call.
>> Thus, mediator needs to maintain context for original standard call
>> during multiple SMCCC calls.
>>
>> Standard call is considered complete, when returned value is
>> not a RPC request.
>>
>> Signed-off-by: Volodymyr Babchuk <vlad.babchuk@xxxxxxxxx>
>> ---
>>
>> Changes from v2:
>> - renamed struct domain_ctx to struct optee_domain
>> - fixed coding style
>> - Now I use access_guest_memory_by_ipa() instead of mappings
>> to read command buffer
>> - Added tracking for in flight calls, so guest can't resume
>> the same call from two CPUs simultaniously
>>
>> xen/arch/arm/tee/optee.c | 319 ++++++++++++++++++++++++++++++++++-
>> xen/include/asm-arm/domain.h | 3 +
>> 2 files changed, 320 insertions(+), 2 deletions(-)
>>
>> diff --git a/xen/arch/arm/tee/optee.c b/xen/arch/arm/tee/optee.c
>> index 584241b03a..dc90e2ed8e 100644
>> --- a/xen/arch/arm/tee/optee.c
>> +++ b/xen/arch/arm/tee/optee.c
>> @@ -12,6 +12,8 @@
>> */
>> #include <xen/device_tree.h>
>> +#include <xen/domain_page.h>
>> +#include <xen/guest_access.h>
>> #include <xen/sched.h>
>> #include <asm/smccc.h>
>> #include <asm/tee/tee.h>
>> @@ -22,11 +24,38 @@
>> /* Client ID 0 is reserved for hypervisor itself */
>> #define OPTEE_CLIENT_ID(domain) (domain->domain_id + 1)
>> +/*
>> + * Maximal number of concurrent standard calls from one guest. This
>> + * corresponds to OPTEE configuration option CFG_NUM_THREADS, because
>> + * OP-TEE spawns a thread for every standard call.
>
> Looking at OP-TEE, CFG_NUM_THREADS will vary depending on the
> platform. Is there any way to probe that number of threads from Xen?
Yes, this is a per-platform configuration.
The easiest way would be to add a Fast SMC to OP-TEE that reports this
parameter.
Jens, what do you think about adding additional call?
> In any case, I think we should update the comment to reflect that this
> seems to be the maximum CFG_NUM_THREADS supported by any upstream
> platform.
Actually, the OP-TEE protocol has the ability to handle a limited number of
threads correctly. OP-TEE can report that all threads are busy, and the
client will wait for a free one. Xen can do the same, although this is not
implemented in this patch series. But I can add this.
Basically, this means that everything can work correctly even with
MAX_STD_CALLS==1. It just will not be as efficient.
>> + */
>> +#define MAX_STD_CALLS 16
>> +
>> #define OPTEE_KNOWN_NSEC_CAPS OPTEE_SMC_NSEC_CAP_UNIPROCESSOR
>> #define OPTEE_KNOWN_SEC_CAPS (OPTEE_SMC_SEC_CAP_HAVE_RESERVED_SHM | \
>> OPTEE_SMC_SEC_CAP_UNREGISTERED_SHM | \
>> OPTEE_SMC_SEC_CAP_DYNAMIC_SHM)
>> +/*
>> + * Call context. OP-TEE can issue multiple RPC returns during one call.
>> + * We need to preserve context during them.
>> + */
>> +struct optee_std_call {
>> + struct list_head list;
>> + struct optee_msg_arg *xen_arg;
>> + paddr_t guest_arg_ipa;
>> + int optee_thread_id;
>> + int rpc_op;
>> + bool in_flight;
>> +};
>> +
>> +/* Domain context */
>> +struct optee_domain {
>> + struct list_head call_list;
>> + atomic_t call_count;
>> + spinlock_t lock;
>> +};
>> +
>> static bool optee_probe(void)
>> {
>> struct dt_device_node *node;
>> @@ -52,6 +81,11 @@ static bool optee_probe(void)
>> static int optee_enable(struct domain *d)
>> {
>> struct arm_smccc_res resp;
>> + struct optee_domain *ctx;
>> +
>> + ctx = xzalloc(struct optee_domain);
>> + if ( !ctx )
>> + return -ENOMEM;
>> /*
>> * Inform OP-TEE about a new guest.
>> @@ -69,9 +103,16 @@ static int optee_enable(struct domain *d)
>> {
>> gprintk(XENLOG_WARNING, "Unable to create OPTEE client: rc =
>> 0x%X\n",
>> (uint32_t)resp.a0);
>> + xfree(ctx);
>> return -ENODEV;
>> }
>> + INIT_LIST_HEAD(&ctx->call_list);
>> + atomic_set(&ctx->call_count, 0);
>> + spin_lock_init(&ctx->lock);
>> +
>> + d->arch.tee = ctx;
>> +
>> return 0;
>> }
>> @@ -111,9 +152,86 @@ static void set_return(struct cpu_user_regs
>> *regs, uint32_t ret)
>> set_user_reg(regs, 7, 0);
>> }
>> +static struct optee_std_call *allocate_std_call(struct
>> optee_domain *ctx)
>> +{
>> + struct optee_std_call *call;
>> + int count;
>> +
>> + /* Make sure that guest does not execute more than MAX_STD_CALLS */
>> + count = atomic_add_unless(&ctx->call_count, 1, MAX_STD_CALLS);
>> + if ( count == MAX_STD_CALLS )
>> + return NULL;
>> +
>> + call = xzalloc(struct optee_std_call);
>> + if ( !call )
>> + {
>> + atomic_dec(&ctx->call_count);
>> + return NULL;
>> + }
>> +
>> + call->optee_thread_id = -1;
>> + call->in_flight = true;
>> +
>> + spin_lock(&ctx->lock);
>> + list_add_tail(&call->list, &ctx->call_list);
>> + spin_unlock(&ctx->lock);
>> +
>> + return call;
>> +}
>> +
>> +static void free_std_call(struct optee_domain *ctx,
>> + struct optee_std_call *call)
>> +{
>> + atomic_dec(&ctx->call_count);
>> +
>> + spin_lock(&ctx->lock);
>> + list_del(&call->list);
>> + spin_unlock(&ctx->lock);
>> +
>> + ASSERT(!call->in_flight);
>> + xfree(call->xen_arg);
>> + xfree(call);
>> +}
>> +
>> +static struct optee_std_call *get_std_call(struct optee_domain *ctx,
>> + int thread_id)
>> +{
>> + struct optee_std_call *call;
>> +
>> + spin_lock(&ctx->lock);
>> + list_for_each_entry( call, &ctx->call_list, list )
>> + {
>> + if ( call->optee_thread_id == thread_id )
>> + {
>> + if ( call->in_flight )
>> + {
>> + gprintk(XENLOG_WARNING, "Guest tries to execute call which
>> is already in flight");
>> + goto out;
>> + }
>> + call->in_flight = true;
>> + spin_unlock(&ctx->lock);
>> + return call;
>> + }
>> + }
>> +out:
>> + spin_unlock(&ctx->lock);
>> +
>> + return NULL;
>> +}
>> +
>> +static void put_std_call(struct optee_domain *ctx, struct optee_std_call
>> *call)
>> +{
>> + spin_lock(&ctx->lock);
>> + ASSERT(call->in_flight);
>> + call->in_flight = false;
>> + spin_unlock(&ctx->lock);
>> +}
>> +
>> static void optee_domain_destroy(struct domain *d)
>> {
>> struct arm_smccc_res resp;
>> + struct optee_std_call *call, *call_tmp;
>> + struct optee_domain *ctx = d->arch.tee;
>>
>> /* At this time all domain VCPUs should be stopped */
>> @@ -124,6 +242,199 @@ static void optee_domain_destroy(struct
>> domain *d)
>> */
>> arm_smccc_smc(OPTEE_SMC_VM_DESTROYED, OPTEE_CLIENT_ID(d), 0, 0, 0, 0,
>> 0, 0,
>> &resp);
>
> This function can be called without enable and should be
> idempotent. So I woudld check d->arch.tee before and...
Nothing bad will happen if it is called without a prior call to enable.
But yes, I need to check that ctx != NULL.
>> + ASSERT(!spin_is_locked(&ctx->lock));
>> +
>> + list_for_each_entry_safe( call, call_tmp, &ctx->call_list, list )
>> + free_std_call(ctx, call);
>> +
>> + ASSERT(!atomic_read(&ctx->call_count));
>> +
>> + xfree(d->arch.tee);
>
> use XFREE here.
Oh, looks like I missed this macro introduction. Thank you for pointing
out.
>> +}
>> +
>> +/*
>> + * Copy command buffer into xen memory to:
>> + * 1) Hide translated addresses from guest
>> + * 2) Make sure that guest wouldn't change data in command buffer during
>> call
>> + */
>> +static bool copy_std_request(struct cpu_user_regs *regs,
>> + struct optee_std_call *call)
>> +{
>> + paddr_t xen_addr;
>> +
>> + call->guest_arg_ipa = (paddr_t)get_user_reg(regs, 1) << 32 |
>> + get_user_reg(regs, 2);
>
> NIT: The indentation looks weird here.
>
>> +
>> + /*
>> + * Command buffer should start at page boundary.
>> + * This is OP-TEE ABI requirement.
>> + */
>> + if ( call->guest_arg_ipa & (OPTEE_MSG_NONCONTIG_PAGE_SIZE - 1) )
>> + return false;
>> +
>> + call->xen_arg = _xmalloc(OPTEE_MSG_NONCONTIG_PAGE_SIZE,
>> + OPTEE_MSG_NONCONTIG_PAGE_SIZE);
>> + if ( !call->xen_arg )
>> + return false;
>> +
>> + BUILD_BUG_ON(OPTEE_MSG_NONCONTIG_PAGE_SIZE > PAGE_SIZE);
>
> As you use _xmalloc, you should not need this. This is only necessary
> if you use alloc_xenheap_page.
>
Yes, right. This is leftover from time when I mapped guest page into
XEN. I'll move it down, to place where I map that page.
> I am wondering whether it is wise to allocate the memory from xenheap
> and not domheap. While on Arm64 (for now) xenheap and domheap are the
> same, on Arm32 they are different. The xenheap is at most 1GB, so
> pretty limited.
Honestly, I have no opinion there. What are limitations of domheap in
this case?
> Furthermore, using domheap would have the advantage to allow in the
> future accounting the allocation to the guest and add more safety
> (there are discussion to make domheap per domain).
>
>> +
>> + access_guest_memory_by_ipa(current->domain, call->guest_arg_ipa,
>> + call->xen_arg, OPTEE_MSG_NONCONTIG_PAGE_SIZE,
>> + false);
>
> You need to check the return of access_guest_memory_by_ipa as this
> function can fail.
Will do.
>> +
>> + xen_addr = virt_to_maddr(call->xen_arg);
>> +
>> + set_user_reg(regs, 1, xen_addr >> 32);
>> + set_user_reg(regs, 2, xen_addr & 0xFFFFFFFF);
>> +
>> + return true;
>> +}
>> +
>> +static void copy_std_request_back(struct optee_domain *ctx,
>> + struct cpu_user_regs *regs,
>> + struct optee_std_call *call)
>
> Can you add a comment on top of the function explaining what it does?
Yes, sure.
"Copy OP-TEE response back into guest's request buffer" will be sufficient?
>> +{
>> + struct optee_msg_arg *guest_arg;
>> + struct page_info *page;
>> + unsigned int i;
>> + uint32_t attr;
>> +
>> + /* copy_std_request() validated IPA for us */
>
> Not really, the guest is free to modify the stage-2 mapping on another
> vCPU while this is happening. I agree that the guest will shoot
> himself, but we at least need to not have weird behavior happening.
>
> In that case, I would check that the type is p2m_ram_rw as you don't
> want to write in read-only or foreign mapping.
How can I do this atomically? I.e. what if the guest mangles the p2m right
after the check?
> Also, as copy_std_request() and copy_std_request_back may not be
> called in the same "thread" it would be useful if you specify a bit
> more the interaction.
I'm not sure what you mean there.
>
>> + page = get_page_from_gfn(current->domain,
>> paddr_to_pfn(call->guest_arg_ipa),
>
> Please use gfn_x(gaddr_to_gfn(...)) to clarify this is a gfn. The
> gfn_x will be unnecessary soon with a cleanup that is currently under
> review.
So there is a chance that it will be removed by the time this patch series
gets merged? :)
>
>> + NULL, P2M_ALLOC);
>> + if ( !page )
>> + return;
>> +
>> + guest_arg = map_domain_page(page_to_mfn(page));
>
> So here you assume that PAGE_SIZE ==
> OPTEE_MSG_NONCONTIG_PAGE_SIZE. Can you add a BUILD_BUG_ON just above
> (with a comment) so we don't get some nasty surprise with 64K support.
Yes, sure.
> Also, you should be able to use __map_domain_page(page) here.
Oh, thank you for pointing to this macro. Not the best descriptive name,
I must say.
>
>> +
>> + guest_arg->ret = call->xen_arg->ret;
>> + guest_arg->ret_origin = call->xen_arg->ret_origin;
>> + guest_arg->session = call->xen_arg->session;
>
> NIT: newline here please.
>
>> + for ( i = 0; i < call->xen_arg->num_params; i++ )
>> + {
>> + attr = call->xen_arg->params[i].attr;
>> +
>> + switch ( attr & OPTEE_MSG_ATTR_TYPE_MASK )
>> + {
>> + case OPTEE_MSG_ATTR_TYPE_TMEM_OUTPUT:
>> + case OPTEE_MSG_ATTR_TYPE_TMEM_INOUT:
>> + guest_arg->params[i].u.tmem.size =
>> + call->xen_arg->params[i].u.tmem.size;
>> + continue;
>> + case OPTEE_MSG_ATTR_TYPE_VALUE_OUTPUT:
>> + case OPTEE_MSG_ATTR_TYPE_VALUE_INOUT:
>> + guest_arg->params[i].u.value.a =
>> + call->xen_arg->params[i].u.value.a;
>> + guest_arg->params[i].u.value.b =
>> + call->xen_arg->params[i].u.value.b;
>> + continue;
>> + case OPTEE_MSG_ATTR_TYPE_RMEM_OUTPUT:
>> + case OPTEE_MSG_ATTR_TYPE_RMEM_INOUT:
>> + guest_arg->params[i].u.rmem.size =
>> + call->xen_arg->params[i].u.rmem.size;
>> + continue;
>> + case OPTEE_MSG_ATTR_TYPE_NONE:
>> + case OPTEE_MSG_ATTR_TYPE_RMEM_INPUT:
>> + case OPTEE_MSG_ATTR_TYPE_TMEM_INPUT:
>> + continue;
>> + }
>> + }
>> +
>> + unmap_domain_page(guest_arg);
>> + put_page(page);
>> +}
>> +
>> +static void execute_std_call(struct optee_domain *ctx,
>> + struct cpu_user_regs *regs,
>> + struct optee_std_call *call)
>> +{
>> + register_t optee_ret;
>> +
>> + forward_call(regs);
>> +
>> + optee_ret = get_user_reg(regs, 0);
>> + if ( OPTEE_SMC_RETURN_IS_RPC(optee_ret) )
>> + {
>> + call->optee_thread_id = get_user_reg(regs, 3);
>> + call->rpc_op = OPTEE_SMC_RETURN_GET_RPC_FUNC(optee_ret);
>> + put_std_call(ctx, call);
>> + return;
>> + }
>> +
>> + copy_std_request_back(ctx, regs, call);
>> +
>> + put_std_call(ctx, call);
>> + free_std_call(ctx, call);
>> +}
>
> Most of the code in this patch is self-explaining, which is quite nice
> :). However, I think this function would require explaining a bit the
> logic. For instance in which case the call will be freed.
Thank you :)
Yes, this depends on whether the call was completed or was interrupted due to
an RPC, in which case it will be resumed later. I'll add a comment.
>> +
>> +static bool handle_std_call(struct optee_domain *ctx,
>> + struct cpu_user_regs *regs)
>> +{
>> + struct optee_std_call *call = allocate_std_call(ctx);
>> +
>> + if ( !call )
>> + return false;
>> +
>> + if ( !copy_std_request(regs, call) )
>> + goto err;
>> +
>> + /* Now we can safely examine contents of command buffer */
>> + if ( OPTEE_MSG_GET_ARG_SIZE(call->xen_arg->num_params) >
>> + OPTEE_MSG_NONCONTIG_PAGE_SIZE )
>> + goto err;
>> +
>> + switch ( call->xen_arg->cmd )
>> + {
>> + case OPTEE_MSG_CMD_OPEN_SESSION:
>> + case OPTEE_MSG_CMD_CLOSE_SESSION:
>> + case OPTEE_MSG_CMD_INVOKE_COMMAND:
>> + case OPTEE_MSG_CMD_CANCEL:
>> + case OPTEE_MSG_CMD_REGISTER_SHM:
>> + case OPTEE_MSG_CMD_UNREGISTER_SHM:
>> + break;
>> + default:
>> + goto err;
>> + }
>> +
>> + execute_std_call(ctx, regs, call);
>> +
>> + return true;
>
> This function is a bit odd to read. I think it would be more clear if
> you move this code before the break.
Yes, you are right.
>> +
>> +err:
>> + put_std_call(ctx, call);
>> + free_std_call(ctx, call);
>> +
>> + return false;
>> +}
>> +
>> +static bool handle_rpc(struct optee_domain *ctx, struct cpu_user_regs *regs)
>> +{
>> + struct optee_std_call *call;
>> + int optee_thread_id = get_user_reg(regs, 3);
>> +
>> + call = get_std_call(ctx, optee_thread_id);
>> +
>> + if ( !call )
>> + return false;
>> +
>> + switch ( call->rpc_op )
>> + {
>> + case OPTEE_SMC_RPC_FUNC_ALLOC:
>> + /* TODO: Add handling */
>> + break;
>> + case OPTEE_SMC_RPC_FUNC_FREE:
>> + /* TODO: Add handling */
>> + break;
>> + case OPTEE_SMC_RPC_FUNC_FOREIGN_INTR:
>> + break;
>> + case OPTEE_SMC_RPC_FUNC_CMD:
>> + /* TODO: Add handling */
>> + break;
>> + }
>> +
>> + execute_std_call(ctx, regs, call);
>> + return true;
>> }
>> static bool handle_exchange_capabilities(struct cpu_user_regs
>> *regs)
>> @@ -161,6 +472,8 @@ static bool handle_exchange_capabilities(struct
>> cpu_user_regs *regs)
>> static bool optee_handle_call(struct cpu_user_regs *regs)
>> {
>> + struct optee_domain *ctx = current->domain->arch.tee;
>> +
>> switch ( get_user_reg(regs, 0) )
>> {
>> case OPTEE_SMC_CALLS_COUNT:
>> @@ -170,8 +483,6 @@ static bool optee_handle_call(struct cpu_user_regs *regs)
>> case OPTEE_SMC_FUNCID_GET_OS_REVISION:
>> case OPTEE_SMC_ENABLE_SHM_CACHE:
>> case OPTEE_SMC_DISABLE_SHM_CACHE:
>> - case OPTEE_SMC_CALL_WITH_ARG:
>> - case OPTEE_SMC_CALL_RETURN_FROM_RPC:
>> forward_call(regs);
>> return true;
>> case OPTEE_SMC_GET_SHM_CONFIG:
>> @@ -180,6 +491,10 @@ static bool optee_handle_call(struct cpu_user_regs
>> *regs)
>> return true;
>> case OPTEE_SMC_EXCHANGE_CAPABILITIES:
>> return handle_exchange_capabilities(regs);
>> + case OPTEE_SMC_CALL_WITH_ARG:
>> + return handle_std_call(ctx, regs);
>> + case OPTEE_SMC_CALL_RETURN_FROM_RPC:
>> + return handle_rpc(ctx, regs);
>> default:
>> return false;
>> }
>> diff --git a/xen/include/asm-arm/domain.h b/xen/include/asm-arm/domain.h
>> index 175de44927..88b48697bd 100644
>> --- a/xen/include/asm-arm/domain.h
>> +++ b/xen/include/asm-arm/domain.h
>> @@ -97,6 +97,9 @@ struct arch_domain
>> struct vpl011 vpl011;
>> #endif
>> +#ifdef CONFIG_TEE
>> + void *tee;
>> +#endif
>
> Did you look whether there are any hole in arch_domain that could be re-used?
I thought about that. But what are the chances of finding a 64-bit-wide,
64-bit-aligned hole in a structure? If I remember the C standard correctly,
there is no reason for the compiler to leave such holes.
I'm talking about aarch64 there, but I assume that this is also true for
aarch32/armv7.
>> } __cacheline_aligned;
>> struct arch_vcpu
>>
>
> Cheers,
--
Best regards, Volodymyr Babchuk
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |